for-6.4/block-2023-04-21
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmRCvcIQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpk+JEACj01t7Xen2+Razagu3aTx9tmRGFnTNR3MY raFG6B1TADk1TgCWWa2C4Dj67SOispPLm8hbIcOxqB1UscDWCCwjmnr/debADFzW Ap6shv/IRwVGmDp+F7ocYas0ynwooOJg4WJTwkSKz2o4m4p3vzlwAKi4fLiSjbXp gJTrA7WEvDOVjzajlTFUtjr8rc6PdunbGm25cPIufAxUEhvttYex2VbVqjDmfNsE 8tyyk9RWbe4AY/ZYaGXVn4yQ/CgL/sXFkVc5noRXNfAQ/K3CVLQrFLJ3JlwUHpiA xXBor21TUWCZEo33Y2G5NConAYqE7etoPTkaTDO3/aZ+dAMFyhC/WAYLz1KZGMh1 +g1fDX1QKEd40H2lfDXvqF1ob7Ut8EzUx+gvBXcc3/AiRpJ5rjfOcj6LPUMUqQJk nucLLFTiMKecnDMBERbvixqbaTyrjvkFEj2wYJvgj1LKXAd+x/bj8SGajs9r88Nb 9YT9ai/+Yl7Ppfb67rCgXJU7oNZQSAQ2H+X/l2jbiqImOgq1u/45AmINnbanS7HH Y1I8pbH45AcnCgkJRoQwrNX3BnTOTBJ+D/4Fl4b8jsihq0D3UtwCwPCObHP4LW9S MUNPhP3tUuYsAgXqX80+Sao6SYvXDwnbWOM+LOaaZXgjb1ndwDUZXpto8Ra8WB1u 8kM6s6ZR7g== =W1Zb -----END PGP SIGNATURE----- Merge tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux Pull block updates from Jens Axboe: - drbd patches, bringing us closer to unifying the out-of-tree version and the in tree one (Andreas, Christoph) - support for auto-quiesce for the s390 dasd driver (Stefan) - MD pull request via Song: - md/bitmap: Optimal last page size (Jon Derrick) - Various raid10 fixes (Yu Kuai, Li Nan) - md: add error_handlers for raid0 and linear (Mariusz Tkaczyk) - NVMe pull request via Christoph: - Drop redundant pci_enable_pcie_error_reporting (Bjorn Helgaas) - Validate nvmet module parameters (Chaitanya Kulkarni) - Fence TCP socket on receive error (Chris Leech) - Fix async event trace event (Keith Busch) - Minor cleanups (Chaitanya Kulkarni, zhenwei pi) - Fix and cleanup nvmet Identify handling (Damien Le Moal, Christoph Hellwig) - Fix double blk_mq_complete_request race in the timeout handler (Lei Yin) - Fix irq locking in nvme-fcloop (Ming Lei) - Remove queue mapping helper for rdma devices (Sagi Grimberg) - use structured request attribute checks for nbd (Jakub) - fix blk-crypto race conditions between keyslot management (Eric) 
- add sed-opal support for reading read locking range attributes (Ondrej) - make fault injection configurable for null_blk (Akinobu) - clean up the request insertion API (Christoph) - clean up the queue running API (Christoph) - blkg config helper cleanups (Tejun) - lazy init support for blk-iolatency (Tejun) - various fixes and tweaks to ublk (Ming) - remove hybrid polling. It hasn't really been useful since we got async polled IO support, and these days we don't support sync polled IO at all (Keith) - misc fixes, cleanups, improvements (Zhong, Ondrej, Colin, Chengming, Chaitanya, me) * tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux: (118 commits) nbd: fix incomplete validation of ioctl arg ublk: don't return 0 in case of any failure sed-opal: geometry feature reporting command null_blk: Always check queue mode setting from configfs block: ublk: switch to ioctl command encoding blk-mq: fix the blk_mq_add_to_requeue_list call in blk_kick_flush block, bfq: Fix division by zero error on zero wsum fault-inject: fix build error when FAULT_INJECTION_CONFIGFS=y and CONFIGFS_FS=m block: store bdev->bd_disk->fops->submit_bio state in bdev block: re-arrange the struct block_device fields for better layout md/raid5: remove unused working_disks variable md/raid10: don't call bio_start_io_acct twice for bio which experienced read error md/raid10: fix memleak of md thread md/raid10: fix memleak for 'conf->bio_split' md/raid10: fix leak of 'r10bio->remaining' for recovery md/raid10: don't BUG_ON() in raise_barrier() md: fix soft lockup in status_resync md: add error_handlers for raid0 and linear md: Use optimal I/O size for last bitmap page md: Fix types in sb writer ...
This commit is contained in:
commit
9dd6956b38
@ -336,18 +336,11 @@ What: /sys/block/<disk>/queue/io_poll_delay
|
||||
Date: November 2016
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RW] If polling is enabled, this controls what kind of polling
|
||||
will be performed. It defaults to -1, which is classic polling.
|
||||
[RW] This was used to control what kind of polling will be
|
||||
performed. It is now fixed to -1, which is classic polling.
|
||||
In this mode, the CPU will repeatedly ask for completions
|
||||
without giving up any time. If set to 0, a hybrid polling mode
|
||||
is used, where the kernel will attempt to make an educated guess
|
||||
at when the IO will complete. Based on this guess, the kernel
|
||||
will put the process issuing IO to sleep for an amount of time,
|
||||
before entering a classic poll loop. This mode might be a little
|
||||
slower than pure classic polling, but it will be more efficient.
|
||||
If set to a value larger than 0, the kernel will put the process
|
||||
issuing IO to sleep for this amount of microseconds before
|
||||
entering classic polling.
|
||||
without giving up any time.
|
||||
<deprecated>
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/io_timeout
|
||||
|
@ -270,8 +270,7 @@ Request queue based layered devices like dm-rq that wish to support inline
|
||||
encryption need to create their own blk_crypto_profile for their request_queue,
|
||||
and expose whatever functionality they choose. When a layered device wants to
|
||||
pass a clone of that request to another request_queue, blk-crypto will
|
||||
initialize and prepare the clone as necessary; see
|
||||
``blk_crypto_insert_cloned_request()``.
|
||||
initialize and prepare the clone as necessary.
|
||||
|
||||
Interaction between inline encryption and blk integrity
|
||||
=======================================================
|
||||
|
@ -52,6 +52,14 @@ Available fault injection capabilities
|
||||
status code is NVME_SC_INVALID_OPCODE with no retry. The status code and
|
||||
retry flag can be set via the debugfs.
|
||||
|
||||
- Null test block driver fault injection
|
||||
|
||||
inject IO timeouts by setting config items under
|
||||
/sys/kernel/config/nullb/<disk>/timeout_inject,
|
||||
inject requeue requests by setting config items under
|
||||
/sys/kernel/config/nullb/<disk>/requeue_inject, and
|
||||
inject init_hctx() errors by setting config items under
|
||||
/sys/kernel/config/nullb/<disk>/init_hctx_fault_inject.
|
||||
|
||||
Configure fault-injection capabilities behavior
|
||||
-----------------------------------------------
|
||||
|
@ -78,6 +78,7 @@ typedef struct dasd_information2_t {
|
||||
* 0x040: give access to raw eckd data
|
||||
* 0x080: enable discard support
|
||||
* 0x100: enable autodisable for IFCC errors (default)
|
||||
* 0x200: enable requeue of all requests on autoquiesce
|
||||
*/
|
||||
#define DASD_FEATURE_READONLY 0x001
|
||||
#define DASD_FEATURE_USEDIAG 0x002
|
||||
@ -88,6 +89,7 @@ typedef struct dasd_information2_t {
|
||||
#define DASD_FEATURE_USERAW 0x040
|
||||
#define DASD_FEATURE_DISCARD 0x080
|
||||
#define DASD_FEATURE_PATH_AUTODISABLE 0x100
|
||||
#define DASD_FEATURE_REQUEUEQUIESCE 0x200
|
||||
#define DASD_FEATURE_DEFAULT DASD_FEATURE_PATH_AUTODISABLE
|
||||
|
||||
#define DASD_PARTN_BITS 2
|
||||
|
@ -215,11 +215,6 @@ config BLK_MQ_VIRTIO
|
||||
depends on VIRTIO
|
||||
default y
|
||||
|
||||
config BLK_MQ_RDMA
|
||||
bool
|
||||
depends on INFINIBAND
|
||||
default y
|
||||
|
||||
config BLK_PM
|
||||
def_bool PM
|
||||
|
||||
|
@ -30,7 +30,6 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
|
||||
obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o
|
||||
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
|
||||
obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
|
||||
obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o
|
||||
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
|
||||
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
|
||||
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
|
||||
|
@ -419,6 +419,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
|
||||
bdev->bd_inode = inode;
|
||||
bdev->bd_queue = disk->queue;
|
||||
bdev->bd_stats = alloc_percpu(struct disk_stats);
|
||||
bdev->bd_has_submit_bio = false;
|
||||
if (!bdev->bd_stats) {
|
||||
iput(inode);
|
||||
return NULL;
|
||||
|
@ -497,17 +497,11 @@ static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
|
||||
bgd = kzalloc(sizeof(*bgd), gfp);
|
||||
if (!bgd)
|
||||
return NULL;
|
||||
|
||||
bgd->weight = CGROUP_WEIGHT_DFL;
|
||||
return &bgd->pd;
|
||||
}
|
||||
|
||||
static void bfq_cpd_init(struct blkcg_policy_data *cpd)
|
||||
{
|
||||
struct bfq_group_data *d = cpd_to_bfqgd(cpd);
|
||||
|
||||
d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
|
||||
CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
|
||||
}
|
||||
|
||||
static void bfq_cpd_free(struct blkcg_policy_data *cpd)
|
||||
{
|
||||
kfree(cpd_to_bfqgd(cpd));
|
||||
@ -1111,9 +1105,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
|
||||
struct bfq_group *bfqg;
|
||||
u64 v;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
if (sscanf(ctx.body, "%llu", &v) == 1) {
|
||||
/* require "default" on dfl */
|
||||
@ -1135,7 +1131,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
|
||||
ret = 0;
|
||||
}
|
||||
out:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
@ -1301,8 +1297,6 @@ struct blkcg_policy blkcg_policy_bfq = {
|
||||
.legacy_cftypes = bfq_blkcg_legacy_files,
|
||||
|
||||
.cpd_alloc_fn = bfq_cpd_alloc,
|
||||
.cpd_init_fn = bfq_cpd_init,
|
||||
.cpd_bind_fn = bfq_cpd_init,
|
||||
.cpd_free_fn = bfq_cpd_free,
|
||||
|
||||
.pd_alloc_fn = bfq_pd_alloc,
|
||||
|
@ -129,7 +129,6 @@
|
||||
#include "elevator.h"
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "bfq-iosched.h"
|
||||
#include "blk-wbt.h"
|
||||
@ -649,6 +648,8 @@ retry:
|
||||
sched_data->service_tree[i].wsum;
|
||||
}
|
||||
}
|
||||
if (!wsum)
|
||||
continue;
|
||||
limit = DIV_ROUND_CLOSEST(limit * entity->weight, wsum);
|
||||
if (entity->allocated >= limit) {
|
||||
bfq_log_bfqq(bfqq->bfqd, bfqq,
|
||||
@ -6232,7 +6233,7 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
|
||||
static struct bfq_queue *bfq_init_rq(struct request *rq);
|
||||
|
||||
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head)
|
||||
blk_insert_t flags)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct bfq_data *bfqd = q->elevator->elevator_data;
|
||||
@ -6255,11 +6256,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
if (!bfqq || at_head) {
|
||||
if (at_head)
|
||||
list_add(&rq->queuelist, &bfqd->dispatch);
|
||||
else
|
||||
list_add_tail(&rq->queuelist, &bfqd->dispatch);
|
||||
if (flags & BLK_MQ_INSERT_AT_HEAD) {
|
||||
list_add(&rq->queuelist, &bfqd->dispatch);
|
||||
} else if (!bfqq) {
|
||||
list_add_tail(&rq->queuelist, &bfqd->dispatch);
|
||||
} else {
|
||||
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
|
||||
/*
|
||||
@ -6289,14 +6289,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
}
|
||||
|
||||
static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list, bool at_head)
|
||||
struct list_head *list,
|
||||
blk_insert_t flags)
|
||||
{
|
||||
while (!list_empty(list)) {
|
||||
struct request *rq;
|
||||
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
bfq_insert_request(hctx, rq, at_head);
|
||||
bfq_insert_request(hctx, rq, flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,6 @@
|
||||
|
||||
#define BFQ_DEFAULT_QUEUE_IOPRIO 4
|
||||
|
||||
#define BFQ_WEIGHT_LEGACY_DFL 100
|
||||
#define BFQ_DEFAULT_GRP_IOPRIO 0
|
||||
#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
|
||||
|
||||
|
@ -33,7 +33,6 @@
|
||||
#include "blk-cgroup.h"
|
||||
#include "blk-ioprio.h"
|
||||
#include "blk-throttle.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
/*
|
||||
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
|
||||
@ -693,69 +692,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
|
||||
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
|
||||
|
||||
/**
|
||||
* blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
|
||||
* @inputp: input string pointer
|
||||
* blkg_conf_init - initialize a blkg_conf_ctx
|
||||
* @ctx: blkg_conf_ctx to initialize
|
||||
* @input: input string
|
||||
*
|
||||
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update
|
||||
* from @input and get and return the matching bdev. *@inputp is
|
||||
* updated to point past the device node prefix. Returns an ERR_PTR()
|
||||
* value on error.
|
||||
*
|
||||
* Use this function iff blkg_conf_prep() can't be used for some reason.
|
||||
* Initialize @ctx which can be used to parse blkg config input string @input.
|
||||
* Once initialized, @ctx can be used with blkg_conf_open_bdev() and
|
||||
* blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
|
||||
*/
|
||||
struct block_device *blkcg_conf_open_bdev(char **inputp)
|
||||
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
|
||||
{
|
||||
char *input = *inputp;
|
||||
*ctx = (struct blkg_conf_ctx){ .input = input };
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_init);
|
||||
|
||||
/**
|
||||
* blkg_conf_open_bdev - parse and open bdev for per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
|
||||
* @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
|
||||
* set to point past the device node prefix.
|
||||
*
|
||||
* This function may be called multiple times on @ctx and the extra calls become
|
||||
* NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
|
||||
* explicitly if bdev access is needed without resolving the blkcg / policy part
|
||||
* of @ctx->input. Returns -errno on error.
|
||||
*/
|
||||
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
|
||||
{
|
||||
char *input = ctx->input;
|
||||
unsigned int major, minor;
|
||||
struct block_device *bdev;
|
||||
int key_len;
|
||||
|
||||
if (ctx->bdev)
|
||||
return 0;
|
||||
|
||||
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
|
||||
return ERR_PTR(-EINVAL);
|
||||
return -EINVAL;
|
||||
|
||||
input += key_len;
|
||||
if (!isspace(*input))
|
||||
return ERR_PTR(-EINVAL);
|
||||
return -EINVAL;
|
||||
input = skip_spaces(input);
|
||||
|
||||
bdev = blkdev_get_no_open(MKDEV(major, minor));
|
||||
if (!bdev)
|
||||
return ERR_PTR(-ENODEV);
|
||||
return -ENODEV;
|
||||
if (bdev_is_partition(bdev)) {
|
||||
blkdev_put_no_open(bdev);
|
||||
return ERR_PTR(-ENODEV);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
*inputp = input;
|
||||
return bdev;
|
||||
ctx->body = input;
|
||||
ctx->bdev = bdev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_conf_prep - parse and prepare for per-blkg config update
|
||||
* @blkcg: target block cgroup
|
||||
* @pol: target policy
|
||||
* @input: input string
|
||||
* @ctx: blkg_conf_ctx to be filled
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Parse per-blkg config update from @input and initialize @ctx with the
|
||||
* result. @ctx->blkg points to the blkg to be updated and @ctx->body the
|
||||
* part of @input following MAJ:MIN. This function returns with RCU read
|
||||
* lock and queue lock held and must be paired with blkg_conf_finish().
|
||||
* Parse per-blkg config update from @ctx->input and initialize @ctx
|
||||
* accordingly. On success, @ctx->body points to the part of @ctx->input
|
||||
* following MAJ:MIN, @ctx->bdev points to the target block device and
|
||||
* @ctx->blkg to the blkg being configured.
|
||||
*
|
||||
* blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this
|
||||
* function returns with queue lock held and must be followed by
|
||||
* blkg_conf_exit().
|
||||
*/
|
||||
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
char *input, struct blkg_conf_ctx *ctx)
|
||||
__acquires(rcu) __acquires(&bdev->bd_queue->queue_lock)
|
||||
struct blkg_conf_ctx *ctx)
|
||||
__acquires(&bdev->bd_queue->queue_lock)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct gendisk *disk;
|
||||
struct request_queue *q;
|
||||
struct blkcg_gq *blkg;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
disk = bdev->bd_disk;
|
||||
ret = blkg_conf_open_bdev(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
disk = ctx->bdev->bd_disk;
|
||||
q = disk->queue;
|
||||
|
||||
/*
|
||||
@ -766,7 +789,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
rcu_read_lock();
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
|
||||
if (!blkcg_policy_enabled(q, pol)) {
|
||||
@ -795,7 +817,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
|
||||
/* Drop locks to do new blkg allocation with GFP_KERNEL. */
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
new_blkg = blkg_alloc(pos, disk, GFP_KERNEL);
|
||||
if (unlikely(!new_blkg)) {
|
||||
@ -809,7 +830,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
goto fail_exit_queue;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
|
||||
if (!blkcg_policy_enabled(q, pol)) {
|
||||
@ -836,20 +856,16 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
}
|
||||
success:
|
||||
blk_queue_exit(q);
|
||||
ctx->bdev = bdev;
|
||||
ctx->blkg = blkg;
|
||||
ctx->body = input;
|
||||
return 0;
|
||||
|
||||
fail_preloaded:
|
||||
radix_tree_preload_end();
|
||||
fail_unlock:
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
rcu_read_unlock();
|
||||
fail_exit_queue:
|
||||
blk_queue_exit(q);
|
||||
fail:
|
||||
blkdev_put_no_open(bdev);
|
||||
/*
|
||||
* If queue was bypassing, we should retry. Do so after a
|
||||
* short msleep(). It isn't strictly necessary but queue
|
||||
@ -865,20 +881,27 @@ fail:
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_prep);
|
||||
|
||||
/**
|
||||
* blkg_conf_finish - finish up per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
|
||||
* blkg_conf_exit - clean up per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Finish up after per-blkg config update. This function must be paired
|
||||
* with blkg_conf_prep().
|
||||
* Clean up after per-blkg config update. This function must be called on all
|
||||
* blkg_conf_ctx's initialized with blkg_conf_init().
|
||||
*/
|
||||
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
|
||||
__releases(&ctx->bdev->bd_queue->queue_lock) __releases(rcu)
|
||||
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
|
||||
__releases(&ctx->bdev->bd_queue->queue_lock)
|
||||
{
|
||||
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
|
||||
rcu_read_unlock();
|
||||
blkdev_put_no_open(ctx->bdev);
|
||||
if (ctx->blkg) {
|
||||
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
|
||||
ctx->blkg = NULL;
|
||||
}
|
||||
|
||||
if (ctx->bdev) {
|
||||
blkdev_put_no_open(ctx->bdev);
|
||||
ctx->body = NULL;
|
||||
ctx->bdev = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_finish);
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_exit);
|
||||
|
||||
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
@ -1289,8 +1312,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
blkcg->cpd[i] = cpd;
|
||||
cpd->blkcg = blkcg;
|
||||
cpd->plid = i;
|
||||
if (pol->cpd_init_fn)
|
||||
pol->cpd_init_fn(cpd);
|
||||
}
|
||||
|
||||
spin_lock_init(&blkcg->lock);
|
||||
@ -1368,14 +1389,8 @@ int blkcg_init_disk(struct gendisk *disk)
|
||||
if (ret)
|
||||
goto err_ioprio_exit;
|
||||
|
||||
ret = blk_iolatency_init(disk);
|
||||
if (ret)
|
||||
goto err_throtl_exit;
|
||||
|
||||
return 0;
|
||||
|
||||
err_throtl_exit:
|
||||
blk_throtl_exit(disk);
|
||||
err_ioprio_exit:
|
||||
blk_ioprio_exit(disk);
|
||||
err_destroy_all:
|
||||
@ -1391,30 +1406,9 @@ err_unlock:
|
||||
void blkcg_exit_disk(struct gendisk *disk)
|
||||
{
|
||||
blkg_destroy_all(disk);
|
||||
rq_qos_exit(disk->queue);
|
||||
blk_throtl_exit(disk);
|
||||
}
|
||||
|
||||
static void blkcg_bind(struct cgroup_subsys_state *root_css)
|
||||
{
|
||||
int i;
|
||||
|
||||
mutex_lock(&blkcg_pol_mutex);
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
struct blkcg *blkcg;
|
||||
|
||||
if (!pol || !pol->cpd_bind_fn)
|
||||
continue;
|
||||
|
||||
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
|
||||
if (blkcg->cpd[pol->plid])
|
||||
pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
|
||||
}
|
||||
mutex_unlock(&blkcg_pol_mutex);
|
||||
}
|
||||
|
||||
static void blkcg_exit(struct task_struct *tsk)
|
||||
{
|
||||
if (tsk->throttle_disk)
|
||||
@ -1428,7 +1422,6 @@ struct cgroup_subsys io_cgrp_subsys = {
|
||||
.css_offline = blkcg_css_offline,
|
||||
.css_free = blkcg_css_free,
|
||||
.css_rstat_flush = blkcg_rstat_flush,
|
||||
.bind = blkcg_bind,
|
||||
.dfl_cftypes = blkcg_files,
|
||||
.legacy_cftypes = blkcg_legacy_files,
|
||||
.legacy_name = "blkio",
|
||||
@ -1666,8 +1659,6 @@ int blkcg_policy_register(struct blkcg_policy *pol)
|
||||
blkcg->cpd[pol->plid] = cpd;
|
||||
cpd->blkcg = blkcg;
|
||||
cpd->plid = pol->plid;
|
||||
if (pol->cpd_init_fn)
|
||||
pol->cpd_init_fn(cpd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -174,9 +174,7 @@ struct blkcg_policy {
|
||||
|
||||
/* operations */
|
||||
blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
|
||||
blkcg_pol_init_cpd_fn *cpd_init_fn;
|
||||
blkcg_pol_free_cpd_fn *cpd_free_fn;
|
||||
blkcg_pol_bind_cpd_fn *cpd_bind_fn;
|
||||
|
||||
blkcg_pol_alloc_pd_fn *pd_alloc_fn;
|
||||
blkcg_pol_init_pd_fn *pd_init_fn;
|
||||
@ -209,15 +207,17 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
|
||||
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
|
||||
|
||||
struct blkg_conf_ctx {
|
||||
char *input;
|
||||
char *body;
|
||||
struct block_device *bdev;
|
||||
struct blkcg_gq *blkg;
|
||||
char *body;
|
||||
};
|
||||
|
||||
struct block_device *blkcg_conf_open_bdev(char **inputp);
|
||||
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
|
||||
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
|
||||
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
char *input, struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
|
||||
struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
|
||||
|
||||
/**
|
||||
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
|
||||
|
@ -263,13 +263,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
|
||||
|
||||
static void blk_free_queue(struct request_queue *q)
|
||||
{
|
||||
if (q->poll_stat)
|
||||
blk_stat_remove_callback(q, q->poll_cb);
|
||||
blk_stat_free_callback(q->poll_cb);
|
||||
|
||||
blk_free_queue_stats(q->stats);
|
||||
kfree(q->poll_stat);
|
||||
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_release(q);
|
||||
|
||||
@ -593,14 +587,14 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
|
||||
|
||||
static void __submit_bio(struct bio *bio)
|
||||
{
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
|
||||
if (unlikely(!blk_crypto_bio_prep(&bio)))
|
||||
return;
|
||||
|
||||
if (!disk->fops->submit_bio) {
|
||||
if (!bio->bi_bdev->bd_has_submit_bio) {
|
||||
blk_mq_submit_bio(bio);
|
||||
} else if (likely(bio_queue_enter(bio) == 0)) {
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
|
||||
disk->fops->submit_bio(bio);
|
||||
blk_queue_exit(disk->queue);
|
||||
}
|
||||
@ -704,7 +698,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
|
||||
*/
|
||||
if (current->bio_list)
|
||||
bio_list_add(&current->bio_list[0], bio);
|
||||
else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
|
||||
else if (!bio->bi_bdev->bd_has_submit_bio)
|
||||
__submit_bio_noacct_mq(bio);
|
||||
else
|
||||
__submit_bio_noacct(bio);
|
||||
|
@ -65,6 +65,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
|
||||
return rq->crypt_ctx;
|
||||
}
|
||||
|
||||
static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
|
||||
{
|
||||
return rq->crypt_keyslot;
|
||||
}
|
||||
|
||||
blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
|
||||
const struct blk_crypto_key *key,
|
||||
struct blk_crypto_keyslot **slot_ptr);
|
||||
@ -119,6 +124,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
|
||||
|
||||
void __bio_crypt_advance(struct bio *bio, unsigned int bytes);
|
||||
@ -153,14 +163,21 @@ static inline bool blk_crypto_bio_prep(struct bio **bio_ptr)
|
||||
return true;
|
||||
}
|
||||
|
||||
blk_status_t __blk_crypto_init_request(struct request *rq);
|
||||
static inline blk_status_t blk_crypto_init_request(struct request *rq)
|
||||
blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq);
|
||||
static inline blk_status_t blk_crypto_rq_get_keyslot(struct request *rq)
|
||||
{
|
||||
if (blk_crypto_rq_is_encrypted(rq))
|
||||
return __blk_crypto_init_request(rq);
|
||||
return __blk_crypto_rq_get_keyslot(rq);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
void __blk_crypto_rq_put_keyslot(struct request *rq);
|
||||
static inline void blk_crypto_rq_put_keyslot(struct request *rq)
|
||||
{
|
||||
if (blk_crypto_rq_has_keyslot(rq))
|
||||
__blk_crypto_rq_put_keyslot(rq);
|
||||
}
|
||||
|
||||
void __blk_crypto_free_request(struct request *rq);
|
||||
static inline void blk_crypto_free_request(struct request *rq)
|
||||
{
|
||||
@ -188,21 +205,6 @@ static inline int blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_crypto_insert_cloned_request - Prepare a cloned request to be inserted
|
||||
* into a request queue.
|
||||
* @rq: the request being queued
|
||||
*
|
||||
* Return: BLK_STS_OK on success, nonzero on error.
|
||||
*/
|
||||
static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq)
|
||||
{
|
||||
|
||||
if (blk_crypto_rq_is_encrypted(rq))
|
||||
return blk_crypto_init_request(rq);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
|
||||
|
||||
int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num);
|
||||
|
@ -227,14 +227,13 @@ EXPORT_SYMBOL_GPL(blk_crypto_keyslot_index);
|
||||
* @profile: the crypto profile of the device the key will be used on
|
||||
* @key: the key that will be used
|
||||
* @slot_ptr: If a keyslot is allocated, an opaque pointer to the keyslot struct
|
||||
* will be stored here; otherwise NULL will be stored here.
|
||||
* will be stored here. blk_crypto_put_keyslot() must be called
|
||||
* later to release it. Otherwise, NULL will be stored here.
|
||||
*
|
||||
* If the device has keyslots, this gets a keyslot that's been programmed with
|
||||
* the specified key. If the key is already in a slot, this reuses it;
|
||||
* otherwise this waits for a slot to become idle and programs the key into it.
|
||||
*
|
||||
* This must be paired with a call to blk_crypto_put_keyslot().
|
||||
*
|
||||
* Context: Process context. Takes and releases profile->lock.
|
||||
* Return: BLK_STS_OK on success, meaning that either a keyslot was allocated or
|
||||
* one wasn't needed; or a blk_status_t error on failure.
|
||||
@ -312,20 +311,15 @@ success:
|
||||
|
||||
/**
|
||||
* blk_crypto_put_keyslot() - Release a reference to a keyslot
|
||||
* @slot: The keyslot to release the reference of (may be NULL).
|
||||
* @slot: The keyslot to release the reference of
|
||||
*
|
||||
* Context: Any context.
|
||||
*/
|
||||
void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot)
|
||||
{
|
||||
struct blk_crypto_profile *profile;
|
||||
struct blk_crypto_profile *profile = slot->profile;
|
||||
unsigned long flags;
|
||||
|
||||
if (!slot)
|
||||
return;
|
||||
|
||||
profile = slot->profile;
|
||||
|
||||
if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
|
||||
&profile->idle_slots_lock, flags)) {
|
||||
list_add_tail(&slot->idle_slot_node, &profile->idle_slots);
|
||||
@ -354,28 +348,16 @@ bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* __blk_crypto_evict_key() - Evict a key from a device.
|
||||
* @profile: the crypto profile of the device
|
||||
* @key: the key to evict. It must not still be used in any I/O.
|
||||
*
|
||||
* If the device has keyslots, this finds the keyslot (if any) that contains the
|
||||
* specified key and calls the driver's keyslot_evict function to evict it.
|
||||
*
|
||||
* Otherwise, this just calls the driver's keyslot_evict function if it is
|
||||
* implemented, passing just the key (without any particular keyslot). This
|
||||
* allows layered devices to evict the key from their underlying devices.
|
||||
*
|
||||
* Context: Process context. Takes and releases profile->lock.
|
||||
* Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
|
||||
* if the keyslot is still in use, or another -errno value on other
|
||||
* error.
|
||||
/*
|
||||
* This is an internal function that evicts a key from an inline encryption
|
||||
* device that can be either a real device or the blk-crypto-fallback "device".
|
||||
* It is used only by blk_crypto_evict_key(); see that function for details.
|
||||
*/
|
||||
int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
|
||||
const struct blk_crypto_key *key)
|
||||
{
|
||||
struct blk_crypto_keyslot *slot;
|
||||
int err = 0;
|
||||
int err;
|
||||
|
||||
if (profile->num_slots == 0) {
|
||||
if (profile->ll_ops.keyslot_evict) {
|
||||
@ -389,22 +371,30 @@ int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
|
||||
|
||||
blk_crypto_hw_enter(profile);
|
||||
slot = blk_crypto_find_keyslot(profile, key);
|
||||
if (!slot)
|
||||
goto out_unlock;
|
||||
if (!slot) {
|
||||
/*
|
||||
* Not an error, since a key not in use by I/O is not guaranteed
|
||||
* to be in a keyslot. There can be more keys than keyslots.
|
||||
*/
|
||||
err = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
|
||||
/* BUG: key is still in use by I/O */
|
||||
err = -EBUSY;
|
||||
goto out_unlock;
|
||||
goto out_remove;
|
||||
}
|
||||
err = profile->ll_ops.keyslot_evict(profile, key,
|
||||
blk_crypto_keyslot_index(slot));
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
out_remove:
|
||||
/*
|
||||
* Callers free the key even on error, so unlink the key from the hash
|
||||
* table and clear slot->key even on error.
|
||||
*/
|
||||
hlist_del(&slot->hash_node);
|
||||
slot->key = NULL;
|
||||
err = 0;
|
||||
out_unlock:
|
||||
out:
|
||||
blk_crypto_hw_exit(profile);
|
||||
return err;
|
||||
}
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-crypto-profile.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "blk-crypto-internal.h"
|
||||
@ -224,27 +225,27 @@ static bool bio_crypt_check_alignment(struct bio *bio)
|
||||
return true;
|
||||
}
|
||||
|
||||
blk_status_t __blk_crypto_init_request(struct request *rq)
|
||||
blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq)
|
||||
{
|
||||
return blk_crypto_get_keyslot(rq->q->crypto_profile,
|
||||
rq->crypt_ctx->bc_key,
|
||||
&rq->crypt_keyslot);
|
||||
}
|
||||
|
||||
/**
|
||||
* __blk_crypto_free_request - Uninitialize the crypto fields of a request.
|
||||
*
|
||||
* @rq: The request whose crypto fields to uninitialize.
|
||||
*
|
||||
* Completely uninitializes the crypto fields of a request. If a keyslot has
|
||||
* been programmed into some inline encryption hardware, that keyslot is
|
||||
* released. The rq->crypt_ctx is also freed.
|
||||
*/
|
||||
void __blk_crypto_free_request(struct request *rq)
|
||||
void __blk_crypto_rq_put_keyslot(struct request *rq)
|
||||
{
|
||||
blk_crypto_put_keyslot(rq->crypt_keyslot);
|
||||
rq->crypt_keyslot = NULL;
|
||||
}
|
||||
|
||||
void __blk_crypto_free_request(struct request *rq)
|
||||
{
|
||||
/* The keyslot, if one was needed, should have been released earlier. */
|
||||
if (WARN_ON_ONCE(rq->crypt_keyslot))
|
||||
__blk_crypto_rq_put_keyslot(rq);
|
||||
|
||||
mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
|
||||
blk_crypto_rq_set_defaults(rq);
|
||||
rq->crypt_ctx = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -399,30 +400,39 @@ int blk_crypto_start_using_key(struct block_device *bdev,
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_crypto_evict_key() - Evict a key from any inline encryption hardware
|
||||
* it may have been programmed into
|
||||
* @bdev: The block_device who's associated inline encryption hardware this key
|
||||
* might have been programmed into
|
||||
* @key: The key to evict
|
||||
* blk_crypto_evict_key() - Evict a blk_crypto_key from a block_device
|
||||
* @bdev: a block_device on which I/O using the key may have been done
|
||||
* @key: the key to evict
|
||||
*
|
||||
* Upper layers (filesystems) must call this function to ensure that a key is
|
||||
* evicted from any hardware that it might have been programmed into. The key
|
||||
* must not be in use by any in-flight IO when this function is called.
|
||||
* For a given block_device, this function removes the given blk_crypto_key from
|
||||
* the keyslot management structures and evicts it from any underlying hardware
|
||||
* keyslot(s) or blk-crypto-fallback keyslot it may have been programmed into.
|
||||
*
|
||||
* Return: 0 on success or if the key wasn't in any keyslot; -errno on error.
|
||||
* Upper layers must call this before freeing the blk_crypto_key. It must be
|
||||
* called for every block_device the key may have been used on. The key must no
|
||||
* longer be in use by any I/O when this function is called.
|
||||
*
|
||||
* Context: May sleep.
|
||||
*/
|
||||
int blk_crypto_evict_key(struct block_device *bdev,
|
||||
const struct blk_crypto_key *key)
|
||||
void blk_crypto_evict_key(struct block_device *bdev,
|
||||
const struct blk_crypto_key *key)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
int err;
|
||||
|
||||
if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg))
|
||||
return __blk_crypto_evict_key(q->crypto_profile, key);
|
||||
|
||||
err = __blk_crypto_evict_key(q->crypto_profile, key);
|
||||
else
|
||||
err = blk_crypto_fallback_evict_key(key);
|
||||
/*
|
||||
* If the block_device didn't support the key, then blk-crypto-fallback
|
||||
* may have been used, so try to evict the key from blk-crypto-fallback.
|
||||
* An error can only occur here if the key failed to be evicted from a
|
||||
* keyslot (due to a hardware or driver issue) or is allegedly still in
|
||||
* use by I/O (due to a kernel bug). Even in these cases, the key is
|
||||
* still unlinked from the keyslot management structures, and the caller
|
||||
* is allowed and expected to free it right away. There's nothing
|
||||
* callers can do to handle errors, so just log them and return void.
|
||||
*/
|
||||
return blk_crypto_fallback_evict_key(key);
|
||||
if (err)
|
||||
pr_warn_ratelimited("%pg: error %d evicting key\n", bdev, err);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_crypto_evict_key);
|
||||
|
@ -68,12 +68,10 @@
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/part_stat.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
/* PREFLUSH/FUA sequences */
|
||||
@ -138,11 +136,6 @@ static void blk_flush_restore_request(struct request *rq)
|
||||
rq->end_io = rq->flush.saved_end_io;
|
||||
}
|
||||
|
||||
static void blk_flush_queue_rq(struct request *rq, bool add_front)
|
||||
{
|
||||
blk_mq_add_to_requeue_list(rq, add_front, true);
|
||||
}
|
||||
|
||||
static void blk_account_io_flush(struct request *rq)
|
||||
{
|
||||
struct block_device *part = rq->q->disk->part0;
|
||||
@ -195,7 +188,8 @@ static void blk_flush_complete_seq(struct request *rq,
|
||||
|
||||
case REQ_FSEQ_DATA:
|
||||
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
|
||||
blk_flush_queue_rq(rq, true);
|
||||
blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
|
||||
blk_mq_kick_requeue_list(q);
|
||||
break;
|
||||
|
||||
case REQ_FSEQ_DONE:
|
||||
@ -352,7 +346,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
|
||||
smp_wmb();
|
||||
req_ref_set(flush_rq, 1);
|
||||
|
||||
blk_flush_queue_rq(flush_rq, false);
|
||||
blk_mq_add_to_requeue_list(flush_rq, 0);
|
||||
blk_mq_kick_requeue_list(q);
|
||||
}
|
||||
|
||||
static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
|
||||
@ -396,6 +391,7 @@ void blk_insert_flush(struct request *rq)
|
||||
unsigned long fflags = q->queue_flags; /* may change, cache */
|
||||
unsigned int policy = blk_flush_policy(fflags, rq);
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
/*
|
||||
* @policy now records what operations need to be done. Adjust
|
||||
@ -432,7 +428,8 @@ void blk_insert_flush(struct request *rq)
|
||||
*/
|
||||
if ((policy & REQ_FSEQ_DATA) &&
|
||||
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
|
||||
blk_mq_request_bypass_insert(rq, false, true);
|
||||
blk_mq_request_bypass_insert(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -3106,9 +3106,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err;
|
||||
|
||||
iocg = blkg_to_iocg(ctx.blkg);
|
||||
|
||||
@ -3127,12 +3129,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
|
||||
weight_updated(iocg, &now);
|
||||
spin_unlock(&iocg->ioc->lock);
|
||||
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
|
||||
einval:
|
||||
blkg_conf_finish(&ctx);
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
@ -3189,19 +3193,22 @@ static const match_table_t qos_tokens = {
|
||||
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct blkg_conf_ctx ctx;
|
||||
struct gendisk *disk;
|
||||
struct ioc *ioc;
|
||||
u32 qos[NR_QOS_PARAMS];
|
||||
bool enable, user;
|
||||
char *p;
|
||||
char *body, *p;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
blkg_conf_init(&ctx, input);
|
||||
|
||||
disk = bdev->bd_disk;
|
||||
ret = blkg_conf_open_bdev(&ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
body = ctx.body;
|
||||
disk = ctx.bdev->bd_disk;
|
||||
if (!queue_is_mq(disk->queue)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
@ -3223,7 +3230,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
enable = ioc->enabled;
|
||||
user = ioc->user_qos_params;
|
||||
|
||||
while ((p = strsep(&input, " \t\n"))) {
|
||||
while ((p = strsep(&body, " \t\n"))) {
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
char buf[32];
|
||||
int tok;
|
||||
@ -3313,7 +3320,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
blk_mq_unquiesce_queue(disk->queue);
|
||||
blk_mq_unfreeze_queue(disk->queue);
|
||||
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
einval:
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
@ -3323,7 +3330,7 @@ einval:
|
||||
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3376,19 +3383,22 @@ static const match_table_t i_lcoef_tokens = {
|
||||
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct blkg_conf_ctx ctx;
|
||||
struct request_queue *q;
|
||||
struct ioc *ioc;
|
||||
u64 u[NR_I_LCOEFS];
|
||||
bool user;
|
||||
char *p;
|
||||
char *body, *p;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
blkg_conf_init(&ctx, input);
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
ret = blkg_conf_open_bdev(&ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
body = ctx.body;
|
||||
q = bdev_get_queue(ctx.bdev);
|
||||
if (!queue_is_mq(q)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
@ -3396,7 +3406,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
|
||||
ioc = q_to_ioc(q);
|
||||
if (!ioc) {
|
||||
ret = blk_iocost_init(bdev->bd_disk);
|
||||
ret = blk_iocost_init(ctx.bdev->bd_disk);
|
||||
if (ret)
|
||||
goto err;
|
||||
ioc = q_to_ioc(q);
|
||||
@ -3409,7 +3419,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
|
||||
user = ioc->user_cost_model;
|
||||
|
||||
while ((p = strsep(&input, " \t\n"))) {
|
||||
while ((p = strsep(&body, " \t\n"))) {
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
char buf[32];
|
||||
int tok;
|
||||
@ -3456,7 +3466,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
|
||||
einval:
|
||||
@ -3467,7 +3477,7 @@ einval:
|
||||
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -755,7 +755,7 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
int blk_iolatency_init(struct gendisk *disk)
|
||||
static int blk_iolatency_init(struct gendisk *disk)
|
||||
{
|
||||
struct blk_iolatency *blkiolat;
|
||||
int ret;
|
||||
@ -824,6 +824,29 @@ static void iolatency_clear_scaling(struct blkcg_gq *blkg)
|
||||
}
|
||||
}
|
||||
|
||||
static int blk_iolatency_try_init(struct blkg_conf_ctx *ctx)
|
||||
{
|
||||
static DEFINE_MUTEX(init_mutex);
|
||||
int ret;
|
||||
|
||||
ret = blkg_conf_open_bdev(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* blk_iolatency_init() may fail after rq_qos_add() succeeds which can
|
||||
* confuse iolat_rq_qos() test. Make the test and init atomic.
|
||||
*/
|
||||
mutex_lock(&init_mutex);
|
||||
|
||||
if (!iolat_rq_qos(ctx->bdev->bd_queue))
|
||||
ret = blk_iolatency_init(ctx->bdev->bd_disk);
|
||||
|
||||
mutex_unlock(&init_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
@ -836,9 +859,15 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
||||
u64 oldval;
|
||||
int ret;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blk_iolatency_try_init(&ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
iolat = blkg_to_lat(ctx.blkg);
|
||||
p = ctx.body;
|
||||
@ -874,7 +903,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
||||
iolatency_clear_scaling(blkg);
|
||||
ret = 0;
|
||||
out:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
@ -967,7 +996,7 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
|
||||
{
|
||||
struct iolatency_grp *iolat = pd_to_lat(pd);
|
||||
struct blkcg_gq *blkg = lat_to_blkg(iolat);
|
||||
struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
|
||||
struct rq_qos *rqos = iolat_rq_qos(blkg->q);
|
||||
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
int cpu;
|
||||
|
@ -867,6 +867,8 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
if (!blk_discard_mergable(req))
|
||||
elv_merge_requests(q, req, next);
|
||||
|
||||
blk_crypto_rq_put_keyslot(next);
|
||||
|
||||
/*
|
||||
* 'next' is going away, so update stats accordingly
|
||||
*/
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/group_cpus.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
|
||||
|
@ -7,41 +7,14 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
|
||||
{
|
||||
if (stat->nr_samples) {
|
||||
seq_printf(m, "samples=%d, mean=%llu, min=%llu, max=%llu",
|
||||
stat->nr_samples, stat->mean, stat->min, stat->max);
|
||||
} else {
|
||||
seq_puts(m, "samples=0");
|
||||
}
|
||||
}
|
||||
|
||||
static int queue_poll_stat_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
int bucket;
|
||||
|
||||
if (!q->poll_stat)
|
||||
return 0;
|
||||
|
||||
for (bucket = 0; bucket < (BLK_MQ_POLL_STATS_BKTS / 2); bucket++) {
|
||||
seq_printf(m, "read (%d Bytes): ", 1 << (9 + bucket));
|
||||
print_stat(m, &q->poll_stat[2 * bucket]);
|
||||
seq_puts(m, "\n");
|
||||
|
||||
seq_printf(m, "write (%d Bytes): ", 1 << (9 + bucket));
|
||||
print_stat(m, &q->poll_stat[2 * bucket + 1]);
|
||||
seq_puts(m, "\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -282,7 +255,6 @@ static const char *const rqf_name[] = {
|
||||
RQF_NAME(STATS),
|
||||
RQF_NAME(SPECIAL_PAYLOAD),
|
||||
RQF_NAME(ZONE_WRITE_LOCKED),
|
||||
RQF_NAME(MQ_POLL_SLEPT),
|
||||
RQF_NAME(TIMED_OUT),
|
||||
RQF_NAME(ELV),
|
||||
RQF_NAME(RESV),
|
||||
|
@ -4,7 +4,6 @@
|
||||
*/
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-pci.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/module.h>
|
||||
|
@ -1,44 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2017 Sagi Grimberg.
|
||||
*/
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-rdma.h>
|
||||
#include <rdma/ib_verbs.h>
|
||||
|
||||
/**
|
||||
* blk_mq_rdma_map_queues - provide a default queue mapping for rdma device
|
||||
* @map: CPU to hardware queue map.
|
||||
* @dev: rdma device to provide a mapping for.
|
||||
* @first_vec: first interrupt vectors to use for queues (usually 0)
|
||||
*
|
||||
* This function assumes the rdma device @dev has at least as many available
|
||||
* interrupt vetors as @set has queues. It will then query it's affinity mask
|
||||
* and built queue mapping that maps a queue to the CPUs that have irq affinity
|
||||
* for the corresponding vector.
|
||||
*
|
||||
* In case either the driver passed a @dev with less vectors than
|
||||
* @set->nr_hw_queues, or @dev does not provide an affinity mask for a
|
||||
* vector, we fallback to the naive mapping.
|
||||
*/
|
||||
void blk_mq_rdma_map_queues(struct blk_mq_queue_map *map,
|
||||
struct ib_device *dev, int first_vec)
|
||||
{
|
||||
const struct cpumask *mask;
|
||||
unsigned int queue, cpu;
|
||||
|
||||
for (queue = 0; queue < map->nr_queues; queue++) {
|
||||
mask = ib_get_vector_affinity(dev, first_vec + queue);
|
||||
if (!mask)
|
||||
goto fallback;
|
||||
|
||||
for_each_cpu(cpu, mask)
|
||||
map->mq_map[cpu] = map->queue_offset + queue;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
fallback:
|
||||
blk_mq_map_queues(map);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues);
|
@ -6,7 +6,6 @@
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/list_sort.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
@ -15,7 +14,6 @@
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-wbt.h"
|
||||
|
||||
/*
|
||||
@ -271,9 +269,7 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
|
||||
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
const bool has_sched = q->elevator;
|
||||
int ret = 0;
|
||||
bool need_dispatch = false;
|
||||
LIST_HEAD(rq_list);
|
||||
|
||||
/*
|
||||
@ -302,23 +298,22 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
*/
|
||||
if (!list_empty(&rq_list)) {
|
||||
blk_mq_sched_mark_restart_hctx(hctx);
|
||||
if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
|
||||
if (has_sched)
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
else
|
||||
ret = blk_mq_do_dispatch_ctx(hctx);
|
||||
}
|
||||
} else if (has_sched) {
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
} else if (hctx->dispatch_busy) {
|
||||
/* dequeue request one by one from sw queue if queue is busy */
|
||||
ret = blk_mq_do_dispatch_ctx(hctx);
|
||||
if (!blk_mq_dispatch_rq_list(hctx, &rq_list, 0))
|
||||
return 0;
|
||||
need_dispatch = true;
|
||||
} else {
|
||||
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
||||
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
|
||||
need_dispatch = hctx->dispatch_busy;
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (hctx->queue->elevator)
|
||||
return blk_mq_do_dispatch_sched(hctx);
|
||||
|
||||
/* dequeue request one by one from sw queue if queue is busy */
|
||||
if (need_dispatch)
|
||||
return blk_mq_do_dispatch_ctx(hctx);
|
||||
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
||||
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
@ -384,116 +379,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
|
||||
|
||||
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq)
|
||||
{
|
||||
/*
|
||||
* dispatch flush and passthrough rq directly
|
||||
*
|
||||
* passthrough request has to be added to hctx->dispatch directly.
|
||||
* For some reason, device may be in one situation which can't
|
||||
* handle FS request, so STS_RESOURCE is always returned and the
|
||||
* FS request will be added to hctx->dispatch. However passthrough
|
||||
* request may be required at that time for fixing the problem. If
|
||||
* passthrough request is added to scheduler queue, there isn't any
|
||||
* chance to dispatch it given we prioritize requests in hctx->dispatch.
|
||||
*/
|
||||
if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
|
||||
bool run_queue, bool async)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
|
||||
|
||||
if (blk_mq_sched_bypass_insert(hctx, rq)) {
|
||||
/*
|
||||
* Firstly normal IO request is inserted to scheduler queue or
|
||||
* sw queue, meantime we add flush request to dispatch queue(
|
||||
* hctx->dispatch) directly and there is at most one in-flight
|
||||
* flush request for each hw queue, so it doesn't matter to add
|
||||
* flush request to tail or front of the dispatch queue.
|
||||
*
|
||||
* Secondly in case of NCQ, flush request belongs to non-NCQ
|
||||
* command, and queueing it will fail when there is any
|
||||
* in-flight normal IO request(NCQ command). When adding flush
|
||||
* rq to the front of hctx->dispatch, it is easier to introduce
|
||||
* extra time to flush rq's latency because of S_SCHED_RESTART
|
||||
* compared with adding to the tail of dispatch queue, then
|
||||
* chance of flush merge is increased, and less flush requests
|
||||
* will be issued to controller. It is observed that ~10% time
|
||||
* is saved in blktests block/004 on disk attached to AHCI/NCQ
|
||||
* drive when adding flush rq to the front of hctx->dispatch.
|
||||
*
|
||||
* Simply queue flush rq to the front of hctx->dispatch so that
|
||||
* intensive flush workloads can benefit in case of NCQ HW.
|
||||
*/
|
||||
at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
|
||||
blk_mq_request_bypass_insert(rq, at_head, false);
|
||||
goto run;
|
||||
}
|
||||
|
||||
if (e) {
|
||||
LIST_HEAD(list);
|
||||
|
||||
list_add(&rq->queuelist, &list);
|
||||
e->type->ops.insert_requests(hctx, &list, at_head);
|
||||
} else {
|
||||
spin_lock(&ctx->lock);
|
||||
__blk_mq_insert_request(hctx, rq, at_head);
|
||||
spin_unlock(&ctx->lock);
|
||||
}
|
||||
|
||||
run:
|
||||
if (run_queue)
|
||||
blk_mq_run_hw_queue(hctx, async);
|
||||
}
|
||||
|
||||
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *ctx,
|
||||
struct list_head *list, bool run_queue_async)
|
||||
{
|
||||
struct elevator_queue *e;
|
||||
struct request_queue *q = hctx->queue;
|
||||
|
||||
/*
|
||||
* blk_mq_sched_insert_requests() is called from flush plug
|
||||
* context only, and hold one usage counter to prevent queue
|
||||
* from being released.
|
||||
*/
|
||||
percpu_ref_get(&q->q_usage_counter);
|
||||
|
||||
e = hctx->queue->elevator;
|
||||
if (e) {
|
||||
e->type->ops.insert_requests(hctx, list, false);
|
||||
} else {
|
||||
/*
|
||||
* try to issue requests directly if the hw queue isn't
|
||||
* busy in case of 'none' scheduler, and this way may save
|
||||
* us one extra enqueue & dequeue to sw queue.
|
||||
*/
|
||||
if (!hctx->dispatch_busy && !run_queue_async) {
|
||||
blk_mq_run_dispatch_ops(hctx->queue,
|
||||
blk_mq_try_issue_list_directly(hctx, list));
|
||||
if (list_empty(list))
|
||||
goto out;
|
||||
}
|
||||
blk_mq_insert_requests(hctx, ctx, list);
|
||||
}
|
||||
|
||||
blk_mq_run_hw_queue(hctx, run_queue_async);
|
||||
out:
|
||||
percpu_ref_put(&q->q_usage_counter);
|
||||
}
|
||||
|
||||
static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
|
||||
struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx)
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
#include "elevator.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
#define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ)
|
||||
|
||||
@ -17,12 +16,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
|
||||
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx);
|
||||
void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
|
||||
|
||||
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
|
||||
bool run_queue, bool async);
|
||||
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *ctx,
|
||||
struct list_head *list, bool run_queue_async);
|
||||
|
||||
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
|
||||
|
||||
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
|
||||
|
@ -10,10 +10,8 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
static void blk_mq_sysfs_release(struct kobject *kobj)
|
||||
{
|
||||
|
@ -9,12 +9,10 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/delay.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
/*
|
||||
* Recalculate wakeup batch when tag is shared by hctx.
|
||||
|
@ -1,73 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef INT_BLK_MQ_TAG_H
|
||||
#define INT_BLK_MQ_TAG_H
|
||||
|
||||
struct blk_mq_alloc_data;
|
||||
|
||||
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
|
||||
unsigned int reserved_tags,
|
||||
int node, int alloc_policy);
|
||||
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
|
||||
extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
|
||||
struct sbitmap_queue *breserved_tags,
|
||||
unsigned int queue_depth,
|
||||
unsigned int reserved,
|
||||
int node, int alloc_policy);
|
||||
|
||||
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
|
||||
unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
|
||||
unsigned int *offset);
|
||||
extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
|
||||
unsigned int tag);
|
||||
void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags);
|
||||
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_tags **tags,
|
||||
unsigned int depth, bool can_grow);
|
||||
extern void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set,
|
||||
unsigned int size);
|
||||
extern void blk_mq_tag_update_sched_shared_tags(struct request_queue *q);
|
||||
|
||||
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
|
||||
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
|
||||
static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
|
||||
struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!hctx)
|
||||
return &bt->ws[0];
|
||||
return sbq_wait_ptr(bt, &hctx->wait_index);
|
||||
}
|
||||
|
||||
enum {
|
||||
BLK_MQ_NO_TAG = -1U,
|
||||
BLK_MQ_TAG_MIN = 1,
|
||||
BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
|
||||
};
|
||||
|
||||
extern void __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
|
||||
extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
|
||||
|
||||
static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
|
||||
__blk_mq_tag_busy(hctx);
|
||||
}
|
||||
|
||||
static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
|
||||
return;
|
||||
|
||||
__blk_mq_tag_idle(hctx);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
|
||||
unsigned int tag)
|
||||
{
|
||||
return tag < tags->nr_reserved_tags;
|
||||
}
|
||||
|
||||
#endif
|
@ -3,7 +3,6 @@
|
||||
* Copyright (c) 2016 Christoph Hellwig.
|
||||
*/
|
||||
#include <linux/device.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-virtio.h>
|
||||
#include <linux/virtio_config.h>
|
||||
#include <linux/module.h>
|
||||
|
667
block/blk-mq.c
667
block/blk-mq.c
@ -32,12 +32,10 @@
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/t10-pi.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-pm.h"
|
||||
#include "blk-stat.h"
|
||||
#include "blk-mq-sched.h"
|
||||
@ -46,51 +44,19 @@
|
||||
|
||||
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
|
||||
|
||||
static void blk_mq_poll_stats_start(struct request_queue *q);
|
||||
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
|
||||
|
||||
static int blk_mq_poll_stats_bkt(const struct request *rq)
|
||||
{
|
||||
int ddir, sectors, bucket;
|
||||
|
||||
ddir = rq_data_dir(rq);
|
||||
sectors = blk_rq_stats_sectors(rq);
|
||||
|
||||
bucket = ddir + 2 * ilog2(sectors);
|
||||
|
||||
if (bucket < 0)
|
||||
return -1;
|
||||
else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
|
||||
return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
|
||||
|
||||
return bucket;
|
||||
}
|
||||
|
||||
#define BLK_QC_T_SHIFT 16
|
||||
#define BLK_QC_T_INTERNAL (1U << 31)
|
||||
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
|
||||
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list);
|
||||
|
||||
static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
|
||||
blk_qc_t qc)
|
||||
{
|
||||
return xa_load(&q->hctx_table,
|
||||
(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT);
|
||||
}
|
||||
|
||||
static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
|
||||
blk_qc_t qc)
|
||||
{
|
||||
unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1);
|
||||
|
||||
if (qc & BLK_QC_T_INTERNAL)
|
||||
return blk_mq_tag_to_rq(hctx->sched_tags, tag);
|
||||
return blk_mq_tag_to_rq(hctx->tags, tag);
|
||||
return xa_load(&q->hctx_table, qc);
|
||||
}
|
||||
|
||||
static inline blk_qc_t blk_rq_to_qc(struct request *rq)
|
||||
{
|
||||
return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) |
|
||||
(rq->tag != -1 ?
|
||||
rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL));
|
||||
return rq->mq_hctx->queue_num;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -840,6 +806,12 @@ static void blk_complete_request(struct request *req)
|
||||
req->q->integrity.profile->complete_fn(req, total_bytes);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Upper layers may call blk_crypto_evict_key() anytime after the last
|
||||
* bio_endio(). Therefore, the keyslot must be released before that.
|
||||
*/
|
||||
blk_crypto_rq_put_keyslot(req);
|
||||
|
||||
blk_account_io_completion(req, total_bytes);
|
||||
|
||||
do {
|
||||
@ -905,6 +877,13 @@ bool blk_update_request(struct request *req, blk_status_t error,
|
||||
req->q->integrity.profile->complete_fn(req, nr_bytes);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Upper layers may call blk_crypto_evict_key() anytime after the last
|
||||
* bio_endio(). Therefore, the keyslot must be released before that.
|
||||
*/
|
||||
if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req))
|
||||
__blk_crypto_rq_put_keyslot(req);
|
||||
|
||||
if (unlikely(error && !blk_rq_is_passthrough(req) &&
|
||||
!(req->rq_flags & RQF_QUIET)) &&
|
||||
!test_bit(GD_DEAD, &req->q->disk->state)) {
|
||||
@ -976,17 +955,6 @@ bool blk_update_request(struct request *req, blk_status_t error,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_update_request);
|
||||
|
||||
static void __blk_account_io_done(struct request *req, u64 now)
|
||||
{
|
||||
const int sgrp = op_stat_group(req_op(req));
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(req->part, jiffies, true);
|
||||
part_stat_inc(req->part, ios[sgrp]);
|
||||
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
static inline void blk_account_io_done(struct request *req, u64 now)
|
||||
{
|
||||
/*
|
||||
@ -995,40 +963,41 @@ static inline void blk_account_io_done(struct request *req, u64 now)
|
||||
* containing request is enough.
|
||||
*/
|
||||
if (blk_do_io_stat(req) && req->part &&
|
||||
!(req->rq_flags & RQF_FLUSH_SEQ))
|
||||
__blk_account_io_done(req, now);
|
||||
}
|
||||
!(req->rq_flags & RQF_FLUSH_SEQ)) {
|
||||
const int sgrp = op_stat_group(req_op(req));
|
||||
|
||||
static void __blk_account_io_start(struct request *rq)
|
||||
{
|
||||
/*
|
||||
* All non-passthrough requests are created from a bio with one
|
||||
* exception: when a flush command that is part of a flush sequence
|
||||
* generated by the state machine in blk-flush.c is cloned onto the
|
||||
* lower device by dm-multipath we can get here without a bio.
|
||||
*/
|
||||
if (rq->bio)
|
||||
rq->part = rq->bio->bi_bdev;
|
||||
else
|
||||
rq->part = rq->q->disk->part0;
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(rq->part, jiffies, false);
|
||||
part_stat_unlock();
|
||||
part_stat_lock();
|
||||
update_io_ticks(req->part, jiffies, true);
|
||||
part_stat_inc(req->part, ios[sgrp]);
|
||||
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
|
||||
part_stat_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void blk_account_io_start(struct request *req)
|
||||
{
|
||||
if (blk_do_io_stat(req))
|
||||
__blk_account_io_start(req);
|
||||
if (blk_do_io_stat(req)) {
|
||||
/*
|
||||
* All non-passthrough requests are created from a bio with one
|
||||
* exception: when a flush command that is part of a flush sequence
|
||||
* generated by the state machine in blk-flush.c is cloned onto the
|
||||
* lower device by dm-multipath we can get here without a bio.
|
||||
*/
|
||||
if (req->bio)
|
||||
req->part = req->bio->bi_bdev;
|
||||
else
|
||||
req->part = req->q->disk->part0;
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(req->part, jiffies, false);
|
||||
part_stat_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
|
||||
{
|
||||
if (rq->rq_flags & RQF_STATS) {
|
||||
blk_mq_poll_stats_start(rq->q);
|
||||
if (rq->rq_flags & RQF_STATS)
|
||||
blk_stat_add(rq, now);
|
||||
}
|
||||
|
||||
blk_mq_sched_completed_request(rq, now);
|
||||
blk_account_io_done(rq, now);
|
||||
@ -1322,6 +1291,8 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
*/
|
||||
void blk_execute_rq_nowait(struct request *rq, bool at_head)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
WARN_ON(irqs_disabled());
|
||||
WARN_ON(!blk_rq_is_passthrough(rq));
|
||||
|
||||
@ -1332,10 +1303,13 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head)
|
||||
* device, directly accessing the plug instead of using blk_mq_plug()
|
||||
* should not have any consequences.
|
||||
*/
|
||||
if (current->plug)
|
||||
if (current->plug && !at_head) {
|
||||
blk_add_rq_to_plug(current->plug, rq);
|
||||
else
|
||||
blk_mq_sched_insert_request(rq, at_head, true, false);
|
||||
return;
|
||||
}
|
||||
|
||||
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
|
||||
|
||||
@ -1383,6 +1357,7 @@ static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
|
||||
*/
|
||||
blk_status_t blk_execute_rq(struct request *rq, bool at_head)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
struct blk_rq_wait wait = {
|
||||
.done = COMPLETION_INITIALIZER_ONSTACK(wait.done),
|
||||
};
|
||||
@ -1394,7 +1369,8 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
|
||||
rq->end_io = blk_end_sync_rq;
|
||||
|
||||
blk_account_io_start(rq);
|
||||
blk_mq_sched_insert_request(rq, at_head, true, false);
|
||||
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
|
||||
if (blk_rq_is_poll(rq)) {
|
||||
blk_rq_poll_completion(rq, &wait.done);
|
||||
@ -1434,12 +1410,17 @@ static void __blk_mq_requeue_request(struct request *rq)
|
||||
|
||||
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
|
||||
__blk_mq_requeue_request(rq);
|
||||
|
||||
/* this request will be re-inserted to io scheduler queue */
|
||||
blk_mq_sched_requeue_request(rq);
|
||||
|
||||
blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
|
||||
blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
|
||||
|
||||
if (kick_requeue_list)
|
||||
blk_mq_kick_requeue_list(q);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_requeue_request);
|
||||
|
||||
@ -1455,33 +1436,33 @@ static void blk_mq_requeue_work(struct work_struct *work)
|
||||
spin_unlock_irq(&q->requeue_lock);
|
||||
|
||||
list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
|
||||
if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
|
||||
continue;
|
||||
|
||||
rq->rq_flags &= ~RQF_SOFTBARRIER;
|
||||
list_del_init(&rq->queuelist);
|
||||
/*
|
||||
* If RQF_DONTPREP, rq has contained some driver specific
|
||||
* data, so insert it to hctx dispatch list to avoid any
|
||||
* merge.
|
||||
* If RQF_DONTPREP ist set, the request has been started by the
|
||||
* driver already and might have driver-specific data allocated
|
||||
* already. Insert it into the hctx dispatch list to avoid
|
||||
* block layer merges for the request.
|
||||
*/
|
||||
if (rq->rq_flags & RQF_DONTPREP)
|
||||
blk_mq_request_bypass_insert(rq, false, false);
|
||||
else
|
||||
blk_mq_sched_insert_request(rq, true, false, false);
|
||||
if (rq->rq_flags & RQF_DONTPREP) {
|
||||
rq->rq_flags &= ~RQF_SOFTBARRIER;
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_mq_request_bypass_insert(rq, 0);
|
||||
} else if (rq->rq_flags & RQF_SOFTBARRIER) {
|
||||
rq->rq_flags &= ~RQF_SOFTBARRIER;
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
|
||||
}
|
||||
}
|
||||
|
||||
while (!list_empty(&rq_list)) {
|
||||
rq = list_entry(rq_list.next, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_mq_sched_insert_request(rq, false, false, false);
|
||||
blk_mq_insert_request(rq, 0);
|
||||
}
|
||||
|
||||
blk_mq_run_hw_queues(q, false);
|
||||
}
|
||||
|
||||
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
|
||||
bool kick_requeue_list)
|
||||
void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
unsigned long flags;
|
||||
@ -1493,16 +1474,13 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
|
||||
BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
|
||||
|
||||
spin_lock_irqsave(&q->requeue_lock, flags);
|
||||
if (at_head) {
|
||||
if (insert_flags & BLK_MQ_INSERT_AT_HEAD) {
|
||||
rq->rq_flags |= RQF_SOFTBARRIER;
|
||||
list_add(&rq->queuelist, &q->requeue_list);
|
||||
} else {
|
||||
list_add_tail(&rq->queuelist, &q->requeue_list);
|
||||
}
|
||||
spin_unlock_irqrestore(&q->requeue_lock, flags);
|
||||
|
||||
if (kick_requeue_list)
|
||||
blk_mq_kick_requeue_list(q);
|
||||
}
|
||||
|
||||
void blk_mq_kick_requeue_list(struct request_queue *q)
|
||||
@ -2158,24 +2136,6 @@ out:
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* __blk_mq_run_hw_queue - Run a hardware queue.
|
||||
* @hctx: Pointer to the hardware queue to run.
|
||||
*
|
||||
* Send pending requests to the hardware.
|
||||
*/
|
||||
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
/*
|
||||
* We can't run the queue inline with ints disabled. Ensure that
|
||||
* we catch bad users of this early.
|
||||
*/
|
||||
WARN_ON_ONCE(in_interrupt());
|
||||
|
||||
blk_mq_run_dispatch_ops(hctx->queue,
|
||||
blk_mq_sched_dispatch_requests(hctx));
|
||||
}
|
||||
|
||||
static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
|
||||
@ -2231,32 +2191,6 @@ select_cpu:
|
||||
return next_cpu;
|
||||
}
|
||||
|
||||
/**
|
||||
* __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
|
||||
* @hctx: Pointer to the hardware queue to run.
|
||||
* @async: If we want to run the queue asynchronously.
|
||||
* @msecs: Milliseconds of delay to wait before running the queue.
|
||||
*
|
||||
* If !@async, try to run the queue now. Else, run the queue asynchronously and
|
||||
* with a delay of @msecs.
|
||||
*/
|
||||
static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
|
||||
unsigned long msecs)
|
||||
{
|
||||
if (unlikely(blk_mq_hctx_stopped(hctx)))
|
||||
return;
|
||||
|
||||
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
|
||||
if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
|
||||
__blk_mq_run_hw_queue(hctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
|
||||
msecs_to_jiffies(msecs));
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
|
||||
* @hctx: Pointer to the hardware queue to run.
|
||||
@ -2266,7 +2200,10 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
|
||||
*/
|
||||
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
|
||||
{
|
||||
__blk_mq_delay_run_hw_queue(hctx, true, msecs);
|
||||
if (unlikely(blk_mq_hctx_stopped(hctx)))
|
||||
return;
|
||||
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
|
||||
msecs_to_jiffies(msecs));
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
|
||||
|
||||
@ -2283,6 +2220,11 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
{
|
||||
bool need_run;
|
||||
|
||||
/*
|
||||
* We can't run the queue inline with interrupts disabled.
|
||||
*/
|
||||
WARN_ON_ONCE(!async && in_interrupt());
|
||||
|
||||
/*
|
||||
* When queue is quiesced, we may be switching io scheduler, or
|
||||
* updating nr_hw_queues, or other things, and we can't run queue
|
||||
@ -2295,8 +2237,17 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
need_run = !blk_queue_quiesced(hctx->queue) &&
|
||||
blk_mq_hctx_has_pending(hctx));
|
||||
|
||||
if (need_run)
|
||||
__blk_mq_delay_run_hw_queue(hctx, async, 0);
|
||||
if (!need_run)
|
||||
return;
|
||||
|
||||
if (async || (hctx->flags & BLK_MQ_F_BLOCKING) ||
|
||||
!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
|
||||
blk_mq_delay_run_hw_queue(hctx, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
blk_mq_run_dispatch_ops(hctx->queue,
|
||||
blk_mq_sched_dispatch_requests(hctx));
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_run_hw_queue);
|
||||
|
||||
@ -2461,79 +2412,51 @@ EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
|
||||
|
||||
static void blk_mq_run_work_fn(struct work_struct *work)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct blk_mq_hw_ctx *hctx =
|
||||
container_of(work, struct blk_mq_hw_ctx, run_work.work);
|
||||
|
||||
hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
|
||||
|
||||
/*
|
||||
* If we are stopped, don't run the queue.
|
||||
*/
|
||||
if (blk_mq_hctx_stopped(hctx))
|
||||
return;
|
||||
|
||||
__blk_mq_run_hw_queue(hctx);
|
||||
}
|
||||
|
||||
static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq,
|
||||
bool at_head)
|
||||
{
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
enum hctx_type type = hctx->type;
|
||||
|
||||
lockdep_assert_held(&ctx->lock);
|
||||
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
if (at_head)
|
||||
list_add(&rq->queuelist, &ctx->rq_lists[type]);
|
||||
else
|
||||
list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
|
||||
}
|
||||
|
||||
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head)
|
||||
{
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
|
||||
lockdep_assert_held(&ctx->lock);
|
||||
|
||||
__blk_mq_insert_req_list(hctx, rq, at_head);
|
||||
blk_mq_hctx_mark_pending(hctx, ctx);
|
||||
blk_mq_run_dispatch_ops(hctx->queue,
|
||||
blk_mq_sched_dispatch_requests(hctx));
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_request_bypass_insert - Insert a request at dispatch list.
|
||||
* @rq: Pointer to request to be inserted.
|
||||
* @at_head: true if the request should be inserted at the head of the list.
|
||||
* @run_queue: If we should run the hardware queue after inserting the request.
|
||||
* @flags: BLK_MQ_INSERT_*
|
||||
*
|
||||
* Should only be used carefully, when the caller knows we want to
|
||||
* bypass a potential IO scheduler on the target device.
|
||||
*/
|
||||
void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
|
||||
bool run_queue)
|
||||
void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
spin_lock(&hctx->lock);
|
||||
if (at_head)
|
||||
if (flags & BLK_MQ_INSERT_AT_HEAD)
|
||||
list_add(&rq->queuelist, &hctx->dispatch);
|
||||
else
|
||||
list_add_tail(&rq->queuelist, &hctx->dispatch);
|
||||
spin_unlock(&hctx->lock);
|
||||
|
||||
if (run_queue)
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
}
|
||||
|
||||
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
struct list_head *list)
|
||||
|
||||
static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *ctx, struct list_head *list,
|
||||
bool run_queue_async)
|
||||
{
|
||||
struct request *rq;
|
||||
enum hctx_type type = hctx->type;
|
||||
|
||||
/*
|
||||
* Try to issue requests directly if the hw queue isn't busy to save an
|
||||
* extra enqueue & dequeue to the sw queue.
|
||||
*/
|
||||
if (!hctx->dispatch_busy && !run_queue_async) {
|
||||
blk_mq_run_dispatch_ops(hctx->queue,
|
||||
blk_mq_try_issue_list_directly(hctx, list));
|
||||
if (list_empty(list))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* preemption doesn't flush plug list, so it's possible ctx->cpu is
|
||||
* offline now
|
||||
@ -2547,6 +2470,70 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
list_splice_tail_init(list, &ctx->rq_lists[type]);
|
||||
blk_mq_hctx_mark_pending(hctx, ctx);
|
||||
spin_unlock(&ctx->lock);
|
||||
out:
|
||||
blk_mq_run_hw_queue(hctx, run_queue_async);
|
||||
}
|
||||
|
||||
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
if (blk_rq_is_passthrough(rq)) {
|
||||
/*
|
||||
* Passthrough request have to be added to hctx->dispatch
|
||||
* directly. The device may be in a situation where it can't
|
||||
* handle FS request, and always returns BLK_STS_RESOURCE for
|
||||
* them, which gets them added to hctx->dispatch.
|
||||
*
|
||||
* If a passthrough request is required to unblock the queues,
|
||||
* and it is added to the scheduler queue, there is no chance to
|
||||
* dispatch it given we prioritize requests in hctx->dispatch.
|
||||
*/
|
||||
blk_mq_request_bypass_insert(rq, flags);
|
||||
} else if (rq->rq_flags & RQF_FLUSH_SEQ) {
|
||||
/*
|
||||
* Firstly normal IO request is inserted to scheduler queue or
|
||||
* sw queue, meantime we add flush request to dispatch queue(
|
||||
* hctx->dispatch) directly and there is at most one in-flight
|
||||
* flush request for each hw queue, so it doesn't matter to add
|
||||
* flush request to tail or front of the dispatch queue.
|
||||
*
|
||||
* Secondly in case of NCQ, flush request belongs to non-NCQ
|
||||
* command, and queueing it will fail when there is any
|
||||
* in-flight normal IO request(NCQ command). When adding flush
|
||||
* rq to the front of hctx->dispatch, it is easier to introduce
|
||||
* extra time to flush rq's latency because of S_SCHED_RESTART
|
||||
* compared with adding to the tail of dispatch queue, then
|
||||
* chance of flush merge is increased, and less flush requests
|
||||
* will be issued to controller. It is observed that ~10% time
|
||||
* is saved in blktests block/004 on disk attached to AHCI/NCQ
|
||||
* drive when adding flush rq to the front of hctx->dispatch.
|
||||
*
|
||||
* Simply queue flush rq to the front of hctx->dispatch so that
|
||||
* intensive flush workloads can benefit in case of NCQ HW.
|
||||
*/
|
||||
blk_mq_request_bypass_insert(rq, BLK_MQ_INSERT_AT_HEAD);
|
||||
} else if (q->elevator) {
|
||||
LIST_HEAD(list);
|
||||
|
||||
WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG);
|
||||
|
||||
list_add(&rq->queuelist, &list);
|
||||
q->elevator->type->ops.insert_requests(hctx, &list, flags);
|
||||
} else {
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
spin_lock(&ctx->lock);
|
||||
if (flags & BLK_MQ_INSERT_AT_HEAD)
|
||||
list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]);
|
||||
else
|
||||
list_add_tail(&rq->queuelist,
|
||||
&ctx->rq_lists[hctx->type]);
|
||||
blk_mq_hctx_mark_pending(hctx, ctx);
|
||||
spin_unlock(&ctx->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
|
||||
@ -2600,49 +2587,19 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq,
|
||||
bool bypass_insert, bool last)
|
||||
static bool blk_mq_get_budget_and_tag(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
bool run_queue = true;
|
||||
int budget_token;
|
||||
|
||||
/*
|
||||
* RCU or SRCU read lock is needed before checking quiesced flag.
|
||||
*
|
||||
* When queue is stopped or quiesced, ignore 'bypass_insert' from
|
||||
* blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
|
||||
* and avoid driver to try to dispatch again.
|
||||
*/
|
||||
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
|
||||
run_queue = false;
|
||||
bypass_insert = false;
|
||||
goto insert;
|
||||
}
|
||||
|
||||
if ((rq->rq_flags & RQF_ELV) && !bypass_insert)
|
||||
goto insert;
|
||||
|
||||
budget_token = blk_mq_get_dispatch_budget(q);
|
||||
budget_token = blk_mq_get_dispatch_budget(rq->q);
|
||||
if (budget_token < 0)
|
||||
goto insert;
|
||||
|
||||
return false;
|
||||
blk_mq_set_rq_budget_token(rq, budget_token);
|
||||
|
||||
if (!blk_mq_get_driver_tag(rq)) {
|
||||
blk_mq_put_dispatch_budget(q, budget_token);
|
||||
goto insert;
|
||||
blk_mq_put_dispatch_budget(rq->q, budget_token);
|
||||
return false;
|
||||
}
|
||||
|
||||
return __blk_mq_issue_directly(hctx, rq, last);
|
||||
insert:
|
||||
if (bypass_insert)
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
blk_mq_sched_insert_request(rq, false, run_queue, false);
|
||||
|
||||
return BLK_STS_OK;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2658,18 +2615,46 @@ insert:
|
||||
static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq)
|
||||
{
|
||||
blk_status_t ret =
|
||||
__blk_mq_try_issue_directly(hctx, rq, false, true);
|
||||
blk_status_t ret;
|
||||
|
||||
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
|
||||
blk_mq_request_bypass_insert(rq, false, true);
|
||||
else if (ret != BLK_STS_OK)
|
||||
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
|
||||
blk_mq_insert_request(rq, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) {
|
||||
blk_mq_insert_request(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = __blk_mq_issue_directly(hctx, rq, true);
|
||||
switch (ret) {
|
||||
case BLK_STS_OK:
|
||||
break;
|
||||
case BLK_STS_RESOURCE:
|
||||
case BLK_STS_DEV_RESOURCE:
|
||||
blk_mq_request_bypass_insert(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
break;
|
||||
default:
|
||||
blk_mq_end_request(rq, ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
|
||||
{
|
||||
return __blk_mq_try_issue_directly(rq->mq_hctx, rq, true, last);
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
|
||||
blk_mq_insert_request(rq, 0);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
if (!blk_mq_get_budget_and_tag(rq))
|
||||
return BLK_STS_RESOURCE;
|
||||
return __blk_mq_issue_directly(hctx, rq, last);
|
||||
}
|
||||
|
||||
static void blk_mq_plug_issue_direct(struct blk_plug *plug)
|
||||
@ -2697,7 +2682,8 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug)
|
||||
break;
|
||||
case BLK_STS_RESOURCE:
|
||||
case BLK_STS_DEV_RESOURCE:
|
||||
blk_mq_request_bypass_insert(rq, false, true);
|
||||
blk_mq_request_bypass_insert(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
goto out;
|
||||
default:
|
||||
blk_mq_end_request(rq, ret);
|
||||
@ -2743,7 +2729,16 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
|
||||
|
||||
plug->mq_list = requeue_list;
|
||||
trace_block_unplug(this_hctx->queue, depth, !from_sched);
|
||||
blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched);
|
||||
|
||||
percpu_ref_get(&this_hctx->queue->q_usage_counter);
|
||||
if (this_hctx->queue->elevator) {
|
||||
this_hctx->queue->elevator->type->ops.insert_requests(this_hctx,
|
||||
&list, 0);
|
||||
blk_mq_run_hw_queue(this_hctx, from_sched);
|
||||
} else {
|
||||
blk_mq_insert_requests(this_hctx, this_ctx, &list, from_sched);
|
||||
}
|
||||
percpu_ref_put(&this_hctx->queue->q_usage_counter);
|
||||
}
|
||||
|
||||
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
||||
@ -2789,7 +2784,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
||||
} while (!rq_list_empty(plug->mq_list));
|
||||
}
|
||||
|
||||
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list)
|
||||
{
|
||||
int queued = 0;
|
||||
@ -2807,8 +2802,9 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
break;
|
||||
case BLK_STS_RESOURCE:
|
||||
case BLK_STS_DEV_RESOURCE:
|
||||
blk_mq_request_bypass_insert(rq, false,
|
||||
list_empty(list));
|
||||
blk_mq_request_bypass_insert(rq, 0);
|
||||
if (list_empty(list))
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
goto out;
|
||||
default:
|
||||
blk_mq_end_request(rq, ret);
|
||||
@ -2934,6 +2930,7 @@ void blk_mq_submit_bio(struct bio *bio)
|
||||
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
|
||||
struct blk_plug *plug = blk_mq_plug(bio);
|
||||
const int is_sync = op_is_sync(bio->bi_opf);
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct request *rq;
|
||||
unsigned int nr_segs = 1;
|
||||
blk_status_t ret;
|
||||
@ -2965,7 +2962,7 @@ void blk_mq_submit_bio(struct bio *bio)
|
||||
|
||||
blk_mq_bio_to_request(rq, bio, nr_segs);
|
||||
|
||||
ret = blk_crypto_init_request(rq);
|
||||
ret = blk_crypto_rq_get_keyslot(rq);
|
||||
if (ret != BLK_STS_OK) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
@ -2978,15 +2975,19 @@ void blk_mq_submit_bio(struct bio *bio)
|
||||
return;
|
||||
}
|
||||
|
||||
if (plug)
|
||||
if (plug) {
|
||||
blk_add_rq_to_plug(plug, rq);
|
||||
else if ((rq->rq_flags & RQF_ELV) ||
|
||||
(rq->mq_hctx->dispatch_busy &&
|
||||
(q->nr_hw_queues == 1 || !is_sync)))
|
||||
blk_mq_sched_insert_request(rq, false, true, true);
|
||||
else
|
||||
blk_mq_run_dispatch_ops(rq->q,
|
||||
blk_mq_try_issue_directly(rq->mq_hctx, rq));
|
||||
return;
|
||||
}
|
||||
|
||||
hctx = rq->mq_hctx;
|
||||
if ((rq->rq_flags & RQF_ELV) ||
|
||||
(hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
|
||||
blk_mq_insert_request(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
} else {
|
||||
blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_MQ_STACKING
|
||||
@ -3034,8 +3035,9 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
|
||||
if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (blk_crypto_insert_cloned_request(rq))
|
||||
return BLK_STS_IOERR;
|
||||
ret = blk_crypto_rq_get_keyslot(rq);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
|
||||
blk_account_io_start(rq);
|
||||
|
||||
@ -4206,14 +4208,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
/* mark the queue as mq asap */
|
||||
q->mq_ops = set->ops;
|
||||
|
||||
q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
|
||||
blk_mq_poll_stats_bkt,
|
||||
BLK_MQ_POLL_STATS_BKTS, q);
|
||||
if (!q->poll_cb)
|
||||
goto err_exit;
|
||||
|
||||
if (blk_mq_alloc_ctxs(q))
|
||||
goto err_poll;
|
||||
goto err_exit;
|
||||
|
||||
/* init q->mq_kobj and sw queues' kobjects */
|
||||
blk_mq_sysfs_init(q);
|
||||
@ -4241,11 +4237,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
|
||||
q->nr_requests = set->queue_depth;
|
||||
|
||||
/*
|
||||
* Default to classic polling
|
||||
*/
|
||||
q->poll_nsec = BLK_MQ_POLL_CLASSIC;
|
||||
|
||||
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
|
||||
blk_mq_add_queue_tag_set(set, q);
|
||||
blk_mq_map_swqueue(q);
|
||||
@ -4253,9 +4244,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
|
||||
err_hctxs:
|
||||
blk_mq_release(q);
|
||||
err_poll:
|
||||
blk_stat_free_callback(q->poll_cb);
|
||||
q->poll_cb = NULL;
|
||||
err_exit:
|
||||
q->mq_ops = NULL;
|
||||
return -ENOMEM;
|
||||
@ -4752,138 +4740,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
|
||||
|
||||
/* Enable polling stats and return whether they were already enabled. */
|
||||
static bool blk_poll_stats_enable(struct request_queue *q)
|
||||
{
|
||||
if (q->poll_stat)
|
||||
return true;
|
||||
|
||||
return blk_stats_alloc_enable(q);
|
||||
}
|
||||
|
||||
static void blk_mq_poll_stats_start(struct request_queue *q)
|
||||
{
|
||||
/*
|
||||
* We don't arm the callback if polling stats are not enabled or the
|
||||
* callback is already active.
|
||||
*/
|
||||
if (!q->poll_stat || blk_stat_is_active(q->poll_cb))
|
||||
return;
|
||||
|
||||
blk_stat_activate_msecs(q->poll_cb, 100);
|
||||
}
|
||||
|
||||
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
|
||||
{
|
||||
struct request_queue *q = cb->data;
|
||||
int bucket;
|
||||
|
||||
for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
|
||||
if (cb->stat[bucket].nr_samples)
|
||||
q->poll_stat[bucket] = cb->stat[bucket];
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
unsigned long ret = 0;
|
||||
int bucket;
|
||||
|
||||
/*
|
||||
* If stats collection isn't on, don't sleep but turn it on for
|
||||
* future users
|
||||
*/
|
||||
if (!blk_poll_stats_enable(q))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* As an optimistic guess, use half of the mean service time
|
||||
* for this type of request. We can (and should) make this smarter.
|
||||
* For instance, if the completion latencies are tight, we can
|
||||
* get closer than just half the mean. This is especially
|
||||
* important on devices where the completion latencies are longer
|
||||
* than ~10 usec. We do use the stats for the relevant IO size
|
||||
* if available which does lead to better estimates.
|
||||
*/
|
||||
bucket = blk_mq_poll_stats_bkt(rq);
|
||||
if (bucket < 0)
|
||||
return ret;
|
||||
|
||||
if (q->poll_stat[bucket].nr_samples)
|
||||
ret = (q->poll_stat[bucket].mean + 1) / 2;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc);
|
||||
struct request *rq = blk_qc_to_rq(hctx, qc);
|
||||
struct hrtimer_sleeper hs;
|
||||
enum hrtimer_mode mode;
|
||||
unsigned int nsecs;
|
||||
ktime_t kt;
|
||||
|
||||
/*
|
||||
* If a request has completed on queue that uses an I/O scheduler, we
|
||||
* won't get back a request from blk_qc_to_rq.
|
||||
*/
|
||||
if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we get here, hybrid polling is enabled. Hence poll_nsec can be:
|
||||
*
|
||||
* 0: use half of prev avg
|
||||
* >0: use this specific value
|
||||
*/
|
||||
if (q->poll_nsec > 0)
|
||||
nsecs = q->poll_nsec;
|
||||
else
|
||||
nsecs = blk_mq_poll_nsecs(q, rq);
|
||||
|
||||
if (!nsecs)
|
||||
return false;
|
||||
|
||||
rq->rq_flags |= RQF_MQ_POLL_SLEPT;
|
||||
|
||||
/*
|
||||
* This will be replaced with the stats tracking code, using
|
||||
* 'avg_completion_time / 2' as the pre-sleep target.
|
||||
*/
|
||||
kt = nsecs;
|
||||
|
||||
mode = HRTIMER_MODE_REL;
|
||||
hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
|
||||
hrtimer_set_expires(&hs.timer, kt);
|
||||
|
||||
do {
|
||||
if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
|
||||
break;
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
hrtimer_sleeper_start_expires(&hs, mode);
|
||||
if (hs.task)
|
||||
io_schedule();
|
||||
hrtimer_cancel(&hs.timer);
|
||||
mode = HRTIMER_MODE_ABS;
|
||||
} while (hs.task && !signal_pending(current));
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
destroy_hrtimer_on_stack(&hs.timer);
|
||||
|
||||
/*
|
||||
* If we sleep, have the caller restart the poll loop to reset the
|
||||
* state. Like for the other success return cases, the caller is
|
||||
* responsible for checking if the IO completed. If the IO isn't
|
||||
* complete, we'll get called again and will go straight to the busy
|
||||
* poll loop.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
|
||||
struct io_comp_batch *iob, unsigned int flags)
|
||||
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
|
||||
long state = get_current_state();
|
||||
@ -4910,17 +4768,6 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
|
||||
unsigned int flags)
|
||||
{
|
||||
if (!(flags & BLK_POLL_NOSLEEP) &&
|
||||
q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
|
||||
if (blk_mq_poll_hybrid(q, cookie))
|
||||
return 1;
|
||||
}
|
||||
return blk_mq_poll_classic(q, cookie, iob, flags);
|
||||
}
|
||||
|
||||
unsigned int blk_mq_rq_cpu(struct request *rq)
|
||||
{
|
||||
return rq->mq_ctx->cpu;
|
||||
|
@ -2,8 +2,8 @@
|
||||
#ifndef INT_BLK_MQ_H
|
||||
#define INT_BLK_MQ_H
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include "blk-stat.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
struct blk_mq_tag_set;
|
||||
|
||||
@ -30,6 +30,15 @@ struct blk_mq_ctx {
|
||||
struct kobject kobj;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
enum {
|
||||
BLK_MQ_NO_TAG = -1U,
|
||||
BLK_MQ_TAG_MIN = 1,
|
||||
BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
|
||||
};
|
||||
|
||||
typedef unsigned int __bitwise blk_insert_t;
|
||||
#define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01)
|
||||
|
||||
void blk_mq_submit_bio(struct bio *bio);
|
||||
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
|
||||
unsigned int flags);
|
||||
@ -38,8 +47,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
|
||||
void blk_mq_wake_waiters(struct request_queue *q);
|
||||
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
|
||||
unsigned int);
|
||||
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
|
||||
bool kick_requeue_list);
|
||||
void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags);
|
||||
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
|
||||
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *start);
|
||||
@ -59,14 +67,7 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
|
||||
/*
|
||||
* Internal helpers for request insertion into sw queues
|
||||
*/
|
||||
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head);
|
||||
void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
|
||||
bool run_queue);
|
||||
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
struct list_head *list);
|
||||
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list);
|
||||
void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags);
|
||||
|
||||
/*
|
||||
* CPU -> queue mappings
|
||||
@ -164,6 +165,60 @@ struct blk_mq_alloc_data {
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
};
|
||||
|
||||
struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
|
||||
unsigned int reserved_tags, int node, int alloc_policy);
|
||||
void blk_mq_free_tags(struct blk_mq_tags *tags);
|
||||
int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
|
||||
struct sbitmap_queue *breserved_tags, unsigned int queue_depth,
|
||||
unsigned int reserved, int node, int alloc_policy);
|
||||
|
||||
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
|
||||
unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
|
||||
unsigned int *offset);
|
||||
void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
|
||||
unsigned int tag);
|
||||
void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags);
|
||||
int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_tags **tags, unsigned int depth, bool can_grow);
|
||||
void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set,
|
||||
unsigned int size);
|
||||
void blk_mq_tag_update_sched_shared_tags(struct request_queue *q);
|
||||
|
||||
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
|
||||
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
|
||||
static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
|
||||
struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!hctx)
|
||||
return &bt->ws[0];
|
||||
return sbq_wait_ptr(bt, &hctx->wait_index);
|
||||
}
|
||||
|
||||
void __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
|
||||
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
|
||||
|
||||
static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
|
||||
__blk_mq_tag_busy(hctx);
|
||||
}
|
||||
|
||||
static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
|
||||
__blk_mq_tag_idle(hctx);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
|
||||
unsigned int tag)
|
||||
{
|
||||
return tag < tags->nr_reserved_tags;
|
||||
}
|
||||
|
||||
static inline bool blk_mq_is_shared_tags(unsigned int flags)
|
||||
{
|
||||
return flags & BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
|
@ -1,11 +1,9 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-pm.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
/**
|
||||
* blk_pm_runtime_init - Block layer runtime PM initialization routine
|
||||
|
@ -74,7 +74,7 @@ static inline struct rq_qos *wbt_rq_qos(struct request_queue *q)
|
||||
return rq_qos_id(q, RQ_QOS_WBT);
|
||||
}
|
||||
|
||||
static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
|
||||
static inline struct rq_qos *iolat_rq_qos(struct request_queue *q)
|
||||
{
|
||||
return rq_qos_id(q, RQ_QOS_LATENCY);
|
||||
}
|
||||
|
@ -6,7 +6,6 @@
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/rculist.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include "blk-stat.h"
|
||||
#include "blk-mq.h"
|
||||
@ -190,7 +189,7 @@ void blk_stat_disable_accounting(struct request_queue *q)
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&q->stats->lock, flags);
|
||||
if (!--q->stats->accounting)
|
||||
if (!--q->stats->accounting && list_empty(&q->stats->callbacks))
|
||||
blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
|
||||
spin_unlock_irqrestore(&q->stats->lock, flags);
|
||||
}
|
||||
@ -201,7 +200,7 @@ void blk_stat_enable_accounting(struct request_queue *q)
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&q->stats->lock, flags);
|
||||
if (!q->stats->accounting++)
|
||||
if (!q->stats->accounting++ && list_empty(&q->stats->callbacks))
|
||||
blk_queue_flag_set(QUEUE_FLAG_STATS, q);
|
||||
spin_unlock_irqrestore(&q->stats->lock, flags);
|
||||
}
|
||||
@ -231,21 +230,3 @@ void blk_free_queue_stats(struct blk_queue_stats *stats)
|
||||
|
||||
kfree(stats);
|
||||
}
|
||||
|
||||
bool blk_stats_alloc_enable(struct request_queue *q)
|
||||
{
|
||||
struct blk_rq_stat *poll_stat;
|
||||
|
||||
poll_stat = kcalloc(BLK_MQ_POLL_STATS_BKTS, sizeof(*poll_stat),
|
||||
GFP_ATOMIC);
|
||||
if (!poll_stat)
|
||||
return false;
|
||||
|
||||
if (cmpxchg(&q->poll_stat, NULL, poll_stat) != NULL) {
|
||||
kfree(poll_stat);
|
||||
return true;
|
||||
}
|
||||
|
||||
blk_stat_add_callback(q, q->poll_cb);
|
||||
return false;
|
||||
}
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/blktrace_api.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include "blk.h"
|
||||
@ -408,35 +407,12 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
|
||||
|
||||
static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
|
||||
{
|
||||
int val;
|
||||
|
||||
if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
|
||||
val = BLK_MQ_POLL_CLASSIC;
|
||||
else
|
||||
val = q->poll_nsec / 1000;
|
||||
|
||||
return sprintf(page, "%d\n", val);
|
||||
return sprintf(page, "%d\n", -1);
|
||||
}
|
||||
|
||||
static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
|
||||
size_t count)
|
||||
{
|
||||
int err, val;
|
||||
|
||||
if (!q->mq_ops || !q->mq_ops->poll)
|
||||
return -EINVAL;
|
||||
|
||||
err = kstrtoint(page, 10, &val);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
if (val == BLK_MQ_POLL_CLASSIC)
|
||||
q->poll_nsec = BLK_MQ_POLL_CLASSIC;
|
||||
else if (val >= 0)
|
||||
q->poll_nsec = val * 1000;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
@ -1368,9 +1368,11 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
|
||||
int ret;
|
||||
u64 v;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out_finish;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (sscanf(ctx.body, "%llu", &v) != 1)
|
||||
@ -1389,7 +1391,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
|
||||
tg_conf_updated(tg, false);
|
||||
ret = 0;
|
||||
out_finish:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
@ -1561,9 +1563,11 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
|
||||
int ret;
|
||||
int index = of_cft(of)->private;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out_finish;
|
||||
|
||||
tg = blkg_to_tg(ctx.blkg);
|
||||
tg_update_carryover(tg);
|
||||
@ -1662,7 +1666,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
|
||||
tg->td->limit_valid[LIMIT_LOW]);
|
||||
ret = 0;
|
||||
out_finish:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
@ -2439,11 +2443,12 @@ void blk_throtl_register(struct gendisk *disk)
|
||||
#ifndef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
/* if no low limit, use previous default */
|
||||
td->throtl_slice = DFL_THROTL_SLICE_HD;
|
||||
#endif
|
||||
|
||||
#else
|
||||
td->track_bio_latency = !queue_is_mq(q);
|
||||
if (!td->track_bio_latency)
|
||||
blk_stat_enable_accounting(q);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
|
@ -399,12 +399,6 @@ static inline struct bio *blk_queue_bounce(struct bio *bio,
|
||||
return bio;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP_IOLATENCY
|
||||
int blk_iolatency_init(struct gendisk *disk);
|
||||
#else
|
||||
static inline int blk_iolatency_init(struct gendisk *disk) { return 0; };
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
void disk_free_zone_bitmaps(struct gendisk *disk);
|
||||
void disk_clear_zone_settings(struct gendisk *disk);
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hashtable.h>
|
||||
#include "blk-mq.h"
|
||||
|
||||
struct io_cq;
|
||||
struct elevator_type;
|
||||
@ -37,7 +38,8 @@ struct elevator_mq_ops {
|
||||
void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *);
|
||||
void (*prepare_request)(struct request *);
|
||||
void (*finish_request)(struct request *);
|
||||
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
|
||||
void (*insert_requests)(struct blk_mq_hw_ctx *hctx, struct list_head *list,
|
||||
blk_insert_t flags);
|
||||
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
|
||||
bool (*has_work)(struct blk_mq_hw_ctx *);
|
||||
void (*completed_request)(struct request *, u64);
|
||||
|
@ -426,6 +426,9 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
*/
|
||||
elevator_init_mq(disk->queue);
|
||||
|
||||
/* Mark bdev as having a submit_bio, if needed */
|
||||
disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
|
||||
|
||||
/*
|
||||
* If the driver provides an explicit major number it also must provide
|
||||
* the number of minor numbers supported, and those will be used to
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sbitmap.h>
|
||||
|
||||
@ -19,7 +18,6 @@
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/kyber.h>
|
||||
@ -590,7 +588,8 @@ static void kyber_prepare_request(struct request *rq)
|
||||
}
|
||||
|
||||
static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *rq_list, bool at_head)
|
||||
struct list_head *rq_list,
|
||||
blk_insert_t flags)
|
||||
{
|
||||
struct kyber_hctx_data *khd = hctx->sched_data;
|
||||
struct request *rq, *next;
|
||||
@ -602,7 +601,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
spin_lock(&kcq->lock);
|
||||
trace_block_rq_insert(rq);
|
||||
if (at_head)
|
||||
if (flags & BLK_MQ_INSERT_AT_HEAD)
|
||||
list_move(&rq->queuelist, head);
|
||||
else
|
||||
list_move_tail(&rq->queuelist, head);
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
@ -23,7 +22,6 @@
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
/*
|
||||
@ -768,7 +766,7 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
* add rq to rbtree and fifo
|
||||
*/
|
||||
static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head)
|
||||
blk_insert_t flags)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
@ -801,7 +799,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
if (at_head) {
|
||||
if (flags & BLK_MQ_INSERT_AT_HEAD) {
|
||||
list_add(&rq->queuelist, &per_prio->dispatch);
|
||||
rq->fifo_time = jiffies;
|
||||
} else {
|
||||
@ -822,10 +820,11 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from blk_mq_sched_insert_request() or blk_mq_sched_insert_requests().
|
||||
* Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list().
|
||||
*/
|
||||
static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list, bool at_head)
|
||||
struct list_head *list,
|
||||
blk_insert_t flags)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
@ -836,7 +835,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
dd_insert_request(hctx, rq, at_head);
|
||||
dd_insert_request(hctx, rq, flags);
|
||||
}
|
||||
spin_unlock(&dd->lock);
|
||||
}
|
||||
|
@ -86,6 +86,15 @@ enum opal_response_token {
|
||||
#define OPAL_MSID_KEYLEN 15
|
||||
#define OPAL_UID_LENGTH_HALF 4
|
||||
|
||||
/*
|
||||
* Boolean operators from TCG Core spec 2.01 Section:
|
||||
* 5.1.3.11
|
||||
* Table 61
|
||||
*/
|
||||
#define OPAL_BOOLEAN_AND 0
|
||||
#define OPAL_BOOLEAN_OR 1
|
||||
#define OPAL_BOOLEAN_NOT 2
|
||||
|
||||
/* Enum to index OPALUID array */
|
||||
enum opal_uid {
|
||||
/* users */
|
||||
@ -105,6 +114,7 @@ enum opal_uid {
|
||||
/* tables */
|
||||
OPAL_TABLE_TABLE,
|
||||
OPAL_LOCKINGRANGE_GLOBAL,
|
||||
OPAL_LOCKINGRANGE_ACE_START_TO_KEY,
|
||||
OPAL_LOCKINGRANGE_ACE_RDLOCKED,
|
||||
OPAL_LOCKINGRANGE_ACE_WRLOCKED,
|
||||
OPAL_MBRCONTROL,
|
||||
|
330
block/sed-opal.c
330
block/sed-opal.c
@ -83,8 +83,10 @@ struct opal_dev {
|
||||
u16 comid;
|
||||
u32 hsn;
|
||||
u32 tsn;
|
||||
u64 align;
|
||||
u64 align; /* alignment granularity */
|
||||
u64 lowest_lba;
|
||||
u32 logical_block_size;
|
||||
u8 align_required; /* ALIGN: 0 or 1 */
|
||||
|
||||
size_t pos;
|
||||
u8 *cmd;
|
||||
@ -132,6 +134,8 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
|
||||
{ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 },
|
||||
[OPAL_LOCKINGRANGE_GLOBAL] =
|
||||
{ 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 },
|
||||
[OPAL_LOCKINGRANGE_ACE_START_TO_KEY] =
|
||||
{ 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xD0, 0x01 },
|
||||
[OPAL_LOCKINGRANGE_ACE_RDLOCKED] =
|
||||
{ 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE0, 0x01 },
|
||||
[OPAL_LOCKINGRANGE_ACE_WRLOCKED] =
|
||||
@ -407,6 +411,8 @@ static void check_geometry(struct opal_dev *dev, const void *data)
|
||||
|
||||
dev->align = be64_to_cpu(geo->alignment_granularity);
|
||||
dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba);
|
||||
dev->logical_block_size = be32_to_cpu(geo->logical_block_size);
|
||||
dev->align_required = geo->reserved01 & 1;
|
||||
}
|
||||
|
||||
static int execute_step(struct opal_dev *dev,
|
||||
@ -1147,12 +1153,8 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
|
||||
return opal_send_recv(dev, cont);
|
||||
}
|
||||
|
||||
/*
|
||||
* request @column from table @table on device @dev. On success, the column
|
||||
* data will be available in dev->resp->tok[4]
|
||||
*/
|
||||
static int generic_get_column(struct opal_dev *dev, const u8 *table,
|
||||
u64 column)
|
||||
static int generic_get_columns(struct opal_dev *dev, const u8 *table,
|
||||
u64 start_column, u64 end_column)
|
||||
{
|
||||
int err;
|
||||
|
||||
@ -1162,12 +1164,12 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table,
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_STARTCOLUMN);
|
||||
add_token_u64(&err, dev, column);
|
||||
add_token_u64(&err, dev, start_column);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_ENDCOLUMN);
|
||||
add_token_u64(&err, dev, column);
|
||||
add_token_u64(&err, dev, end_column);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_ENDLIST);
|
||||
@ -1178,6 +1180,16 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table,
|
||||
return finalize_and_send(dev, parse_and_check_status);
|
||||
}
|
||||
|
||||
/*
|
||||
* request @column from table @table on device @dev. On success, the column
|
||||
* data will be available in dev->resp->tok[4]
|
||||
*/
|
||||
static int generic_get_column(struct opal_dev *dev, const u8 *table,
|
||||
u64 column)
|
||||
{
|
||||
return generic_get_columns(dev, table, column, column);
|
||||
}
|
||||
|
||||
/*
|
||||
* see TCG SAS 5.3.2.3 for a description of the available columns
|
||||
*
|
||||
@ -1437,6 +1449,129 @@ static int setup_locking_range(struct opal_dev *dev, void *data)
|
||||
return finalize_and_send(dev, parse_and_check_status);
|
||||
}
|
||||
|
||||
static int response_get_column(const struct parsed_resp *resp,
|
||||
int *iter,
|
||||
u8 column,
|
||||
u64 *value)
|
||||
{
|
||||
const struct opal_resp_tok *tok;
|
||||
int n = *iter;
|
||||
u64 val;
|
||||
|
||||
tok = response_get_token(resp, n);
|
||||
if (IS_ERR(tok))
|
||||
return PTR_ERR(tok);
|
||||
|
||||
if (!response_token_matches(tok, OPAL_STARTNAME)) {
|
||||
pr_debug("Unexpected response token type %d.\n", n);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
n++;
|
||||
|
||||
if (response_get_u64(resp, n) != column) {
|
||||
pr_debug("Token %d does not match expected column %u.\n",
|
||||
n, column);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
n++;
|
||||
|
||||
val = response_get_u64(resp, n);
|
||||
n++;
|
||||
|
||||
tok = response_get_token(resp, n);
|
||||
if (IS_ERR(tok))
|
||||
return PTR_ERR(tok);
|
||||
|
||||
if (!response_token_matches(tok, OPAL_ENDNAME)) {
|
||||
pr_debug("Unexpected response token type %d.\n", n);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
n++;
|
||||
|
||||
*value = val;
|
||||
*iter = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int locking_range_status(struct opal_dev *dev, void *data)
|
||||
{
|
||||
u8 lr_buffer[OPAL_UID_LENGTH];
|
||||
u64 resp;
|
||||
bool rlocked, wlocked;
|
||||
int err, tok_n = 2;
|
||||
struct opal_lr_status *lrst = data;
|
||||
|
||||
err = build_locking_range(lr_buffer, sizeof(lr_buffer),
|
||||
lrst->session.opal_key.lr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = generic_get_columns(dev, lr_buffer, OPAL_RANGESTART,
|
||||
OPAL_WRITELOCKED);
|
||||
if (err) {
|
||||
pr_debug("Couldn't get lr %u table columns %d to %d.\n",
|
||||
lrst->session.opal_key.lr, OPAL_RANGESTART,
|
||||
OPAL_WRITELOCKED);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* range start */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_RANGESTART,
|
||||
&lrst->range_start);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* range length */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_RANGELENGTH,
|
||||
&lrst->range_length);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* RLE */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_READLOCKENABLED,
|
||||
&resp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
lrst->RLE = !!resp;
|
||||
|
||||
/* WLE */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_WRITELOCKENABLED,
|
||||
&resp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
lrst->WLE = !!resp;
|
||||
|
||||
/* read locked */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_READLOCKED, &resp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
rlocked = !!resp;
|
||||
|
||||
/* write locked */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_WRITELOCKED, &resp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
wlocked = !!resp;
|
||||
|
||||
/* opal_lock_state can not map 'read locked' only state. */
|
||||
lrst->l_state = OPAL_RW;
|
||||
if (rlocked && wlocked)
|
||||
lrst->l_state = OPAL_LK;
|
||||
else if (wlocked)
|
||||
lrst->l_state = OPAL_RO;
|
||||
else if (rlocked) {
|
||||
pr_debug("Can not report read locked only state.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int start_generic_opal_session(struct opal_dev *dev,
|
||||
enum opal_uid auth,
|
||||
enum opal_uid sp_type,
|
||||
@ -1759,25 +1894,43 @@ static int set_sid_cpin_pin(struct opal_dev *dev, void *data)
|
||||
return finalize_and_send(dev, parse_and_check_status);
|
||||
}
|
||||
|
||||
static int add_user_to_lr(struct opal_dev *dev, void *data)
|
||||
static void add_authority_object_ref(int *err,
|
||||
struct opal_dev *dev,
|
||||
const u8 *uid,
|
||||
size_t uid_len)
|
||||
{
|
||||
add_token_u8(err, dev, OPAL_STARTNAME);
|
||||
add_token_bytestring(err, dev,
|
||||
opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
|
||||
OPAL_UID_LENGTH/2);
|
||||
add_token_bytestring(err, dev, uid, uid_len);
|
||||
add_token_u8(err, dev, OPAL_ENDNAME);
|
||||
}
|
||||
|
||||
static void add_boolean_object_ref(int *err,
|
||||
struct opal_dev *dev,
|
||||
u8 boolean_op)
|
||||
{
|
||||
add_token_u8(err, dev, OPAL_STARTNAME);
|
||||
add_token_bytestring(err, dev, opaluid[OPAL_HALF_UID_BOOLEAN_ACE],
|
||||
OPAL_UID_LENGTH/2);
|
||||
add_token_u8(err, dev, boolean_op);
|
||||
add_token_u8(err, dev, OPAL_ENDNAME);
|
||||
}
|
||||
|
||||
static int set_lr_boolean_ace(struct opal_dev *dev,
|
||||
unsigned int opal_uid,
|
||||
u8 lr,
|
||||
const u8 *users,
|
||||
size_t users_len)
|
||||
{
|
||||
u8 lr_buffer[OPAL_UID_LENGTH];
|
||||
u8 user_uid[OPAL_UID_LENGTH];
|
||||
struct opal_lock_unlock *lkul = data;
|
||||
u8 u;
|
||||
int err;
|
||||
|
||||
memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED],
|
||||
OPAL_UID_LENGTH);
|
||||
|
||||
if (lkul->l_state == OPAL_RW)
|
||||
memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_WRLOCKED],
|
||||
OPAL_UID_LENGTH);
|
||||
|
||||
lr_buffer[7] = lkul->session.opal_key.lr;
|
||||
|
||||
memcpy(user_uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH);
|
||||
|
||||
user_uid[7] = lkul->session.who;
|
||||
memcpy(lr_buffer, opaluid[opal_uid], OPAL_UID_LENGTH);
|
||||
lr_buffer[7] = lr;
|
||||
|
||||
err = cmd_start(dev, lr_buffer, opalmethod[OPAL_SET]);
|
||||
|
||||
@ -1790,35 +1943,49 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTLIST);
|
||||
|
||||
for (u = 0; u < users_len; u++) {
|
||||
if (users[u] == OPAL_ADMIN1)
|
||||
memcpy(user_uid, opaluid[OPAL_ADMIN1_UID],
|
||||
OPAL_UID_LENGTH);
|
||||
else {
|
||||
memcpy(user_uid, opaluid[OPAL_USER1_UID],
|
||||
OPAL_UID_LENGTH);
|
||||
user_uid[7] = users[u];
|
||||
}
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_bytestring(&err, dev,
|
||||
opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
|
||||
OPAL_UID_LENGTH/2);
|
||||
add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_bytestring(&err, dev,
|
||||
opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
|
||||
OPAL_UID_LENGTH/2);
|
||||
add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_bytestring(&err, dev, opaluid[OPAL_HALF_UID_BOOLEAN_ACE],
|
||||
OPAL_UID_LENGTH/2);
|
||||
add_token_u8(&err, dev, 1);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
add_authority_object_ref(&err, dev, user_uid, sizeof(user_uid));
|
||||
|
||||
/*
|
||||
* Add boolean operator in postfix only with
|
||||
* two or more authorities being added in ACE
|
||||
* expression.
|
||||
* */
|
||||
if (u > 0)
|
||||
add_boolean_object_ref(&err, dev, OPAL_BOOLEAN_OR);
|
||||
}
|
||||
|
||||
add_token_u8(&err, dev, OPAL_ENDLIST);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
add_token_u8(&err, dev, OPAL_ENDLIST);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int add_user_to_lr(struct opal_dev *dev, void *data)
|
||||
{
|
||||
int err;
|
||||
struct opal_lock_unlock *lkul = data;
|
||||
const u8 users[] = {
|
||||
lkul->session.who
|
||||
};
|
||||
|
||||
err = set_lr_boolean_ace(dev,
|
||||
lkul->l_state == OPAL_RW ?
|
||||
OPAL_LOCKINGRANGE_ACE_WRLOCKED :
|
||||
OPAL_LOCKINGRANGE_ACE_RDLOCKED,
|
||||
lkul->session.opal_key.lr, users,
|
||||
ARRAY_SIZE(users));
|
||||
if (err) {
|
||||
pr_debug("Error building add user to locking range command.\n");
|
||||
return err;
|
||||
@ -1827,6 +1994,27 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
|
||||
return finalize_and_send(dev, parse_and_check_status);
|
||||
}
|
||||
|
||||
static int add_user_to_lr_ace(struct opal_dev *dev, void *data)
|
||||
{
|
||||
int err;
|
||||
struct opal_lock_unlock *lkul = data;
|
||||
const u8 users[] = {
|
||||
OPAL_ADMIN1,
|
||||
lkul->session.who
|
||||
};
|
||||
|
||||
err = set_lr_boolean_ace(dev, OPAL_LOCKINGRANGE_ACE_START_TO_KEY,
|
||||
lkul->session.opal_key.lr, users,
|
||||
ARRAY_SIZE(users));
|
||||
|
||||
if (err) {
|
||||
pr_debug("Error building add user to locking ranges ACEs.\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
return finalize_and_send(dev, parse_and_check_status);
|
||||
}
|
||||
|
||||
static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
|
||||
{
|
||||
u8 lr_buffer[OPAL_UID_LENGTH];
|
||||
@ -2364,6 +2552,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
|
||||
const struct opal_step steps[] = {
|
||||
{ start_admin1LSP_opal_session, &lk_unlk->session.opal_key },
|
||||
{ add_user_to_lr, lk_unlk },
|
||||
{ add_user_to_lr_ace, lk_unlk },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
int ret;
|
||||
@ -2580,6 +2769,33 @@ static int opal_setup_locking_range(struct opal_dev *dev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int opal_locking_range_status(struct opal_dev *dev,
|
||||
struct opal_lr_status *opal_lrst,
|
||||
void __user *data)
|
||||
{
|
||||
const struct opal_step lr_steps[] = {
|
||||
{ start_auth_opal_session, &opal_lrst->session },
|
||||
{ locking_range_status, opal_lrst },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
int ret;
|
||||
|
||||
mutex_lock(&dev->dev_lock);
|
||||
setup_opal_dev(dev);
|
||||
ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
|
||||
mutex_unlock(&dev->dev_lock);
|
||||
|
||||
/* skip session info when copying back to uspace */
|
||||
if (!ret && copy_to_user(data + offsetof(struct opal_lr_status, range_start),
|
||||
(void *)opal_lrst + offsetof(struct opal_lr_status, range_start),
|
||||
sizeof(*opal_lrst) - offsetof(struct opal_lr_status, range_start))) {
|
||||
pr_debug("Error copying status to userspace\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
|
||||
{
|
||||
const struct opal_step pw_steps[] = {
|
||||
@ -2744,6 +2960,26 @@ static int opal_get_status(struct opal_dev *dev, void __user *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int opal_get_geometry(struct opal_dev *dev, void __user *data)
|
||||
{
|
||||
struct opal_geometry geo = {0};
|
||||
|
||||
if (check_opal_support(dev))
|
||||
return -EINVAL;
|
||||
|
||||
geo.align = dev->align_required;
|
||||
geo.logical_block_size = dev->logical_block_size;
|
||||
geo.alignment_granularity = dev->align;
|
||||
geo.lowest_aligned_lba = dev->lowest_lba;
|
||||
|
||||
if (copy_to_user(data, &geo, sizeof(geo))) {
|
||||
pr_debug("Error copying geometry data to userspace\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
|
||||
{
|
||||
void *p;
|
||||
@ -2814,6 +3050,12 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
|
||||
case IOC_OPAL_GET_STATUS:
|
||||
ret = opal_get_status(dev, arg);
|
||||
break;
|
||||
case IOC_OPAL_GET_LR_STATUS:
|
||||
ret = opal_locking_range_status(dev, p, arg);
|
||||
break;
|
||||
case IOC_OPAL_GET_GEOMETRY:
|
||||
ret = opal_get_geometry(dev, arg);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -385,6 +385,23 @@ config BLK_DEV_UBLK
|
||||
can handle batch more effectively, but task_work_add() isn't exported
|
||||
for module, so ublk has to be built to kernel.
|
||||
|
||||
config BLKDEV_UBLK_LEGACY_OPCODES
|
||||
bool "Support legacy command opcode"
|
||||
depends on BLK_DEV_UBLK
|
||||
default y
|
||||
help
|
||||
ublk driver started out with plain command encoding, which turned out
|
||||
to be a bad choice. The traditional ioctl command opcode encodes more
|
||||
info and basically defines each code uniquely, so opcode conflict
|
||||
is avoided, and the driver can handle wrong commands easily; meanwhile it
|
||||
may help security subsystem to audit io_uring command.
|
||||
|
||||
Say Y if your application still uses legacy command opcode.
|
||||
|
||||
Say N if you don't want to support legacy command opcode. It is
|
||||
suggested to say N once your application (ublk server) has switched to
|
||||
ioctl command encoding.
|
||||
|
||||
source "drivers/block/rnbd/Kconfig"
|
||||
|
||||
endif # BLK_DEV
|
||||
|
@ -735,8 +735,9 @@ static bool update_rs_extent(struct drbd_device *device,
|
||||
return false;
|
||||
}
|
||||
|
||||
void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
|
||||
void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
unsigned long now = jiffies;
|
||||
unsigned long last = device->rs_mark_time[device->rs_last_mark];
|
||||
int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
|
||||
@ -819,7 +820,7 @@ static int update_sync_bits(struct drbd_device *device,
|
||||
if (mode == SET_IN_SYNC) {
|
||||
unsigned long still_to_go = drbd_bm_total_weight(device);
|
||||
bool rs_is_done = (still_to_go <= device->rs_failed);
|
||||
drbd_advance_rs_marks(device, still_to_go);
|
||||
drbd_advance_rs_marks(first_peer_device(device), still_to_go);
|
||||
if (cleared || rs_is_done)
|
||||
maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
|
||||
} else if (mode == RECORD_RS_FAILED)
|
||||
@ -843,10 +844,11 @@ static bool plausible_request_size(int size)
|
||||
* called by worker on C_SYNC_TARGET and receiver on SyncSource.
|
||||
*
|
||||
*/
|
||||
int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
|
||||
int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size,
|
||||
enum update_sync_bits_mode mode)
|
||||
{
|
||||
/* Is called from worker and receiver context _only_ */
|
||||
struct drbd_device *device = peer_device->device;
|
||||
unsigned long sbnr, ebnr, lbnr;
|
||||
unsigned long count = 0;
|
||||
sector_t esector, nr_sectors;
|
||||
@ -1009,14 +1011,15 @@ retry:
|
||||
* tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
|
||||
* if there is still application IO going on in this area.
|
||||
*/
|
||||
int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
|
||||
int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
unsigned int enr = BM_SECT_TO_EXT(sector);
|
||||
const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
|
||||
struct lc_element *e;
|
||||
struct bm_extent *bm_ext;
|
||||
int i;
|
||||
bool throttle = drbd_rs_should_slow_down(device, sector, true);
|
||||
bool throttle = drbd_rs_should_slow_down(peer_device, sector, true);
|
||||
|
||||
/* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
|
||||
* not yet BME_LOCKED) extent needs to be kicked out explicitly if we
|
||||
|
@ -1216,7 +1216,9 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
|
||||
* drbd_bm_read() - Read the whole bitmap from its on disk location.
|
||||
* @device: DRBD device.
|
||||
*/
|
||||
int drbd_bm_read(struct drbd_device *device) __must_hold(local)
|
||||
int drbd_bm_read(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local)
|
||||
|
||||
{
|
||||
/* peer_device is not used in this body; only device is forwarded to bm_rw(). */
return bm_rw(device, BM_AIO_READ, 0);
|
||||
}
|
||||
@ -1227,7 +1229,8 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local)
|
||||
*
|
||||
* Will only write pages that have changed since last IO.
|
||||
*/
|
||||
int drbd_bm_write(struct drbd_device *device) __must_hold(local)
|
||||
int drbd_bm_write(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local)
|
||||
{
|
||||
/* No BM_AIO_* flag is passed (flags == 0); peer_device is not used in this body. */
return bm_rw(device, 0, 0);
|
||||
}
|
||||
@ -1238,7 +1241,8 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local)
|
||||
*
|
||||
* Will write all pages.
|
||||
*/
|
||||
int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
|
||||
int drbd_bm_write_all(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local)
|
||||
{
|
||||
/* peer_device is not used in this body; only device is forwarded to bm_rw(). */
return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
|
||||
}
|
||||
@ -1264,7 +1268,8 @@ int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_ho
|
||||
* verify is aborted due to a failed peer disk, while local IO continues, or
|
||||
* pending resync acks are still being processed.
|
||||
*/
|
||||
int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
|
||||
int drbd_bm_write_copy_pages(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local)
|
||||
{
|
||||
/* peer_device is not used in this body; only device is forwarded to bm_rw(). */
return bm_rw(device, BM_AIO_COPY_PAGES, 0);
|
||||
}
|
||||
|
@ -66,6 +66,7 @@ extern int drbd_proc_details;
|
||||
|
||||
struct drbd_device;
|
||||
struct drbd_connection;
|
||||
struct drbd_peer_device;
|
||||
|
||||
/* Defines to control fault insertion */
|
||||
enum {
|
||||
@ -126,8 +127,8 @@ struct bm_xfer_ctx {
|
||||
unsigned bytes[2];
|
||||
};
|
||||
|
||||
extern void INFO_bm_xfer_stats(struct drbd_device *device,
|
||||
const char *direction, struct bm_xfer_ctx *c);
|
||||
extern void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device,
|
||||
const char *direction, struct bm_xfer_ctx *c);
|
||||
|
||||
static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
|
||||
{
|
||||
@ -541,9 +542,10 @@ struct drbd_md_io {
|
||||
|
||||
struct bm_io_work {
|
||||
struct drbd_work w;
|
||||
struct drbd_peer_device *peer_device;
|
||||
char *why;
|
||||
enum bm_flag flags;
|
||||
int (*io_fn)(struct drbd_device *device);
|
||||
int (*io_fn)(struct drbd_device *device, struct drbd_peer_device *peer_device);
|
||||
void (*done)(struct drbd_device *device, int rv);
|
||||
};
|
||||
|
||||
@ -1041,7 +1043,7 @@ extern int drbd_send_drequest_csum(struct drbd_peer_device *, sector_t sector,
|
||||
enum drbd_packet cmd);
|
||||
extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int size);
|
||||
|
||||
extern int drbd_send_bitmap(struct drbd_device *device);
|
||||
extern int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device);
|
||||
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
|
||||
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
|
||||
extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *);
|
||||
@ -1065,17 +1067,22 @@ extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold
|
||||
extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
|
||||
extern void drbd_md_mark_dirty(struct drbd_device *device);
|
||||
extern void drbd_queue_bitmap_io(struct drbd_device *device,
|
||||
int (*io_fn)(struct drbd_device *),
|
||||
int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
|
||||
void (*done)(struct drbd_device *, int),
|
||||
char *why, enum bm_flag flags);
|
||||
char *why, enum bm_flag flags,
|
||||
struct drbd_peer_device *peer_device);
|
||||
extern int drbd_bitmap_io(struct drbd_device *device,
|
||||
int (*io_fn)(struct drbd_device *),
|
||||
char *why, enum bm_flag flags);
|
||||
int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
|
||||
char *why, enum bm_flag flags,
|
||||
struct drbd_peer_device *peer_device);
|
||||
extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
|
||||
int (*io_fn)(struct drbd_device *),
|
||||
char *why, enum bm_flag flags);
|
||||
extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local);
|
||||
extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local);
|
||||
int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
|
||||
char *why, enum bm_flag flags,
|
||||
struct drbd_peer_device *peer_device);
|
||||
extern int drbd_bmio_set_n_write(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local);
|
||||
extern int drbd_bmio_clear_n_write(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local);
|
||||
|
||||
/* Meta data layout
|
||||
*
|
||||
@ -1284,14 +1291,18 @@ extern void _drbd_bm_set_bits(struct drbd_device *device,
|
||||
const unsigned long s, const unsigned long e);
|
||||
extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr);
|
||||
extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
|
||||
extern int drbd_bm_read(struct drbd_device *device) __must_hold(local);
|
||||
extern int drbd_bm_read(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local);
|
||||
extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
|
||||
extern int drbd_bm_write(struct drbd_device *device) __must_hold(local);
|
||||
extern int drbd_bm_write(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local);
|
||||
extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local);
|
||||
extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
|
||||
extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
|
||||
extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
|
||||
extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local);
|
||||
extern int drbd_bm_write_all(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local);
|
||||
extern int drbd_bm_write_copy_pages(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local);
|
||||
extern size_t drbd_bm_words(struct drbd_device *device);
|
||||
extern unsigned long drbd_bm_bits(struct drbd_device *device);
|
||||
extern sector_t drbd_bm_capacity(struct drbd_device *device);
|
||||
@ -1422,21 +1433,24 @@ void drbd_resync_after_changed(struct drbd_device *device);
|
||||
extern void drbd_start_resync(struct drbd_device *device, enum drbd_conns side);
|
||||
extern void resume_next_sg(struct drbd_device *device);
|
||||
extern void suspend_other_sg(struct drbd_device *device);
|
||||
extern int drbd_resync_finished(struct drbd_device *device);
|
||||
extern int drbd_resync_finished(struct drbd_peer_device *peer_device);
|
||||
/* maybe rather drbd_main.c ? */
|
||||
extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
|
||||
extern void drbd_md_put_buffer(struct drbd_device *device);
|
||||
extern int drbd_md_sync_page_io(struct drbd_device *device,
|
||||
struct drbd_backing_dev *bdev, sector_t sector, enum req_op op);
|
||||
extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int);
|
||||
extern void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device,
|
||||
sector_t sector, int size);
|
||||
extern void wait_until_done_or_force_detached(struct drbd_device *device,
|
||||
struct drbd_backing_dev *bdev, unsigned int *done);
|
||||
extern void drbd_rs_controller_reset(struct drbd_device *device);
|
||||
extern void drbd_rs_controller_reset(struct drbd_peer_device *peer_device);
|
||||
|
||||
static inline void ov_out_of_sync_print(struct drbd_device *device)
|
||||
static inline void ov_out_of_sync_print(struct drbd_peer_device *peer_device)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
|
||||
if (device->ov_last_oos_size) {
|
||||
drbd_err(device, "Out of sync: start=%llu, size=%lu (sectors)\n",
|
||||
drbd_err(peer_device, "Out of sync: start=%llu, size=%lu (sectors)\n",
|
||||
(unsigned long long)device->ov_last_oos_start,
|
||||
(unsigned long)device->ov_last_oos_size);
|
||||
}
|
||||
@ -1475,7 +1489,7 @@ extern int drbd_ack_receiver(struct drbd_thread *thi);
|
||||
extern void drbd_send_ping_wf(struct work_struct *ws);
|
||||
extern void drbd_send_acks_wf(struct work_struct *ws);
|
||||
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
|
||||
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
|
||||
extern bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector,
|
||||
bool throttle_if_app_is_waiting);
|
||||
extern int drbd_submit_peer_request(struct drbd_peer_request *peer_req);
|
||||
extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
|
||||
@ -1531,22 +1545,22 @@ extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i
|
||||
extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i);
|
||||
extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector);
|
||||
extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector);
|
||||
extern int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector);
|
||||
extern int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector);
|
||||
extern void drbd_rs_cancel_all(struct drbd_device *device);
|
||||
extern int drbd_rs_del_all(struct drbd_device *device);
|
||||
extern void drbd_rs_failed_io(struct drbd_device *device,
|
||||
extern void drbd_rs_failed_io(struct drbd_peer_device *peer_device,
|
||||
sector_t sector, int size);
|
||||
extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
|
||||
extern void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go);
|
||||
|
||||
enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };
|
||||
extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
|
||||
extern int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size,
|
||||
enum update_sync_bits_mode mode);
|
||||
#define drbd_set_in_sync(device, sector, size) \
|
||||
__drbd_change_sync(device, sector, size, SET_IN_SYNC)
|
||||
#define drbd_set_out_of_sync(device, sector, size) \
|
||||
__drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC)
|
||||
#define drbd_rs_failed_io(device, sector, size) \
|
||||
__drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
|
||||
/* Wrappers around __drbd_change_sync(), one per update_sync_bits_mode value. */
#define drbd_set_in_sync(peer_device, sector, size) \
|
||||
__drbd_change_sync(peer_device, sector, size, SET_IN_SYNC)
|
||||
#define drbd_set_out_of_sync(peer_device, sector, size) \
|
||||
__drbd_change_sync(peer_device, sector, size, SET_OUT_OF_SYNC)
|
||||
#define drbd_rs_failed_io(peer_device, sector, size) \
|
||||
__drbd_change_sync(peer_device, sector, size, RECORD_RS_FAILED)
|
||||
extern void drbd_al_shrink(struct drbd_device *device);
|
||||
extern int drbd_al_initialize(struct drbd_device *, void *);
|
||||
|
||||
@ -1918,18 +1932,14 @@ static inline void inc_ap_pending(struct drbd_device *device)
|
||||
atomic_inc(&device->ap_pending_cnt);
|
||||
}
|
||||
|
||||
#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \
|
||||
if (atomic_read(&device->which) < 0) \
|
||||
drbd_err(device, "in %s:%d: " #which " = %d < 0 !\n", \
|
||||
func, line, \
|
||||
atomic_read(&device->which))
|
||||
|
||||
#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__)
|
||||
static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line)
|
||||
/* Decrement ap_pending_cnt; expect() is handed the check that the new count is >= 0. */
#define dec_ap_pending(device) ((void)expect((device), __dec_ap_pending(device) >= 0))
|
||||
static inline int __dec_ap_pending(struct drbd_device *device)
|
||||
{
|
||||
if (atomic_dec_and_test(&device->ap_pending_cnt))
|
||||
int ap_pending_cnt = atomic_dec_return(&device->ap_pending_cnt);
|
||||
|
||||
if (ap_pending_cnt == 0)
|
||||
wake_up(&device->misc_wait);
|
||||
ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line);
|
||||
return ap_pending_cnt;
|
||||
}
|
||||
|
||||
/* counts how many resync-related answers we still expect from the peer
|
||||
@ -1938,16 +1948,16 @@ static inline void _dec_ap_pending(struct drbd_device *device, const char *func,
|
||||
* C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER)
|
||||
* (or P_NEG_ACK with ID_SYNCER)
|
||||
*/
|
||||
static inline void inc_rs_pending(struct drbd_device *device)
|
||||
static inline void inc_rs_pending(struct drbd_peer_device *peer_device)
|
||||
{
|
||||
atomic_inc(&device->rs_pending_cnt);
|
||||
atomic_inc(&peer_device->device->rs_pending_cnt);
|
||||
}
|
||||
|
||||
#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__)
|
||||
static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line)
|
||||
/* Decrement the device's rs_pending_cnt; expect() is handed the check that the new count is >= 0. */
#define dec_rs_pending(peer_device) \
|
||||
((void)expect((peer_device), __dec_rs_pending(peer_device) >= 0))
|
||||
static inline int __dec_rs_pending(struct drbd_peer_device *peer_device)
|
||||
{
|
||||
atomic_dec(&device->rs_pending_cnt);
|
||||
ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line);
|
||||
return atomic_dec_return(&peer_device->device->rs_pending_cnt);
|
||||
}
|
||||
|
||||
/* counts how many answers we still need to send to the peer.
|
||||
@ -1964,18 +1974,16 @@ static inline void inc_unacked(struct drbd_device *device)
|
||||
atomic_inc(&device->unacked_cnt);
|
||||
}
|
||||
|
||||
#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__)
|
||||
static inline void _dec_unacked(struct drbd_device *device, const char *func, int line)
|
||||
/* Decrement unacked_cnt; expect() is handed the check that the new count is >= 0. */
#define dec_unacked(device) ((void)expect(device, __dec_unacked(device) >= 0))
|
||||
static inline int __dec_unacked(struct drbd_device *device)
|
||||
{
|
||||
atomic_dec(&device->unacked_cnt);
|
||||
ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
|
||||
return atomic_dec_return(&device->unacked_cnt);
|
||||
}
|
||||
|
||||
#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__)
|
||||
static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line)
|
||||
/*
 * Subtract n from unacked_cnt; expect() is handed the check that the new
 * count is >= 0.  n must be forwarded: __sub_unacked() takes (device, n).
 */
#define sub_unacked(device, n) ((void)expect(device, __sub_unacked(device, n) >= 0))
|
||||
static inline int __sub_unacked(struct drbd_device *device, int n)
|
||||
{
|
||||
atomic_sub(n, &device->unacked_cnt);
|
||||
ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
|
||||
return atomic_sub_return(n, &device->unacked_cnt);
|
||||
}
|
||||
|
||||
static inline bool is_sync_target_state(enum drbd_conns connection_state)
|
||||
|
@ -231,9 +231,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr,
|
||||
}
|
||||
req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests);
|
||||
list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) {
|
||||
struct drbd_peer_device *peer_device;
|
||||
if (req->epoch != expect_epoch)
|
||||
break;
|
||||
_req_mod(req, BARRIER_ACKED);
|
||||
peer_device = conn_peer_device(connection, req->device->vnr);
|
||||
_req_mod(req, BARRIER_ACKED, peer_device);
|
||||
}
|
||||
spin_unlock_irq(&connection->resource->req_lock);
|
||||
|
||||
@ -256,10 +258,13 @@ bail:
|
||||
/* must hold resource->req_lock */
|
||||
void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
|
||||
{
|
||||
struct drbd_peer_device *peer_device;
|
||||
struct drbd_request *req, *r;
|
||||
|
||||
list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests)
|
||||
_req_mod(req, what);
|
||||
list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) {
|
||||
peer_device = conn_peer_device(connection, req->device->vnr);
|
||||
_req_mod(req, what, peer_device);
|
||||
}
|
||||
}
|
||||
|
||||
void tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
|
||||
@ -297,7 +302,7 @@ void tl_abort_disk_io(struct drbd_device *device)
|
||||
continue;
|
||||
if (req->device != device)
|
||||
continue;
|
||||
_req_mod(req, ABORT_DISK_IO);
|
||||
_req_mod(req, ABORT_DISK_IO, NULL);
|
||||
}
|
||||
spin_unlock_irq(&connection->resource->req_lock);
|
||||
}
|
||||
@ -1198,10 +1203,11 @@ static int fill_bitmap_rle_bits(struct drbd_device *device,
|
||||
* code upon failure.
|
||||
*/
|
||||
static int
|
||||
send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
|
||||
send_bitmap_rle_or_plain(struct drbd_peer_device *peer_device, struct bm_xfer_ctx *c)
|
||||
{
|
||||
struct drbd_socket *sock = &first_peer_device(device)->connection->data;
|
||||
unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct drbd_socket *sock = &peer_device->connection->data;
|
||||
unsigned int header_size = drbd_header_size(peer_device->connection);
|
||||
struct p_compressed_bm *p = sock->sbuf + header_size;
|
||||
int len, err;
|
||||
|
||||
@ -1212,7 +1218,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
|
||||
|
||||
if (len) {
|
||||
dcbp_set_code(p, RLE_VLI_Bits);
|
||||
err = __send_command(first_peer_device(device)->connection, device->vnr, sock,
|
||||
err = __send_command(peer_device->connection, device->vnr, sock,
|
||||
P_COMPRESSED_BITMAP, sizeof(*p) + len,
|
||||
NULL, 0);
|
||||
c->packets[0]++;
|
||||
@ -1233,7 +1239,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
|
||||
len = num_words * sizeof(*p);
|
||||
if (len)
|
||||
drbd_bm_get_lel(device, c->word_offset, num_words, p);
|
||||
err = __send_command(first_peer_device(device)->connection, device->vnr, sock, P_BITMAP, len, NULL, 0);
|
||||
err = __send_command(peer_device->connection, device->vnr, sock, P_BITMAP,
|
||||
len, NULL, 0);
|
||||
c->word_offset += num_words;
|
||||
c->bit_offset = c->word_offset * BITS_PER_LONG;
|
||||
|
||||
@ -1245,7 +1252,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
|
||||
}
|
||||
if (!err) {
|
||||
if (len == 0) {
|
||||
INFO_bm_xfer_stats(device, "send", c);
|
||||
INFO_bm_xfer_stats(peer_device, "send", c);
|
||||
return 0;
|
||||
} else
|
||||
return 1;
|
||||
@ -1254,7 +1261,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
|
||||
}
|
||||
|
||||
/* See the comment at receive_bitmap() */
|
||||
static int _drbd_send_bitmap(struct drbd_device *device)
|
||||
static int _drbd_send_bitmap(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device)
|
||||
{
|
||||
struct bm_xfer_ctx c;
|
||||
int err;
|
||||
@ -1266,7 +1274,7 @@ static int _drbd_send_bitmap(struct drbd_device *device)
|
||||
if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC)) {
|
||||
drbd_info(device, "Writing the whole bitmap, MDF_FullSync was set.\n");
|
||||
drbd_bm_set_all(device);
|
||||
if (drbd_bm_write(device)) {
|
||||
if (drbd_bm_write(device, peer_device)) {
|
||||
/* write_bm did fail! Leave full sync flag set in Meta P_DATA
|
||||
* but otherwise process as per normal - need to tell other
|
||||
* side that a full resync is required! */
|
||||
@ -1285,20 +1293,20 @@ static int _drbd_send_bitmap(struct drbd_device *device)
|
||||
};
|
||||
|
||||
do {
|
||||
err = send_bitmap_rle_or_plain(device, &c);
|
||||
err = send_bitmap_rle_or_plain(peer_device, &c);
|
||||
} while (err > 0);
|
||||
|
||||
return err == 0;
|
||||
}
|
||||
|
||||
int drbd_send_bitmap(struct drbd_device *device)
|
||||
int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device)
|
||||
{
|
||||
struct drbd_socket *sock = &first_peer_device(device)->connection->data;
|
||||
struct drbd_socket *sock = &peer_device->connection->data;
|
||||
int err = -1;
|
||||
|
||||
mutex_lock(&sock->mutex);
|
||||
if (sock->socket)
|
||||
err = !_drbd_send_bitmap(device);
|
||||
err = !_drbd_send_bitmap(device, peer_device);
|
||||
mutex_unlock(&sock->mutex);
|
||||
return err;
|
||||
}
|
||||
@ -3406,7 +3414,9 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
|
||||
*
|
||||
* Sets all bits in the bitmap and writes the whole bitmap to stable storage.
|
||||
*/
|
||||
int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
|
||||
int drbd_bmio_set_n_write(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local)
|
||||
|
||||
{
|
||||
int rv = -EIO;
|
||||
|
||||
@ -3414,7 +3424,7 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
|
||||
drbd_md_sync(device);
|
||||
drbd_bm_set_all(device);
|
||||
|
||||
rv = drbd_bm_write(device);
|
||||
rv = drbd_bm_write(device, peer_device);
|
||||
|
||||
if (!rv) {
|
||||
drbd_md_clear_flag(device, MDF_FULL_SYNC);
|
||||
@ -3430,11 +3440,13 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
|
||||
*
|
||||
* Clears all bits in the bitmap and writes the whole bitmap to stable storage.
|
||||
*/
|
||||
int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local)
|
||||
int drbd_bmio_clear_n_write(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local)
|
||||
|
||||
{
|
||||
drbd_resume_al(device);
|
||||
drbd_bm_clear_all(device);
|
||||
return drbd_bm_write(device);
|
||||
return drbd_bm_write(device, peer_device);
|
||||
}
|
||||
|
||||
static int w_bitmap_io(struct drbd_work *w, int unused)
|
||||
@ -3453,7 +3465,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
|
||||
|
||||
if (get_ldev(device)) {
|
||||
drbd_bm_lock(device, work->why, work->flags);
|
||||
rv = work->io_fn(device);
|
||||
rv = work->io_fn(device, work->peer_device);
|
||||
drbd_bm_unlock(device);
|
||||
put_ldev(device);
|
||||
}
|
||||
@ -3488,11 +3500,12 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
|
||||
* put_ldev().
|
||||
*/
|
||||
void drbd_queue_bitmap_io(struct drbd_device *device,
|
||||
int (*io_fn)(struct drbd_device *),
|
||||
int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
|
||||
void (*done)(struct drbd_device *, int),
|
||||
char *why, enum bm_flag flags)
|
||||
char *why, enum bm_flag flags,
|
||||
struct drbd_peer_device *peer_device)
|
||||
{
|
||||
D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
|
||||
D_ASSERT(device, current == peer_device->connection->worker.task);
|
||||
|
||||
D_ASSERT(device, !test_bit(BITMAP_IO_QUEUED, &device->flags));
|
||||
D_ASSERT(device, !test_bit(BITMAP_IO, &device->flags));
|
||||
@ -3501,6 +3514,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
|
||||
drbd_err(device, "FIXME going to queue '%s' but '%s' still pending?\n",
|
||||
why, device->bm_io_work.why);
|
||||
|
||||
device->bm_io_work.peer_device = peer_device;
|
||||
device->bm_io_work.io_fn = io_fn;
|
||||
device->bm_io_work.done = done;
|
||||
device->bm_io_work.why = why;
|
||||
@ -3512,7 +3526,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
|
||||
* application IO does not conflict anyways. */
|
||||
if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
|
||||
if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
|
||||
drbd_queue_work(&first_peer_device(device)->connection->sender_work,
|
||||
drbd_queue_work(&peer_device->connection->sender_work,
|
||||
&device->bm_io_work.w);
|
||||
}
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
@ -3528,8 +3542,10 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
|
||||
* freezes application IO while the actual IO operation runs. This
|
||||
* function MAY NOT be called from worker context.
|
||||
*/
|
||||
int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *),
|
||||
char *why, enum bm_flag flags)
|
||||
int drbd_bitmap_io(struct drbd_device *device,
|
||||
int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
|
||||
char *why, enum bm_flag flags,
|
||||
struct drbd_peer_device *peer_device)
|
||||
{
|
||||
/* Only suspend io, if some operation is supposed to be locked out */
|
||||
const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST);
|
||||
@ -3541,7 +3557,7 @@ int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *
|
||||
drbd_suspend_io(device);
|
||||
|
||||
drbd_bm_lock(device, why, flags);
|
||||
rv = io_fn(device);
|
||||
rv = io_fn(device, peer_device);
|
||||
drbd_bm_unlock(device);
|
||||
|
||||
if (do_suspend_io)
|
||||
|
@ -1053,7 +1053,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
|
||||
la_size_changed ? "size changed" : "md moved");
|
||||
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
|
||||
drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
|
||||
"size changed", BM_LOCKED_MASK);
|
||||
"size changed", BM_LOCKED_MASK, NULL);
|
||||
|
||||
/* on-disk bitmap and activity log is authoritative again
|
||||
* (unless there was an IO error meanwhile...) */
|
||||
@ -2027,13 +2027,15 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
|
||||
drbd_info(device, "Assuming that all blocks are out of sync "
|
||||
"(aka FullSync)\n");
|
||||
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
|
||||
"set_n_write from attaching", BM_LOCKED_MASK)) {
|
||||
"set_n_write from attaching", BM_LOCKED_MASK,
|
||||
NULL)) {
|
||||
retcode = ERR_IO_MD_DISK;
|
||||
goto force_diskless_dec;
|
||||
}
|
||||
} else {
|
||||
if (drbd_bitmap_io(device, &drbd_bm_read,
|
||||
"read from attaching", BM_LOCKED_MASK)) {
|
||||
"read from attaching", BM_LOCKED_MASK,
|
||||
NULL)) {
|
||||
retcode = ERR_IO_MD_DISK;
|
||||
goto force_diskless_dec;
|
||||
}
|
||||
@ -2972,7 +2974,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
|
||||
retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
|
||||
if (retcode >= SS_SUCCESS) {
|
||||
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
|
||||
"set_n_write from invalidate", BM_LOCKED_MASK))
|
||||
"set_n_write from invalidate", BM_LOCKED_MASK, NULL))
|
||||
retcode = ERR_IO_MD_DISK;
|
||||
}
|
||||
} else
|
||||
@ -3005,11 +3007,12 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
|
||||
static int drbd_bmio_set_susp_al(struct drbd_device *device,
|
||||
struct drbd_peer_device *peer_device) __must_hold(local)
|
||||
{
|
||||
int rv;
|
||||
|
||||
rv = drbd_bmio_set_n_write(device);
|
||||
rv = drbd_bmio_set_n_write(device, peer_device);
|
||||
drbd_suspend_al(device);
|
||||
return rv;
|
||||
}
|
||||
@ -3052,7 +3055,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
|
||||
if (retcode >= SS_SUCCESS) {
|
||||
if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
|
||||
"set_n_write from invalidate_peer",
|
||||
BM_LOCKED_SET_ALLOWED))
|
||||
BM_LOCKED_SET_ALLOWED, NULL))
|
||||
retcode = ERR_IO_MD_DISK;
|
||||
}
|
||||
} else
|
||||
@ -4148,7 +4151,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
|
||||
|
||||
if (args.clear_bm) {
|
||||
err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
|
||||
"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
|
||||
"clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL);
|
||||
if (err) {
|
||||
drbd_err(device, "Writing bitmap failed with %d\n", err);
|
||||
retcode = ERR_IO_MD_DISK;
|
||||
|
@ -2044,11 +2044,11 @@ static int e_end_resync_block(struct drbd_work *w, int unused)
|
||||
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
|
||||
|
||||
if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
|
||||
drbd_set_in_sync(device, sector, peer_req->i.size);
|
||||
drbd_set_in_sync(peer_device, sector, peer_req->i.size);
|
||||
err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
|
||||
} else {
|
||||
/* Record failure to sync */
|
||||
drbd_rs_failed_io(device, sector, peer_req->i.size);
|
||||
drbd_rs_failed_io(peer_device, sector, peer_req->i.size);
|
||||
|
||||
err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
|
||||
}
|
||||
@ -2067,7 +2067,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
|
||||
if (!peer_req)
|
||||
goto fail;
|
||||
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
|
||||
inc_unacked(device);
|
||||
/* corresponding dec_unacked() in e_end_resync_block()
|
||||
@ -2138,7 +2138,7 @@ static int receive_DataReply(struct drbd_connection *connection, struct packet_i
|
||||
|
||||
err = recv_dless_read(peer_device, req, sector, pi->size);
|
||||
if (!err)
|
||||
req_mod(req, DATA_RECEIVED);
|
||||
req_mod(req, DATA_RECEIVED, peer_device);
|
||||
/* else: nothing. handled from drbd_disconnect...
|
||||
* I don't think we may complete this just yet
|
||||
* in case we are "on-disconnect: freeze" */
|
||||
@ -2196,7 +2196,7 @@ static void restart_conflicting_writes(struct drbd_device *device,
|
||||
continue;
|
||||
/* as it is RQ_POSTPONED, this will cause it to
|
||||
* be queued on the retry workqueue. */
|
||||
__req_mod(req, CONFLICT_RESOLVED, NULL);
|
||||
__req_mod(req, CONFLICT_RESOLVED, NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2220,7 +2220,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
|
||||
P_RS_WRITE_ACK : P_WRITE_ACK;
|
||||
err = drbd_send_ack(peer_device, pcmd, peer_req);
|
||||
if (pcmd == P_RS_WRITE_ACK)
|
||||
drbd_set_in_sync(device, sector, peer_req->i.size);
|
||||
drbd_set_in_sync(peer_device, sector, peer_req->i.size);
|
||||
} else {
|
||||
err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
|
||||
/* we expect it to be marked out of sync anyways...
|
||||
@ -2420,6 +2420,7 @@ static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
|
||||
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
|
||||
unsigned int size)
|
||||
{
|
||||
struct drbd_peer_device *peer_device = first_peer_device(device);
|
||||
struct drbd_interval *i;
|
||||
|
||||
repeat:
|
||||
@ -2433,7 +2434,7 @@ static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
|
||||
if (!(req->rq_state & RQ_POSTPONED))
|
||||
continue;
|
||||
req->rq_state &= ~RQ_POSTPONED;
|
||||
__req_mod(req, NEG_ACKED, &m);
|
||||
__req_mod(req, NEG_ACKED, peer_device, &m);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
if (m.bio)
|
||||
complete_master_bio(device, &m);
|
||||
@ -2690,7 +2691,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
|
||||
|
||||
if (device->state.pdsk < D_INCONSISTENT) {
|
||||
/* In case we have the only disk of the cluster, */
|
||||
drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
|
||||
drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
|
||||
peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
|
||||
drbd_al_begin_io(device, &peer_req->i);
|
||||
peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
|
||||
@ -2729,9 +2730,10 @@ out_interrupted:
|
||||
* The current sync rate used here uses only the most recent two step marks,
|
||||
* to have a short time average so we can react faster.
|
||||
*/
|
||||
bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
|
||||
bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector,
|
||||
bool throttle_if_app_is_waiting)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct lc_element *tmp;
|
||||
bool throttle = drbd_rs_c_min_rate_throttle(device);
|
||||
|
||||
@ -2843,7 +2845,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
|
||||
break;
|
||||
case P_OV_REPLY:
|
||||
verb = 0;
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
|
||||
break;
|
||||
default:
|
||||
@ -2914,7 +2916,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
|
||||
/* track progress, we may need to throttle */
|
||||
atomic_add(size >> 9, &device->rs_sect_in);
|
||||
peer_req->w.cb = w_e_end_ov_reply;
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
/* drbd_rs_begin_io done when we sent this request,
|
||||
* but accounting still needs to be done. */
|
||||
goto submit_for_resync;
|
||||
@ -2977,7 +2979,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
|
||||
|
||||
update_receiver_timing_details(connection, drbd_rs_should_slow_down);
|
||||
if (device->state.peer != R_PRIMARY
|
||||
&& drbd_rs_should_slow_down(device, sector, false))
|
||||
&& drbd_rs_should_slow_down(peer_device, sector, false))
|
||||
schedule_timeout_uninterruptible(HZ/10);
|
||||
update_receiver_timing_details(connection, drbd_rs_begin_io);
|
||||
if (drbd_rs_begin_io(device, sector))
|
||||
@ -3226,10 +3228,11 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
|
||||
-1096 requires proto 96
|
||||
*/
|
||||
|
||||
static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
|
||||
static int drbd_uuid_compare(struct drbd_peer_device *const peer_device,
|
||||
enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
|
||||
{
|
||||
struct drbd_peer_device *const peer_device = first_peer_device(device);
|
||||
struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
|
||||
struct drbd_connection *const connection = peer_device->connection;
|
||||
struct drbd_device *device = peer_device->device;
|
||||
u64 self, peer;
|
||||
int i, j;
|
||||
|
||||
@ -3465,7 +3468,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
|
||||
drbd_uuid_dump(device, "peer", device->p_uuid,
|
||||
device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
|
||||
|
||||
hg = drbd_uuid_compare(device, peer_role, &rule_nr);
|
||||
hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr);
|
||||
spin_unlock_irq(&device->ldev->md.uuid_lock);
|
||||
|
||||
drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
|
||||
@ -3591,7 +3594,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
|
||||
if (abs(hg) >= 2) {
|
||||
drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
|
||||
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
|
||||
BM_LOCKED_SET_ALLOWED))
|
||||
BM_LOCKED_SET_ALLOWED, NULL))
|
||||
return C_MASK;
|
||||
}
|
||||
|
||||
@ -4270,7 +4273,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
|
||||
drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
|
||||
drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
|
||||
"clear_n_write from receive_uuids",
|
||||
BM_LOCKED_TEST_ALLOWED);
|
||||
BM_LOCKED_TEST_ALLOWED, NULL);
|
||||
_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
|
||||
_drbd_uuid_set(device, UI_BITMAP, 0);
|
||||
_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
|
||||
@ -4448,7 +4451,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
|
||||
else if (os.conn >= C_SYNC_SOURCE &&
|
||||
peer_state.conn == C_CONNECTED) {
|
||||
if (drbd_bm_total_weight(device) <= device->rs_failed)
|
||||
drbd_resync_finished(device);
|
||||
drbd_resync_finished(peer_device);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -4456,8 +4459,8 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
|
||||
/* explicit verify finished notification, stop sector reached. */
|
||||
if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
|
||||
peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
|
||||
ov_out_of_sync_print(device);
|
||||
drbd_resync_finished(device);
|
||||
ov_out_of_sync_print(peer_device);
|
||||
drbd_resync_finished(peer_device);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -4766,11 +4769,11 @@ decode_bitmap_c(struct drbd_peer_device *peer_device,
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
void INFO_bm_xfer_stats(struct drbd_device *device,
|
||||
void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device,
|
||||
const char *direction, struct bm_xfer_ctx *c)
|
||||
{
|
||||
/* what would it take to transfer it "plaintext" */
|
||||
unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
|
||||
unsigned int header_size = drbd_header_size(peer_device->connection);
|
||||
unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
|
||||
unsigned int plain =
|
||||
header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
|
||||
@ -4794,7 +4797,7 @@ void INFO_bm_xfer_stats(struct drbd_device *device,
|
||||
r = 1000;
|
||||
|
||||
r = 1000 - r;
|
||||
drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
|
||||
drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
|
||||
"total %u; compression: %u.%u%%\n",
|
||||
direction,
|
||||
c->bytes[1], c->packets[1],
|
||||
@ -4872,12 +4875,12 @@ static int receive_bitmap(struct drbd_connection *connection, struct packet_info
|
||||
goto out;
|
||||
}
|
||||
|
||||
INFO_bm_xfer_stats(device, "receive", &c);
|
||||
INFO_bm_xfer_stats(peer_device, "receive", &c);
|
||||
|
||||
if (device->state.conn == C_WF_BITMAP_T) {
|
||||
enum drbd_state_rv rv;
|
||||
|
||||
err = drbd_send_bitmap(device);
|
||||
err = drbd_send_bitmap(device, peer_device);
|
||||
if (err)
|
||||
goto out;
|
||||
/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
|
||||
@ -4935,7 +4938,7 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet
|
||||
drbd_conn_str(device->state.conn));
|
||||
}
|
||||
|
||||
drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
|
||||
drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -4956,7 +4959,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
|
||||
sector = be64_to_cpu(p->sector);
|
||||
size = be32_to_cpu(p->blksize);
|
||||
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
|
||||
if (get_ldev(device)) {
|
||||
struct drbd_peer_request *peer_req;
|
||||
@ -5214,7 +5217,7 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device)
|
||||
|
||||
if (get_ldev(device)) {
|
||||
drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
|
||||
"write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
|
||||
"write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL);
|
||||
put_ldev(device);
|
||||
}
|
||||
|
||||
@ -5648,22 +5651,23 @@ static int got_IsInSync(struct drbd_connection *connection, struct packet_info *
|
||||
|
||||
if (get_ldev(device)) {
|
||||
drbd_rs_complete_io(device, sector);
|
||||
drbd_set_in_sync(device, sector, blksize);
|
||||
drbd_set_in_sync(peer_device, sector, blksize);
|
||||
/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
|
||||
device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
|
||||
put_ldev(device);
|
||||
}
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
atomic_add(blksize >> 9, &device->rs_sect_in);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
|
||||
validate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
|
||||
struct rb_root *root, const char *func,
|
||||
enum drbd_req_event what, bool missing_ok)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct drbd_request *req;
|
||||
struct bio_and_error m;
|
||||
|
||||
@ -5673,7 +5677,7 @@ validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t secto
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
return -EIO;
|
||||
}
|
||||
__req_mod(req, what, &m);
|
||||
__req_mod(req, what, peer_device, &m);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
|
||||
if (m.bio)
|
||||
@ -5698,8 +5702,8 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info *
|
||||
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
|
||||
|
||||
if (p->block_id == ID_SYNCER) {
|
||||
drbd_set_in_sync(device, sector, blksize);
|
||||
dec_rs_pending(device);
|
||||
drbd_set_in_sync(peer_device, sector, blksize);
|
||||
dec_rs_pending(peer_device);
|
||||
return 0;
|
||||
}
|
||||
switch (pi->cmd) {
|
||||
@ -5722,7 +5726,7 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info *
|
||||
BUG();
|
||||
}
|
||||
|
||||
return validate_req_change_req_state(device, p->block_id, sector,
|
||||
return validate_req_change_req_state(peer_device, p->block_id, sector,
|
||||
&device->write_requests, __func__,
|
||||
what, false);
|
||||
}
|
||||
@ -5744,12 +5748,12 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi
|
||||
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
|
||||
|
||||
if (p->block_id == ID_SYNCER) {
|
||||
dec_rs_pending(device);
|
||||
drbd_rs_failed_io(device, sector, size);
|
||||
dec_rs_pending(peer_device);
|
||||
drbd_rs_failed_io(peer_device, sector, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = validate_req_change_req_state(device, p->block_id, sector,
|
||||
err = validate_req_change_req_state(peer_device, p->block_id, sector,
|
||||
&device->write_requests, __func__,
|
||||
NEG_ACKED, true);
|
||||
if (err) {
|
||||
@ -5758,7 +5762,7 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi
|
||||
request is no longer in the collision hash. */
|
||||
/* In Protocol B we might already have got a P_RECV_ACK
|
||||
but then get a P_NEG_ACK afterwards. */
|
||||
drbd_set_out_of_sync(device, sector, size);
|
||||
drbd_set_out_of_sync(peer_device, sector, size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -5780,7 +5784,7 @@ static int got_NegDReply(struct drbd_connection *connection, struct packet_info
|
||||
drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
|
||||
(unsigned long long)sector, be32_to_cpu(p->blksize));
|
||||
|
||||
return validate_req_change_req_state(device, p->block_id, sector,
|
||||
return validate_req_change_req_state(peer_device, p->block_id, sector,
|
||||
&device->read_requests, __func__,
|
||||
NEG_ACKED, false);
|
||||
}
|
||||
@ -5803,13 +5807,13 @@ static int got_NegRSDReply(struct drbd_connection *connection, struct packet_inf
|
||||
|
||||
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
|
||||
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
|
||||
if (get_ldev_if_state(device, D_FAILED)) {
|
||||
drbd_rs_complete_io(device, sector);
|
||||
switch (pi->cmd) {
|
||||
case P_NEG_RS_DREPLY:
|
||||
drbd_rs_failed_io(device, sector, size);
|
||||
drbd_rs_failed_io(peer_device, sector, size);
|
||||
break;
|
||||
case P_RS_CANCEL:
|
||||
break;
|
||||
@ -5866,21 +5870,21 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info *
|
||||
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
|
||||
|
||||
if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
|
||||
drbd_ov_out_of_sync_found(device, sector, size);
|
||||
drbd_ov_out_of_sync_found(peer_device, sector, size);
|
||||
else
|
||||
ov_out_of_sync_print(device);
|
||||
ov_out_of_sync_print(peer_device);
|
||||
|
||||
if (!get_ldev(device))
|
||||
return 0;
|
||||
|
||||
drbd_rs_complete_io(device, sector);
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
|
||||
--device->ov_left;
|
||||
|
||||
/* let's advance progress step marks only for every other megabyte */
|
||||
if ((device->ov_left & 0x200) == 0x200)
|
||||
drbd_advance_rs_marks(device, device->ov_left);
|
||||
drbd_advance_rs_marks(peer_device, device->ov_left);
|
||||
|
||||
if (device->ov_left == 0) {
|
||||
dw = kmalloc(sizeof(*dw), GFP_NOIO);
|
||||
@ -5890,8 +5894,8 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info *
|
||||
drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
|
||||
} else {
|
||||
drbd_err(device, "kmalloc(dw) failed.");
|
||||
ov_out_of_sync_print(device);
|
||||
drbd_resync_finished(device);
|
||||
ov_out_of_sync_print(peer_device);
|
||||
drbd_resync_finished(peer_device);
|
||||
}
|
||||
}
|
||||
put_ldev(device);
|
||||
|
@ -122,12 +122,13 @@ void drbd_req_destroy(struct kref *kref)
|
||||
* before it even was submitted or sent.
|
||||
* In that case we do not want to touch the bitmap at all.
|
||||
*/
|
||||
struct drbd_peer_device *peer_device = first_peer_device(device);
|
||||
if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) {
|
||||
if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
|
||||
drbd_set_out_of_sync(device, req->i.sector, req->i.size);
|
||||
drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size);
|
||||
|
||||
if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
|
||||
drbd_set_in_sync(device, req->i.sector, req->i.size);
|
||||
drbd_set_in_sync(peer_device, req->i.sector, req->i.size);
|
||||
}
|
||||
|
||||
/* one might be tempted to move the drbd_al_complete_io
|
||||
@ -552,12 +553,15 @@ static inline bool is_pending_write_protocol_A(struct drbd_request *req)
|
||||
* happen "atomically" within the req_lock,
|
||||
* and it enforces that we have to think in a very structured manner
|
||||
* about the "events" that may happen to a request during its life time ...
|
||||
*
|
||||
*
|
||||
* peer_device == NULL means local disk
|
||||
*/
|
||||
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
||||
struct drbd_peer_device *peer_device,
|
||||
struct bio_and_error *m)
|
||||
{
|
||||
struct drbd_device *const device = req->device;
|
||||
struct drbd_peer_device *const peer_device = first_peer_device(device);
|
||||
struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
|
||||
struct net_conf *nc;
|
||||
int p, rv = 0;
|
||||
@ -617,7 +621,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
||||
break;
|
||||
|
||||
case READ_COMPLETED_WITH_ERROR:
|
||||
drbd_set_out_of_sync(device, req->i.sector, req->i.size);
|
||||
drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size);
|
||||
drbd_report_io_error(device, req);
|
||||
__drbd_chk_io_error(device, DRBD_READ_ERROR);
|
||||
fallthrough;
|
||||
@ -1100,6 +1104,7 @@ static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
|
||||
static int drbd_process_write_request(struct drbd_request *req)
|
||||
{
|
||||
struct drbd_device *device = req->device;
|
||||
struct drbd_peer_device *peer_device = first_peer_device(device);
|
||||
int remote, send_oos;
|
||||
|
||||
remote = drbd_should_do_remote(device->state);
|
||||
@ -1115,7 +1120,7 @@ static int drbd_process_write_request(struct drbd_request *req)
|
||||
/* The only size==0 bios we expect are empty flushes. */
|
||||
D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH);
|
||||
if (remote)
|
||||
_req_mod(req, QUEUE_AS_DRBD_BARRIER);
|
||||
_req_mod(req, QUEUE_AS_DRBD_BARRIER, peer_device);
|
||||
return remote;
|
||||
}
|
||||
|
||||
@ -1125,10 +1130,10 @@ static int drbd_process_write_request(struct drbd_request *req)
|
||||
D_ASSERT(device, !(remote && send_oos));
|
||||
|
||||
if (remote) {
|
||||
_req_mod(req, TO_BE_SENT);
|
||||
_req_mod(req, QUEUE_FOR_NET_WRITE);
|
||||
} else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size))
|
||||
_req_mod(req, QUEUE_FOR_SEND_OOS);
|
||||
_req_mod(req, TO_BE_SENT, peer_device);
|
||||
_req_mod(req, QUEUE_FOR_NET_WRITE, peer_device);
|
||||
} else if (drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size))
|
||||
_req_mod(req, QUEUE_FOR_SEND_OOS, peer_device);
|
||||
|
||||
return remote;
|
||||
}
|
||||
@ -1312,6 +1317,7 @@ static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req
|
||||
static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
|
||||
{
|
||||
struct drbd_resource *resource = device->resource;
|
||||
struct drbd_peer_device *peer_device = first_peer_device(device);
|
||||
const int rw = bio_data_dir(req->master_bio);
|
||||
struct bio_and_error m = { NULL, };
|
||||
bool no_remote = false;
|
||||
@ -1375,8 +1381,8 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
|
||||
/* We either have a private_bio, or we can read from remote.
|
||||
* Otherwise we had done the goto nodata above. */
|
||||
if (req->private_bio == NULL) {
|
||||
_req_mod(req, TO_BE_SENT);
|
||||
_req_mod(req, QUEUE_FOR_NET_READ);
|
||||
_req_mod(req, TO_BE_SENT, peer_device);
|
||||
_req_mod(req, QUEUE_FOR_NET_READ, peer_device);
|
||||
} else
|
||||
no_remote = true;
|
||||
}
|
||||
@ -1397,7 +1403,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
|
||||
req->pre_submit_jif = jiffies;
|
||||
list_add_tail(&req->req_pending_local,
|
||||
&device->pending_completion[rw == WRITE]);
|
||||
_req_mod(req, TO_BE_SUBMITTED);
|
||||
_req_mod(req, TO_BE_SUBMITTED, NULL);
|
||||
/* but we need to give up the spinlock to submit */
|
||||
submit_private_bio = true;
|
||||
} else if (no_remote) {
|
||||
|
@ -267,6 +267,7 @@ struct bio_and_error {
|
||||
extern void start_new_tl_epoch(struct drbd_connection *connection);
|
||||
extern void drbd_req_destroy(struct kref *kref);
|
||||
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
||||
struct drbd_peer_device *peer_device,
|
||||
struct bio_and_error *m);
|
||||
extern void complete_master_bio(struct drbd_device *device,
|
||||
struct bio_and_error *m);
|
||||
@ -280,14 +281,15 @@ extern void drbd_restart_request(struct drbd_request *req);
|
||||
|
||||
/* use this if you don't want to deal with calling complete_master_bio()
|
||||
* outside the spinlock, e.g. when walking some list on cleanup. */
|
||||
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
|
||||
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what,
|
||||
struct drbd_peer_device *peer_device)
|
||||
{
|
||||
struct drbd_device *device = req->device;
|
||||
struct bio_and_error m;
|
||||
int rv;
|
||||
|
||||
/* __req_mod possibly frees req, do not touch req after that! */
|
||||
rv = __req_mod(req, what, &m);
|
||||
rv = __req_mod(req, what, peer_device, &m);
|
||||
if (m.bio)
|
||||
complete_master_bio(device, &m);
|
||||
|
||||
@ -299,7 +301,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
|
||||
* of the lower level driver completion callback, so we need to
|
||||
* spin_lock_irqsave here. */
|
||||
static inline int req_mod(struct drbd_request *req,
|
||||
enum drbd_req_event what)
|
||||
enum drbd_req_event what,
|
||||
struct drbd_peer_device *peer_device)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct drbd_device *device = req->device;
|
||||
@ -307,7 +310,7 @@ static inline int req_mod(struct drbd_request *req,
|
||||
int rv;
|
||||
|
||||
spin_lock_irqsave(&device->resource->req_lock, flags);
|
||||
rv = __req_mod(req, what, &m);
|
||||
rv = __req_mod(req, what, peer_device, &m);
|
||||
spin_unlock_irqrestore(&device->resource->req_lock, flags);
|
||||
|
||||
if (m.bio)
|
||||
|
@ -1222,9 +1222,11 @@ void drbd_resume_al(struct drbd_device *device)
|
||||
}
|
||||
|
||||
/* helper for _drbd_set_state */
|
||||
static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
|
||||
static void set_ov_position(struct drbd_peer_device *peer_device, enum drbd_conns cs)
|
||||
{
|
||||
if (first_peer_device(device)->connection->agreed_pro_version < 90)
|
||||
struct drbd_device *device = peer_device->device;
|
||||
|
||||
if (peer_device->connection->agreed_pro_version < 90)
|
||||
device->ov_start_sector = 0;
|
||||
device->rs_total = drbd_bm_bits(device);
|
||||
device->ov_position = 0;
|
||||
@ -1387,7 +1389,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
unsigned long now = jiffies;
|
||||
int i;
|
||||
|
||||
set_ov_position(device, ns.conn);
|
||||
set_ov_position(peer_device, ns.conn);
|
||||
device->rs_start = now;
|
||||
device->rs_last_sect_ev = 0;
|
||||
device->ov_last_oos_size = 0;
|
||||
@ -1398,7 +1400,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
device->rs_mark_time[i] = now;
|
||||
}
|
||||
|
||||
drbd_rs_controller_reset(device);
|
||||
drbd_rs_controller_reset(peer_device);
|
||||
|
||||
if (ns.conn == C_VERIFY_S) {
|
||||
drbd_info(device, "Starting Online Verify from sector %llu\n",
|
||||
@ -1518,8 +1520,9 @@ static void abw_start_sync(struct drbd_device *device, int rv)
|
||||
}
|
||||
|
||||
int drbd_bitmap_io_from_worker(struct drbd_device *device,
|
||||
int (*io_fn)(struct drbd_device *),
|
||||
char *why, enum bm_flag flags)
|
||||
int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
|
||||
char *why, enum bm_flag flags,
|
||||
struct drbd_peer_device *peer_device)
|
||||
{
|
||||
int rv;
|
||||
|
||||
@ -1529,7 +1532,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
|
||||
atomic_inc(&device->suspend_cnt);
|
||||
|
||||
drbd_bm_lock(device, why, flags);
|
||||
rv = io_fn(device);
|
||||
rv = io_fn(device, peer_device);
|
||||
drbd_bm_unlock(device);
|
||||
|
||||
drbd_resume_io(device);
|
||||
@ -1809,7 +1812,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
device->state.conn == C_WF_BITMAP_S)
|
||||
drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL,
|
||||
"send_bitmap (WFBitMapS)",
|
||||
BM_LOCKED_TEST_ALLOWED);
|
||||
BM_LOCKED_TEST_ALLOWED, peer_device);
|
||||
|
||||
/* Lost contact to peer's copy of the data */
|
||||
if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) {
|
||||
@ -1839,7 +1842,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
* No harm done if the bitmap still changes,
|
||||
* redirtied pages will follow later. */
|
||||
drbd_bitmap_io_from_worker(device, &drbd_bm_write,
|
||||
"demote diskless peer", BM_LOCKED_SET_ALLOWED);
|
||||
"demote diskless peer", BM_LOCKED_SET_ALLOWED, peer_device);
|
||||
put_ldev(device);
|
||||
}
|
||||
|
||||
@ -1851,7 +1854,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
/* No changes to the bitmap expected this time, so assert that,
|
||||
* even though no harm was done if it did change. */
|
||||
drbd_bitmap_io_from_worker(device, &drbd_bm_write,
|
||||
"demote", BM_LOCKED_TEST_ALLOWED);
|
||||
"demote", BM_LOCKED_TEST_ALLOWED, peer_device);
|
||||
put_ldev(device);
|
||||
}
|
||||
|
||||
@ -1888,7 +1891,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
/* no other bitmap changes expected during this phase */
|
||||
drbd_queue_bitmap_io(device,
|
||||
&drbd_bmio_set_n_write, &abw_start_sync,
|
||||
"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
|
||||
"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED,
|
||||
peer_device);
|
||||
|
||||
/* first half of local IO error, failure to attach,
|
||||
* or administrative detach */
|
||||
@ -2011,7 +2015,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) &&
|
||||
(ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) {
|
||||
drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL,
|
||||
"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
|
||||
"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED,
|
||||
peer_device);
|
||||
put_ldev(device);
|
||||
}
|
||||
|
||||
|
@ -28,8 +28,8 @@
|
||||
#include "drbd_protocol.h"
|
||||
#include "drbd_req.h"
|
||||
|
||||
static int make_ov_request(struct drbd_device *, int);
|
||||
static int make_resync_request(struct drbd_device *, int);
|
||||
static int make_ov_request(struct drbd_peer_device *, int);
|
||||
static int make_resync_request(struct drbd_peer_device *, int);
|
||||
|
||||
/* endio handlers:
|
||||
* drbd_md_endio (defined here)
|
||||
@ -124,7 +124,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
|
||||
* In case of a write error, send the neg ack anyways. */
|
||||
if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
|
||||
inc_unacked(device);
|
||||
drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
|
||||
drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&device->resource->req_lock, flags);
|
||||
@ -276,7 +276,7 @@ void drbd_request_endio(struct bio *bio)
|
||||
|
||||
/* not req_mod(), we need irqsave here! */
|
||||
spin_lock_irqsave(&device->resource->req_lock, flags);
|
||||
__req_mod(req, what, &m);
|
||||
__req_mod(req, what, NULL, &m);
|
||||
spin_unlock_irqrestore(&device->resource->req_lock, flags);
|
||||
put_ldev(device);
|
||||
|
||||
@ -363,7 +363,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel)
|
||||
* drbd_alloc_pages due to pp_in_use > max_buffers. */
|
||||
drbd_free_peer_req(device, peer_req);
|
||||
peer_req = NULL;
|
||||
inc_rs_pending(device);
|
||||
inc_rs_pending(peer_device);
|
||||
err = drbd_send_drequest_csum(peer_device, sector, size,
|
||||
digest, digest_size,
|
||||
P_CSUM_RS_REQUEST);
|
||||
@ -430,10 +430,10 @@ int w_resync_timer(struct drbd_work *w, int cancel)
|
||||
|
||||
switch (device->state.conn) {
|
||||
case C_VERIFY_S:
|
||||
make_ov_request(device, cancel);
|
||||
make_ov_request(first_peer_device(device), cancel);
|
||||
break;
|
||||
case C_SYNC_TARGET:
|
||||
make_resync_request(device, cancel);
|
||||
make_resync_request(first_peer_device(device), cancel);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -493,8 +493,9 @@ struct fifo_buffer *fifo_alloc(unsigned int fifo_size)
|
||||
return fb;
|
||||
}
|
||||
|
||||
static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
|
||||
static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct disk_conf *dc;
|
||||
unsigned int want; /* The number of sectors we want in-flight */
|
||||
int req_sect; /* Number of sectors to request in this turn */
|
||||
@ -545,8 +546,9 @@ static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
|
||||
return req_sect;
|
||||
}
|
||||
|
||||
static int drbd_rs_number_requests(struct drbd_device *device)
|
||||
static int drbd_rs_number_requests(struct drbd_peer_device *peer_device)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
unsigned int sect_in; /* Number of sectors that came in since the last turn */
|
||||
int number, mxb;
|
||||
|
||||
@ -556,7 +558,7 @@ static int drbd_rs_number_requests(struct drbd_device *device)
|
||||
rcu_read_lock();
|
||||
mxb = drbd_get_max_buffers(device) / 2;
|
||||
if (rcu_dereference(device->rs_plan_s)->size) {
|
||||
number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
|
||||
number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9);
|
||||
device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
|
||||
} else {
|
||||
device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
|
||||
@ -580,9 +582,9 @@ static int drbd_rs_number_requests(struct drbd_device *device)
|
||||
return number;
|
||||
}
|
||||
|
||||
static int make_resync_request(struct drbd_device *const device, int cancel)
|
||||
static int make_resync_request(struct drbd_peer_device *const peer_device, int cancel)
|
||||
{
|
||||
struct drbd_peer_device *const peer_device = first_peer_device(device);
|
||||
struct drbd_device *const device = peer_device->device;
|
||||
struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
|
||||
unsigned long bit;
|
||||
sector_t sector;
|
||||
@ -598,7 +600,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel)
|
||||
|
||||
if (device->rs_total == 0) {
|
||||
/* empty resync? */
|
||||
drbd_resync_finished(device);
|
||||
drbd_resync_finished(peer_device);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -618,7 +620,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel)
|
||||
}
|
||||
|
||||
max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
|
||||
number = drbd_rs_number_requests(device);
|
||||
number = drbd_rs_number_requests(peer_device);
|
||||
if (number <= 0)
|
||||
goto requeue;
|
||||
|
||||
@ -653,7 +655,7 @@ next_sector:
|
||||
|
||||
sector = BM_BIT_TO_SECT(bit);
|
||||
|
||||
if (drbd_try_rs_begin_io(device, sector)) {
|
||||
if (drbd_try_rs_begin_io(peer_device, sector)) {
|
||||
device->bm_resync_fo = bit;
|
||||
goto requeue;
|
||||
}
|
||||
@ -729,13 +731,13 @@ next_sector:
|
||||
} else {
|
||||
int err;
|
||||
|
||||
inc_rs_pending(device);
|
||||
inc_rs_pending(peer_device);
|
||||
err = drbd_send_drequest(peer_device,
|
||||
size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
|
||||
sector, size, ID_SYNCER);
|
||||
if (err) {
|
||||
drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
put_ldev(device);
|
||||
return err;
|
||||
}
|
||||
@ -760,8 +762,9 @@ next_sector:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int make_ov_request(struct drbd_device *device, int cancel)
|
||||
static int make_ov_request(struct drbd_peer_device *peer_device, int cancel)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
int number, i, size;
|
||||
sector_t sector;
|
||||
const sector_t capacity = get_capacity(device->vdisk);
|
||||
@ -770,7 +773,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
|
||||
if (unlikely(cancel))
|
||||
return 1;
|
||||
|
||||
number = drbd_rs_number_requests(device);
|
||||
number = drbd_rs_number_requests(peer_device);
|
||||
|
||||
sector = device->ov_position;
|
||||
for (i = 0; i < number; i++) {
|
||||
@ -788,7 +791,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
|
||||
|
||||
size = BM_BLOCK_SIZE;
|
||||
|
||||
if (drbd_try_rs_begin_io(device, sector)) {
|
||||
if (drbd_try_rs_begin_io(peer_device, sector)) {
|
||||
device->ov_position = sector;
|
||||
goto requeue;
|
||||
}
|
||||
@ -796,9 +799,9 @@ static int make_ov_request(struct drbd_device *device, int cancel)
|
||||
if (sector + (size>>9) > capacity)
|
||||
size = (capacity-sector)<<9;
|
||||
|
||||
inc_rs_pending(device);
|
||||
inc_rs_pending(peer_device);
|
||||
if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
return 0;
|
||||
}
|
||||
sector += BM_SECT_PER_BIT;
|
||||
@ -818,8 +821,8 @@ int w_ov_finished(struct drbd_work *w, int cancel)
|
||||
container_of(w, struct drbd_device_work, w);
|
||||
struct drbd_device *device = dw->device;
|
||||
kfree(dw);
|
||||
ov_out_of_sync_print(device);
|
||||
drbd_resync_finished(device);
|
||||
ov_out_of_sync_print(first_peer_device(device));
|
||||
drbd_resync_finished(first_peer_device(device));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -831,7 +834,7 @@ static int w_resync_finished(struct drbd_work *w, int cancel)
|
||||
struct drbd_device *device = dw->device;
|
||||
kfree(dw);
|
||||
|
||||
drbd_resync_finished(device);
|
||||
drbd_resync_finished(first_peer_device(device));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -846,9 +849,10 @@ static void ping_peer(struct drbd_device *device)
|
||||
test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
|
||||
}
|
||||
|
||||
int drbd_resync_finished(struct drbd_device *device)
|
||||
int drbd_resync_finished(struct drbd_peer_device *peer_device)
|
||||
{
|
||||
struct drbd_connection *connection = first_peer_device(device)->connection;
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct drbd_connection *connection = peer_device->connection;
|
||||
unsigned long db, dt, dbdt;
|
||||
unsigned long n_oos;
|
||||
union drbd_state os, ns;
|
||||
@ -1129,7 +1133,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
|
||||
err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
|
||||
} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
|
||||
if (likely(device->state.pdsk >= D_INCONSISTENT)) {
|
||||
inc_rs_pending(device);
|
||||
inc_rs_pending(peer_device);
|
||||
if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
|
||||
err = drbd_send_rs_deallocated(peer_device, peer_req);
|
||||
else
|
||||
@ -1148,7 +1152,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
|
||||
err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
|
||||
|
||||
/* update resync data with failure */
|
||||
drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
|
||||
drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size);
|
||||
}
|
||||
|
||||
dec_unacked(device);
|
||||
@ -1199,12 +1203,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
|
||||
}
|
||||
|
||||
if (eq) {
|
||||
drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
|
||||
drbd_set_in_sync(peer_device, peer_req->i.sector, peer_req->i.size);
|
||||
/* rs_same_csums unit is BM_BLOCK_SIZE */
|
||||
device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
|
||||
err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
|
||||
} else {
|
||||
inc_rs_pending(device);
|
||||
inc_rs_pending(peer_device);
|
||||
peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
|
||||
peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
|
||||
kfree(di);
|
||||
@ -1257,10 +1261,10 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel)
|
||||
* drbd_alloc_pages due to pp_in_use > max_buffers. */
|
||||
drbd_free_peer_req(device, peer_req);
|
||||
peer_req = NULL;
|
||||
inc_rs_pending(device);
|
||||
inc_rs_pending(peer_device);
|
||||
err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
|
||||
if (err)
|
||||
dec_rs_pending(device);
|
||||
dec_rs_pending(peer_device);
|
||||
kfree(digest);
|
||||
|
||||
out:
|
||||
@ -1270,15 +1274,16 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
|
||||
void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, sector_t sector, int size)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
|
||||
device->ov_last_oos_size += size>>9;
|
||||
} else {
|
||||
device->ov_last_oos_start = sector;
|
||||
device->ov_last_oos_size = size>>9;
|
||||
}
|
||||
drbd_set_out_of_sync(device, sector, size);
|
||||
drbd_set_out_of_sync(peer_device, sector, size);
|
||||
}
|
||||
|
||||
int w_e_end_ov_reply(struct drbd_work *w, int cancel)
|
||||
@ -1328,9 +1333,9 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
|
||||
* drbd_alloc_pages due to pp_in_use > max_buffers. */
|
||||
drbd_free_peer_req(device, peer_req);
|
||||
if (!eq)
|
||||
drbd_ov_out_of_sync_found(device, sector, size);
|
||||
drbd_ov_out_of_sync_found(peer_device, sector, size);
|
||||
else
|
||||
ov_out_of_sync_print(device);
|
||||
ov_out_of_sync_print(peer_device);
|
||||
|
||||
err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
|
||||
eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
|
||||
@ -1341,14 +1346,14 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
|
||||
|
||||
/* let's advance progress step marks only for every other megabyte */
|
||||
if ((device->ov_left & 0x200) == 0x200)
|
||||
drbd_advance_rs_marks(device, device->ov_left);
|
||||
drbd_advance_rs_marks(peer_device, device->ov_left);
|
||||
|
||||
stop_sector_reached = verify_can_do_stop_sector(device) &&
|
||||
(sector + (size>>9)) >= device->ov_stop_sector;
|
||||
|
||||
if (device->ov_left == 0 || stop_sector_reached) {
|
||||
ov_out_of_sync_print(device);
|
||||
drbd_resync_finished(device);
|
||||
ov_out_of_sync_print(peer_device);
|
||||
drbd_resync_finished(peer_device);
|
||||
}
|
||||
|
||||
return err;
|
||||
@ -1425,7 +1430,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
|
||||
int err;
|
||||
|
||||
if (unlikely(cancel)) {
|
||||
req_mod(req, SEND_CANCELED);
|
||||
req_mod(req, SEND_CANCELED, peer_device);
|
||||
return 0;
|
||||
}
|
||||
req->pre_send_jif = jiffies;
|
||||
@ -1437,7 +1442,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
|
||||
maybe_send_barrier(connection, req->epoch);
|
||||
|
||||
err = drbd_send_out_of_sync(peer_device, req);
|
||||
req_mod(req, OOS_HANDED_TO_NETWORK);
|
||||
req_mod(req, OOS_HANDED_TO_NETWORK, peer_device);
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -1457,7 +1462,7 @@ int w_send_dblock(struct drbd_work *w, int cancel)
|
||||
int err;
|
||||
|
||||
if (unlikely(cancel)) {
|
||||
req_mod(req, SEND_CANCELED);
|
||||
req_mod(req, SEND_CANCELED, peer_device);
|
||||
return 0;
|
||||
}
|
||||
req->pre_send_jif = jiffies;
|
||||
@ -1467,7 +1472,7 @@ int w_send_dblock(struct drbd_work *w, int cancel)
|
||||
connection->send.current_epoch_writes++;
|
||||
|
||||
err = drbd_send_dblock(peer_device, req);
|
||||
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
|
||||
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device);
|
||||
|
||||
if (do_send_unplug && !err)
|
||||
pd_send_unplug_remote(peer_device);
|
||||
@ -1490,7 +1495,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
|
||||
int err;
|
||||
|
||||
if (unlikely(cancel)) {
|
||||
req_mod(req, SEND_CANCELED);
|
||||
req_mod(req, SEND_CANCELED, peer_device);
|
||||
return 0;
|
||||
}
|
||||
req->pre_send_jif = jiffies;
|
||||
@ -1502,7 +1507,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
|
||||
err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
|
||||
(unsigned long)req);
|
||||
|
||||
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
|
||||
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device);
|
||||
|
||||
if (do_send_unplug && !err)
|
||||
pd_send_unplug_remote(peer_device);
|
||||
@ -1668,8 +1673,9 @@ void drbd_resync_after_changed(struct drbd_device *device)
|
||||
} while (changed);
|
||||
}
|
||||
|
||||
void drbd_rs_controller_reset(struct drbd_device *device)
|
||||
void drbd_rs_controller_reset(struct drbd_peer_device *peer_device)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
|
||||
struct fifo_buffer *plan;
|
||||
|
||||
@ -1891,10 +1897,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
rcu_read_unlock();
|
||||
schedule_timeout_interruptible(timeo);
|
||||
}
|
||||
drbd_resync_finished(device);
|
||||
drbd_resync_finished(peer_device);
|
||||
}
|
||||
|
||||
drbd_rs_controller_reset(device);
|
||||
drbd_rs_controller_reset(peer_device);
|
||||
/* ns.conn may already be != device->state.conn,
|
||||
* we may have been paused in between, or become paused until
|
||||
* the timer triggers.
|
||||
@ -1909,8 +1915,9 @@ out:
|
||||
mutex_unlock(device->state_mutex);
|
||||
}
|
||||
|
||||
static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
|
||||
static void update_on_disk_bitmap(struct drbd_peer_device *peer_device, bool resync_done)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
|
||||
device->rs_last_bcast = jiffies;
|
||||
|
||||
@ -1919,7 +1926,7 @@ static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
|
||||
|
||||
drbd_bm_write_lazy(device, 0);
|
||||
if (resync_done && is_sync_state(device->state.conn))
|
||||
drbd_resync_finished(device);
|
||||
drbd_resync_finished(peer_device);
|
||||
|
||||
drbd_bcast_event(device, &sib);
|
||||
/* update timestamp, in case it took a while to write out stuff */
|
||||
@ -1945,6 +1952,7 @@ static void drbd_ldev_destroy(struct drbd_device *device)
|
||||
|
||||
static void go_diskless(struct drbd_device *device)
|
||||
{
|
||||
struct drbd_peer_device *peer_device = first_peer_device(device);
|
||||
D_ASSERT(device, device->state.disk == D_FAILED);
|
||||
/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
|
||||
* inc/dec it frequently. Once we are D_DISKLESS, no one will touch
|
||||
@ -1970,7 +1978,7 @@ static void go_diskless(struct drbd_device *device)
|
||||
* Any modifications would not be expected anymore, though.
|
||||
*/
|
||||
if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
|
||||
"detach", BM_LOCKED_TEST_ALLOWED)) {
|
||||
"detach", BM_LOCKED_TEST_ALLOWED, peer_device)) {
|
||||
if (test_bit(WAS_READ_ERROR, &device->flags)) {
|
||||
drbd_md_set_flag(device, MDF_FULL_SYNC);
|
||||
drbd_md_sync(device);
|
||||
@ -2017,7 +2025,7 @@ static void do_device_work(struct drbd_device *device, const unsigned long todo)
|
||||
do_md_sync(device);
|
||||
if (test_bit(RS_DONE, &todo) ||
|
||||
test_bit(RS_PROGRESS, &todo))
|
||||
update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
|
||||
update_on_disk_bitmap(first_peer_device(device), test_bit(RS_DONE, &todo));
|
||||
if (test_bit(GO_DISKLESS, &todo))
|
||||
go_diskless(device);
|
||||
if (test_bit(DESTROY_DISK, &todo))
|
||||
|
@ -325,6 +325,9 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
if (blk_validate_block_size(blksize))
|
||||
return -EINVAL;
|
||||
|
||||
if (bytesize < 0)
|
||||
return -EINVAL;
|
||||
|
||||
nbd->config->bytesize = bytesize;
|
||||
nbd->config->blksize_bits = __ffs(blksize);
|
||||
|
||||
@ -1111,6 +1114,9 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
|
||||
struct nbd_sock *nsock;
|
||||
int err;
|
||||
|
||||
/* Arg will be cast to int, check it to avoid overflow */
|
||||
if (arg > INT_MAX)
|
||||
return -EINVAL;
|
||||
sock = nbd_get_socket(nbd, arg, &err);
|
||||
if (!sock)
|
||||
return err;
|
||||
@ -1934,11 +1940,11 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
if (!info->attrs[NBD_ATTR_SOCKETS]) {
|
||||
if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SOCKETS)) {
|
||||
pr_err("must specify at least one socket\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
|
||||
if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SIZE_BYTES)) {
|
||||
pr_err("must specify a size in bytes for the device\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2123,7 +2129,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
|
||||
if (!netlink_capable(skb, CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (!info->attrs[NBD_ATTR_INDEX]) {
|
||||
if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) {
|
||||
pr_err("must specify an index to disconnect\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2161,7 +2167,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
|
||||
if (!netlink_capable(skb, CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (!info->attrs[NBD_ATTR_INDEX]) {
|
||||
if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) {
|
||||
pr_err("must specify a device to reconfigure\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2325,6 +2331,7 @@ static struct genl_family nbd_genl_family __ro_after_init = {
|
||||
.n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
|
||||
.resv_start_op = NBD_CMD_STATUS + 1,
|
||||
.maxattr = NBD_ATTR_MAX,
|
||||
.netnsok = 1,
|
||||
.policy = nbd_attr_policy,
|
||||
.mcgrps = nbd_mcast_grps,
|
||||
.n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
|
||||
|
@ -9,4 +9,4 @@ config BLK_DEV_NULL_BLK
|
||||
|
||||
config BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
bool "Support fault injection for Null test block driver"
|
||||
depends on BLK_DEV_NULL_BLK && FAULT_INJECTION
|
||||
depends on BLK_DEV_NULL_BLK && FAULT_INJECTION_CONFIGFS
|
||||
|
@ -250,7 +250,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
|
||||
|
||||
static inline struct nullb_device *to_nullb_device(struct config_item *item)
|
||||
{
|
||||
return item ? container_of(item, struct nullb_device, item) : NULL;
|
||||
return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL;
|
||||
}
|
||||
|
||||
static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
|
||||
@ -593,8 +593,29 @@ static const struct config_item_type nullb_device_type = {
|
||||
.ct_owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION

/*
 * Initialise the device's three fault-injection configs (timeout, requeue,
 * init_hctx) and attach each config's group as a default group of the
 * device's own configfs group.
 */
static void nullb_add_fault_config(struct nullb_device *dev)
{
	struct config_group *parent = &dev->group;
	struct fault_config *timeout = &dev->timeout_config;
	struct fault_config *requeue = &dev->requeue_config;
	struct fault_config *init_hctx = &dev->init_hctx_fault_config;

	/* All configs are initialised before any of them is attached. */
	fault_config_init(timeout, "timeout_inject");
	fault_config_init(requeue, "requeue_inject");
	fault_config_init(init_hctx, "init_hctx_fault_inject");

	configfs_add_default_group(&timeout->group, parent);
	configfs_add_default_group(&requeue->group, parent);
	configfs_add_default_group(&init_hctx->group, parent);
}

#else

/* Fault injection is compiled out: there is nothing to add. */
static void nullb_add_fault_config(struct nullb_device *dev)
{
}

#endif
|
||||
|
||||
static struct
|
||||
config_item *nullb_group_make_item(struct config_group *group, const char *name)
|
||||
config_group *nullb_group_make_group(struct config_group *group, const char *name)
|
||||
{
|
||||
struct nullb_device *dev;
|
||||
|
||||
@ -605,9 +626,10 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
|
||||
if (!dev)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
config_item_init_type_name(&dev->item, name, &nullb_device_type);
|
||||
config_group_init_type_name(&dev->group, name, &nullb_device_type);
|
||||
nullb_add_fault_config(dev);
|
||||
|
||||
return &dev->item;
|
||||
return &dev->group;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -645,7 +667,7 @@ static struct configfs_attribute *nullb_group_attrs[] = {
|
||||
};
|
||||
|
||||
static struct configfs_group_operations nullb_group_ops = {
|
||||
.make_item = nullb_group_make_item,
|
||||
.make_group = nullb_group_make_group,
|
||||
.drop_item = nullb_group_drop_item,
|
||||
};
|
||||
|
||||
@ -676,6 +698,13 @@ static struct nullb_device *null_alloc_dev(void)
|
||||
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
|
||||
if (!dev)
|
||||
return NULL;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
dev->timeout_config.attr = null_timeout_attr;
|
||||
dev->requeue_config.attr = null_requeue_attr;
|
||||
dev->init_hctx_fault_config.attr = null_init_hctx_attr;
|
||||
#endif
|
||||
|
||||
INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
|
||||
INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
|
||||
if (badblocks_init(&dev->badblocks, 0)) {
|
||||
@ -1030,8 +1059,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
|
||||
if (!t_page)
|
||||
return -ENOMEM;
|
||||
|
||||
src = kmap_atomic(c_page->page);
|
||||
dst = kmap_atomic(t_page->page);
|
||||
src = kmap_local_page(c_page->page);
|
||||
dst = kmap_local_page(t_page->page);
|
||||
|
||||
for (i = 0; i < PAGE_SECTORS;
|
||||
i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
|
||||
@ -1043,8 +1072,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
|
||||
}
|
||||
}
|
||||
|
||||
kunmap_atomic(dst);
|
||||
kunmap_atomic(src);
|
||||
kunmap_local(dst);
|
||||
kunmap_local(src);
|
||||
|
||||
ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
|
||||
null_free_page(ret);
|
||||
@ -1112,7 +1141,6 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
|
||||
size_t temp, count = 0;
|
||||
unsigned int offset;
|
||||
struct nullb_page *t_page;
|
||||
void *dst, *src;
|
||||
|
||||
while (count < n) {
|
||||
temp = min_t(size_t, nullb->dev->blocksize, n - count);
|
||||
@ -1126,11 +1154,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
|
||||
if (!t_page)
|
||||
return -ENOSPC;
|
||||
|
||||
src = kmap_atomic(source);
|
||||
dst = kmap_atomic(t_page->page);
|
||||
memcpy(dst + offset, src + off + count, temp);
|
||||
kunmap_atomic(dst);
|
||||
kunmap_atomic(src);
|
||||
memcpy_page(t_page->page, offset, source, off + count, temp);
|
||||
|
||||
__set_bit(sector & SECTOR_MASK, t_page->bitmap);
|
||||
|
||||
@ -1149,7 +1173,6 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest,
|
||||
size_t temp, count = 0;
|
||||
unsigned int offset;
|
||||
struct nullb_page *t_page;
|
||||
void *dst, *src;
|
||||
|
||||
while (count < n) {
|
||||
temp = min_t(size_t, nullb->dev->blocksize, n - count);
|
||||
@ -1158,16 +1181,11 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest,
|
||||
t_page = null_lookup_page(nullb, sector, false,
|
||||
!null_cache_active(nullb));
|
||||
|
||||
dst = kmap_atomic(dest);
|
||||
if (!t_page) {
|
||||
memset(dst + off + count, 0, temp);
|
||||
goto next;
|
||||
}
|
||||
src = kmap_atomic(t_page->page);
|
||||
memcpy(dst + off + count, src + offset, temp);
|
||||
kunmap_atomic(src);
|
||||
next:
|
||||
kunmap_atomic(dst);
|
||||
if (t_page)
|
||||
memcpy_page(dest, off + count, t_page->page, offset,
|
||||
temp);
|
||||
else
|
||||
zero_user(dest, off + count, temp);
|
||||
|
||||
count += temp;
|
||||
sector += temp >> SECTOR_SHIFT;
|
||||
@ -1178,11 +1196,7 @@ next:
|
||||
static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
|
||||
unsigned int len, unsigned int off)
|
||||
{
|
||||
void *dst;
|
||||
|
||||
dst = kmap_atomic(page);
|
||||
memset(dst + off, 0xFF, len);
|
||||
kunmap_atomic(dst);
|
||||
memset_page(page, off, 0xff, len);
|
||||
}
|
||||
|
||||
blk_status_t null_handle_discard(struct nullb_device *dev,
|
||||
@ -1529,24 +1543,48 @@ static void null_submit_bio(struct bio *bio)
|
||||
null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
|
||||
static bool should_timeout_request(struct request *rq)
|
||||
{
|
||||
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
|
||||
return should_fail(&dev->timeout_config.attr, 1);
|
||||
}
|
||||
|
||||
static bool should_requeue_request(struct request *rq)
|
||||
{
|
||||
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
|
||||
return should_fail(&dev->requeue_config.attr, 1);
|
||||
}
|
||||
|
||||
static bool should_init_hctx_fail(struct nullb_device *dev)
|
||||
{
|
||||
return should_fail(&dev->init_hctx_fault_config.attr, 1);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static bool should_timeout_request(struct request *rq)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
if (g_timeout_str[0])
|
||||
return should_fail(&null_timeout_attr, 1);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool should_requeue_request(struct request *rq)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
if (g_requeue_str[0])
|
||||
return should_fail(&null_requeue_attr, 1);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool should_init_hctx_fail(struct nullb_device *dev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void null_map_queues(struct blk_mq_tag_set *set)
|
||||
{
|
||||
struct nullb *nullb = set->driver_data;
|
||||
@ -1743,10 +1781,8 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
|
||||
struct nullb *nullb = hctx->queue->queuedata;
|
||||
struct nullb_queue *nq;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
|
||||
if (should_init_hctx_fail(nullb->dev))
|
||||
return -EFAULT;
|
||||
#endif
|
||||
|
||||
nq = &nullb->queues[hctx_idx];
|
||||
hctx->driver_data = nq;
|
||||
@ -1964,6 +2000,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
|
||||
|
||||
static int null_validate_conf(struct nullb_device *dev)
|
||||
{
|
||||
if (dev->queue_mode == NULL_Q_RQ) {
|
||||
pr_err("legacy IO path is no longer available\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
dev->blocksize = round_down(dev->blocksize, 512);
|
||||
dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
|
||||
|
||||
@ -2066,9 +2107,6 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
if (rv)
|
||||
goto out_cleanup_queues;
|
||||
|
||||
if (!null_setup_fault())
|
||||
goto out_cleanup_tags;
|
||||
|
||||
nullb->tag_set->timeout = 5 * HZ;
|
||||
nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
|
||||
if (IS_ERR(nullb->disk)) {
|
||||
@ -2130,10 +2168,10 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
|
||||
null_config_discard(nullb);
|
||||
|
||||
if (config_item_name(&dev->item)) {
|
||||
if (config_item_name(&dev->group.cg_item)) {
|
||||
/* Use configfs dir name as the device name */
|
||||
snprintf(nullb->disk_name, sizeof(nullb->disk_name),
|
||||
"%s", config_item_name(&dev->item));
|
||||
"%s", config_item_name(&dev->group.cg_item));
|
||||
} else {
|
||||
sprintf(nullb->disk_name, "nullb%d", nullb->index);
|
||||
}
|
||||
@ -2233,6 +2271,9 @@ static int __init null_init(void)
|
||||
g_home_node = NUMA_NO_NODE;
|
||||
}
|
||||
|
||||
if (!null_setup_fault())
|
||||
return -EINVAL;
|
||||
|
||||
if (g_queue_mode == NULL_Q_RQ) {
|
||||
pr_err("legacy IO path is no longer available\n");
|
||||
return -EINVAL;
|
||||
|
@ -69,7 +69,12 @@ enum {
|
||||
|
||||
struct nullb_device {
|
||||
struct nullb *nullb;
|
||||
struct config_item item;
|
||||
struct config_group group;
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
struct fault_config timeout_config;
|
||||
struct fault_config requeue_config;
|
||||
struct fault_config init_hctx_fault_config;
|
||||
#endif
|
||||
struct radix_tree_root data; /* data stored in the disk */
|
||||
struct radix_tree_root cache; /* disk cache data */
|
||||
unsigned long flags; /* device flags */
|
||||
|
@ -53,7 +53,8 @@
|
||||
| UBLK_F_NEED_GET_DATA \
|
||||
| UBLK_F_USER_RECOVERY \
|
||||
| UBLK_F_USER_RECOVERY_REISSUE \
|
||||
| UBLK_F_UNPRIVILEGED_DEV)
|
||||
| UBLK_F_UNPRIVILEGED_DEV \
|
||||
| UBLK_F_CMD_IOCTL_ENCODE)
|
||||
|
||||
/* All UBLK_PARAM_TYPE_* should be included here */
|
||||
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
|
||||
@ -298,9 +299,7 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
|
||||
|
||||
static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
|
||||
{
|
||||
if (ubq->flags & UBLK_F_NEED_GET_DATA)
|
||||
return true;
|
||||
return false;
|
||||
return ubq->flags & UBLK_F_NEED_GET_DATA;
|
||||
}
|
||||
|
||||
static struct ublk_device *ublk_get_device(struct ublk_device *ub)
|
||||
@ -349,25 +348,19 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
|
||||
static inline bool ublk_queue_can_use_recovery_reissue(
|
||||
struct ublk_queue *ubq)
|
||||
{
|
||||
if ((ubq->flags & UBLK_F_USER_RECOVERY) &&
|
||||
(ubq->flags & UBLK_F_USER_RECOVERY_REISSUE))
|
||||
return true;
|
||||
return false;
|
||||
return (ubq->flags & UBLK_F_USER_RECOVERY) &&
|
||||
(ubq->flags & UBLK_F_USER_RECOVERY_REISSUE);
|
||||
}
|
||||
|
||||
static inline bool ublk_queue_can_use_recovery(
|
||||
struct ublk_queue *ubq)
|
||||
{
|
||||
if (ubq->flags & UBLK_F_USER_RECOVERY)
|
||||
return true;
|
||||
return false;
|
||||
return ubq->flags & UBLK_F_USER_RECOVERY;
|
||||
}
|
||||
|
||||
static inline bool ublk_can_use_recovery(struct ublk_device *ub)
|
||||
{
|
||||
if (ub->dev_info.flags & UBLK_F_USER_RECOVERY)
|
||||
return true;
|
||||
return false;
|
||||
return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
|
||||
}
|
||||
|
||||
static void ublk_free_disk(struct gendisk *disk)
|
||||
@ -428,10 +421,9 @@ static const struct block_device_operations ub_fops = {
|
||||
#define UBLK_MAX_PIN_PAGES 32
|
||||
|
||||
struct ublk_map_data {
|
||||
const struct ublk_queue *ubq;
|
||||
const struct request *rq;
|
||||
const struct ublk_io *io;
|
||||
unsigned max_bytes;
|
||||
unsigned long ubuf;
|
||||
unsigned int len;
|
||||
};
|
||||
|
||||
struct ublk_io_iter {
|
||||
@ -488,18 +480,17 @@ static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
|
||||
return done;
|
||||
}
|
||||
|
||||
static inline int ublk_copy_user_pages(struct ublk_map_data *data,
|
||||
bool to_vm)
|
||||
static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm)
|
||||
{
|
||||
const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0;
|
||||
const unsigned long start_vm = data->io->addr;
|
||||
const unsigned long start_vm = data->ubuf;
|
||||
unsigned int done = 0;
|
||||
struct ublk_io_iter iter = {
|
||||
.pg_off = start_vm & (PAGE_SIZE - 1),
|
||||
.bio = data->rq->bio,
|
||||
.iter = data->rq->bio->bi_iter,
|
||||
};
|
||||
const unsigned int nr_pages = round_up(data->max_bytes +
|
||||
const unsigned int nr_pages = round_up(data->len +
|
||||
(start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT;
|
||||
|
||||
while (done < nr_pages) {
|
||||
@ -512,42 +503,49 @@ static inline int ublk_copy_user_pages(struct ublk_map_data *data,
|
||||
iter.pages);
|
||||
if (iter.nr_pages <= 0)
|
||||
return done == 0 ? iter.nr_pages : done;
|
||||
len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm);
|
||||
len = ublk_copy_io_pages(&iter, data->len, to_vm);
|
||||
for (i = 0; i < iter.nr_pages; i++) {
|
||||
if (to_vm)
|
||||
set_page_dirty(iter.pages[i]);
|
||||
put_page(iter.pages[i]);
|
||||
}
|
||||
data->max_bytes -= len;
|
||||
data->len -= len;
|
||||
done += iter.nr_pages;
|
||||
}
|
||||
|
||||
return done;
|
||||
}
|
||||
|
||||
static inline bool ublk_need_map_req(const struct request *req)
|
||||
{
|
||||
return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE;
|
||||
}
|
||||
|
||||
static inline bool ublk_need_unmap_req(const struct request *req)
|
||||
{
|
||||
return ublk_rq_has_data(req) && req_op(req) == REQ_OP_READ;
|
||||
}
|
||||
|
||||
static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
|
||||
struct ublk_io *io)
|
||||
{
|
||||
const unsigned int rq_bytes = blk_rq_bytes(req);
|
||||
|
||||
/*
|
||||
* no zero copy, we delay copy WRITE request data into ublksrv
|
||||
* context and the big benefit is that pinning pages in current
|
||||
* context is pretty fast, see ublk_pin_user_pages
|
||||
*/
|
||||
if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH)
|
||||
return rq_bytes;
|
||||
|
||||
if (ublk_rq_has_data(req)) {
|
||||
if (ublk_need_map_req(req)) {
|
||||
struct ublk_map_data data = {
|
||||
.ubq = ubq,
|
||||
.rq = req,
|
||||
.io = io,
|
||||
.max_bytes = rq_bytes,
|
||||
.ubuf = io->addr,
|
||||
.len = rq_bytes,
|
||||
};
|
||||
|
||||
ublk_copy_user_pages(&data, true);
|
||||
|
||||
return rq_bytes - data.max_bytes;
|
||||
return rq_bytes - data.len;
|
||||
}
|
||||
return rq_bytes;
|
||||
}
|
||||
@ -558,19 +556,18 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
|
||||
{
|
||||
const unsigned int rq_bytes = blk_rq_bytes(req);
|
||||
|
||||
if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) {
|
||||
if (ublk_need_unmap_req(req)) {
|
||||
struct ublk_map_data data = {
|
||||
.ubq = ubq,
|
||||
.rq = req,
|
||||
.io = io,
|
||||
.max_bytes = io->res,
|
||||
.ubuf = io->addr,
|
||||
.len = io->res,
|
||||
};
|
||||
|
||||
WARN_ON_ONCE(io->res > rq_bytes);
|
||||
|
||||
ublk_copy_user_pages(&data, false);
|
||||
|
||||
return io->res - data.max_bytes;
|
||||
return io->res - data.len;
|
||||
}
|
||||
return rq_bytes;
|
||||
}
|
||||
@ -655,14 +652,15 @@ static void ublk_complete_rq(struct request *req)
|
||||
struct ublk_queue *ubq = req->mq_hctx->driver_data;
|
||||
struct ublk_io *io = &ubq->ios[req->tag];
|
||||
unsigned int unmapped_bytes;
|
||||
blk_status_t res = BLK_STS_OK;
|
||||
|
||||
/* failed read IO if nothing is read */
|
||||
if (!io->res && req_op(req) == REQ_OP_READ)
|
||||
io->res = -EIO;
|
||||
|
||||
if (io->res < 0) {
|
||||
blk_mq_end_request(req, errno_to_blk_status(io->res));
|
||||
return;
|
||||
res = errno_to_blk_status(io->res);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -671,10 +669,8 @@ static void ublk_complete_rq(struct request *req)
|
||||
*
|
||||
* Both the two needn't unmap.
|
||||
*/
|
||||
if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) {
|
||||
blk_mq_end_request(req, BLK_STS_OK);
|
||||
return;
|
||||
}
|
||||
if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE)
|
||||
goto exit;
|
||||
|
||||
/* for READ request, writing data in iod->addr to rq buffers */
|
||||
unmapped_bytes = ublk_unmap_io(ubq, req, io);
|
||||
@ -691,6 +687,10 @@ static void ublk_complete_rq(struct request *req)
|
||||
blk_mq_requeue_request(req, true);
|
||||
else
|
||||
__blk_mq_end_request(req, BLK_STS_OK);
|
||||
|
||||
return;
|
||||
exit:
|
||||
blk_mq_end_request(req, res);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -771,9 +771,7 @@ static inline void __ublk_rq_task_work(struct request *req,
|
||||
return;
|
||||
}
|
||||
|
||||
if (ublk_need_get_data(ubq) &&
|
||||
(req_op(req) == REQ_OP_WRITE ||
|
||||
req_op(req) == REQ_OP_FLUSH)) {
|
||||
if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) {
|
||||
/*
|
||||
* We have not handled UBLK_IO_NEED_GET_DATA command yet,
|
||||
* so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
|
||||
@ -1261,6 +1259,19 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
|
||||
ublk_queue_cmd(ubq, req);
|
||||
}
|
||||
|
||||
/*
 * Validate the ioctl "type" field carried in a ublk command opcode.
 *
 * An opcode is accepted when its type is 'u' (ioctl-encoded command).
 * A type of 0 is additionally accepted, but only when
 * CONFIG_BLKDEV_UBLK_LEGACY_OPCODES is enabled.
 *
 * Returns 0 when the opcode type is acceptable, -EOPNOTSUPP otherwise.
 */
static inline int ublk_check_cmd_op(u32 cmd_op)
{
	u32 ioc_type = _IOC_TYPE(cmd_op);

	/*
	 * Legacy opcodes disabled: only the ioctl-encoded type is valid.
	 * The condition must be negated; without the '!' a kernel built with
	 * legacy support rejects type 0, while one built without it lets
	 * type 0 slip through the check below.
	 */
	if (!IS_ENABLED(CONFIG_BLKDEV_UBLK_LEGACY_OPCODES) && ioc_type != 'u')
		return -EOPNOTSUPP;

	/* Legacy opcodes enabled: type 0 is valid in addition to 'u'. */
	if (ioc_type != 'u' && ioc_type != 0)
		return -EOPNOTSUPP;

	return 0;
}
|
||||
|
||||
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags,
|
||||
struct ublksrv_io_cmd *ub_cmd)
|
||||
@ -1303,10 +1314,15 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
* iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
|
||||
*/
|
||||
if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
|
||||
^ (cmd_op == UBLK_IO_NEED_GET_DATA))
|
||||
^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
|
||||
goto out;
|
||||
|
||||
switch (cmd_op) {
|
||||
ret = ublk_check_cmd_op(cmd_op);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = -EINVAL;
|
||||
switch (_IOC_NR(cmd_op)) {
|
||||
case UBLK_IO_FETCH_REQ:
|
||||
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
|
||||
if (ublk_queue_ready(ubq)) {
|
||||
@ -1770,6 +1786,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
|
||||
if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
|
||||
ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
|
||||
|
||||
ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE;
|
||||
|
||||
/* We are not ready to support zero copy */
|
||||
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
|
||||
|
||||
@ -2128,7 +2146,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
|
||||
* know if the specified device is created as unprivileged
|
||||
* mode.
|
||||
*/
|
||||
if (cmd->cmd_op != UBLK_CMD_GET_DEV_INFO2)
|
||||
if (_IOC_NR(cmd->cmd_op) != UBLK_CMD_GET_DEV_INFO2)
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2154,7 +2172,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
|
||||
dev_path[header->dev_path_len] = 0;
|
||||
|
||||
ret = -EINVAL;
|
||||
switch (cmd->cmd_op) {
|
||||
switch (_IOC_NR(cmd->cmd_op)) {
|
||||
case UBLK_CMD_GET_DEV_INFO:
|
||||
case UBLK_CMD_GET_DEV_INFO2:
|
||||
case UBLK_CMD_GET_QUEUE_AFFINITY:
|
||||
@ -2193,6 +2211,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
struct ublk_device *ub = NULL;
|
||||
u32 cmd_op = cmd->cmd_op;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
@ -2203,22 +2222,22 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
||||
if (!(issue_flags & IO_URING_F_SQE128))
|
||||
goto out;
|
||||
|
||||
if (cmd->cmd_op != UBLK_CMD_ADD_DEV) {
|
||||
ret = ublk_check_cmd_op(cmd_op);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (_IOC_NR(cmd_op) != UBLK_CMD_ADD_DEV) {
|
||||
ret = -ENODEV;
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
goto out;
|
||||
|
||||
ret = ublk_ctrl_uring_cmd_permission(ub, cmd);
|
||||
} else {
|
||||
/* ADD_DEV permission check is done in command handler */
|
||||
ret = 0;
|
||||
if (ret)
|
||||
goto put_dev;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto put_dev;
|
||||
|
||||
switch (cmd->cmd_op) {
|
||||
switch (_IOC_NR(cmd_op)) {
|
||||
case UBLK_CMD_START_DEV:
|
||||
ret = ublk_ctrl_start_dev(ub, cmd);
|
||||
break;
|
||||
|
@ -1202,21 +1202,12 @@ struct dm_crypto_profile {
|
||||
struct mapped_device *md;
|
||||
};
|
||||
|
||||
struct dm_keyslot_evict_args {
|
||||
const struct blk_crypto_key *key;
|
||||
int err;
|
||||
};
|
||||
|
||||
static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
struct dm_keyslot_evict_args *args = data;
|
||||
int err;
|
||||
const struct blk_crypto_key *key = data;
|
||||
|
||||
err = blk_crypto_evict_key(dev->bdev, args->key);
|
||||
if (!args->err)
|
||||
args->err = err;
|
||||
/* Always try to evict the key from all devices. */
|
||||
blk_crypto_evict_key(dev->bdev, key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1229,7 +1220,6 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile,
|
||||
{
|
||||
struct mapped_device *md =
|
||||
container_of(profile, struct dm_crypto_profile, profile)->md;
|
||||
struct dm_keyslot_evict_args args = { key };
|
||||
struct dm_table *t;
|
||||
int srcu_idx;
|
||||
|
||||
@ -1242,11 +1232,12 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile,
|
||||
|
||||
if (!ti->type->iterate_devices)
|
||||
continue;
|
||||
ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args);
|
||||
ti->type->iterate_devices(ti, dm_keyslot_evict_callback,
|
||||
(void *)key);
|
||||
}
|
||||
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
return args.err;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -209,76 +209,99 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
static unsigned int optimal_io_size(struct block_device *bdev,
|
||||
unsigned int last_page_size,
|
||||
unsigned int io_size)
|
||||
{
|
||||
if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev))
|
||||
return roundup(last_page_size, bdev_io_opt(bdev));
|
||||
return io_size;
|
||||
}
|
||||
|
||||
static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
|
||||
sector_t start, sector_t boundary)
|
||||
{
|
||||
if (io_size != opt_size &&
|
||||
start + opt_size / SECTOR_SIZE <= boundary)
|
||||
return opt_size;
|
||||
if (start + io_size / SECTOR_SIZE <= boundary)
|
||||
return io_size;
|
||||
|
||||
/* Overflows boundary */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
||||
struct page *page)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct block_device *bdev;
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
sector_t offset = mddev->bitmap_info.offset;
|
||||
sector_t ps, sboff, doff;
|
||||
unsigned int size = PAGE_SIZE;
|
||||
unsigned int opt_size = PAGE_SIZE;
|
||||
|
||||
restart:
|
||||
rdev = NULL;
|
||||
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
|
||||
int size = PAGE_SIZE;
|
||||
loff_t offset = mddev->bitmap_info.offset;
|
||||
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
|
||||
if (page->index == store->file_pages - 1) {
|
||||
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
|
||||
|
||||
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
|
||||
|
||||
if (page->index == store->file_pages-1) {
|
||||
int last_page_size = store->bytes & (PAGE_SIZE-1);
|
||||
if (last_page_size == 0)
|
||||
last_page_size = PAGE_SIZE;
|
||||
size = roundup(last_page_size,
|
||||
bdev_logical_block_size(bdev));
|
||||
}
|
||||
/* Just make sure we aren't corrupting data or
|
||||
* metadata
|
||||
*/
|
||||
if (mddev->external) {
|
||||
/* Bitmap could be anywhere. */
|
||||
if (rdev->sb_start + offset + (page->index
|
||||
* (PAGE_SIZE/512))
|
||||
> rdev->data_offset
|
||||
&&
|
||||
rdev->sb_start + offset
|
||||
< (rdev->data_offset + mddev->dev_sectors
|
||||
+ (PAGE_SIZE/512)))
|
||||
goto bad_alignment;
|
||||
} else if (offset < 0) {
|
||||
/* DATA BITMAP METADATA */
|
||||
if (offset
|
||||
+ (long)(page->index * (PAGE_SIZE/512))
|
||||
+ size/512 > 0)
|
||||
/* bitmap runs in to metadata */
|
||||
goto bad_alignment;
|
||||
if (rdev->data_offset + mddev->dev_sectors
|
||||
> rdev->sb_start + offset)
|
||||
/* data runs in to bitmap */
|
||||
goto bad_alignment;
|
||||
} else if (rdev->sb_start < rdev->data_offset) {
|
||||
/* METADATA BITMAP DATA */
|
||||
if (rdev->sb_start
|
||||
+ offset
|
||||
+ page->index*(PAGE_SIZE/512) + size/512
|
||||
> rdev->data_offset)
|
||||
/* bitmap runs in to data */
|
||||
goto bad_alignment;
|
||||
} else {
|
||||
/* DATA METADATA BITMAP - no problems */
|
||||
}
|
||||
md_super_write(mddev, rdev,
|
||||
rdev->sb_start + offset
|
||||
+ page->index * (PAGE_SIZE/512),
|
||||
size,
|
||||
page);
|
||||
if (last_page_size == 0)
|
||||
last_page_size = PAGE_SIZE;
|
||||
size = roundup(last_page_size, bdev_logical_block_size(bdev));
|
||||
opt_size = optimal_io_size(bdev, last_page_size, size);
|
||||
}
|
||||
|
||||
if (wait && md_super_wait(mddev) < 0)
|
||||
goto restart;
|
||||
return 0;
|
||||
ps = page->index * PAGE_SIZE / SECTOR_SIZE;
|
||||
sboff = rdev->sb_start + offset;
|
||||
doff = rdev->data_offset;
|
||||
|
||||
bad_alignment:
|
||||
return -EINVAL;
|
||||
/* Just make sure we aren't corrupting data or metadata */
|
||||
if (mddev->external) {
|
||||
/* Bitmap could be anywhere. */
|
||||
if (sboff + ps > doff &&
|
||||
sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE))
|
||||
return -EINVAL;
|
||||
} else if (offset < 0) {
|
||||
/* DATA BITMAP METADATA */
|
||||
size = bitmap_io_size(size, opt_size, offset + ps, 0);
|
||||
if (size == 0)
|
||||
/* bitmap runs in to metadata */
|
||||
return -EINVAL;
|
||||
|
||||
if (doff + mddev->dev_sectors > sboff)
|
||||
/* data runs in to bitmap */
|
||||
return -EINVAL;
|
||||
} else if (rdev->sb_start < rdev->data_offset) {
|
||||
/* METADATA BITMAP DATA */
|
||||
size = bitmap_io_size(size, opt_size, sboff + ps, doff);
|
||||
if (size == 0)
|
||||
/* bitmap runs in to data */
|
||||
return -EINVAL;
|
||||
} else {
|
||||
/* DATA METADATA BITMAP - no problems */
|
||||
}
|
||||
|
||||
md_super_write(mddev, rdev, sboff + ps, (int) size, page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
rdev = NULL;
|
||||
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
|
||||
ret = __write_sb_page(rdev, bitmap, page);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
} while (wait && md_super_wait(mddev) < 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void md_bitmap_file_kick(struct bitmap *bitmap);
|
||||
|
@ -223,7 +223,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
bio_sector < start_sector))
|
||||
goto out_of_bounds;
|
||||
|
||||
if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
|
||||
if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
|
||||
md_error(mddev, tmp_dev->rdev);
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
@ -270,6 +271,16 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
|
||||
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
|
||||
}
|
||||
|
||||
static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
|
||||
char *md_name = mdname(mddev);
|
||||
|
||||
pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
|
||||
md_name, rdev->bdev);
|
||||
}
|
||||
}
|
||||
|
||||
static void linear_quiesce(struct mddev *mddev, int state)
|
||||
{
|
||||
}
|
||||
@ -286,6 +297,7 @@ static struct md_personality linear_personality =
|
||||
.hot_add_disk = linear_add,
|
||||
.size = linear_size,
|
||||
.quiesce = linear_quiesce,
|
||||
.error_handler = linear_error,
|
||||
};
|
||||
|
||||
static int __init linear_init (void)
|
||||
|
@ -78,7 +78,7 @@
|
||||
static LIST_HEAD(pers_list);
|
||||
static DEFINE_SPINLOCK(pers_lock);
|
||||
|
||||
static struct kobj_type md_ktype;
|
||||
static const struct kobj_type md_ktype;
|
||||
|
||||
struct md_cluster_operations *md_cluster_ops;
|
||||
EXPORT_SYMBOL(md_cluster_ops);
|
||||
@ -3600,7 +3600,7 @@ static const struct sysfs_ops rdev_sysfs_ops = {
|
||||
.show = rdev_attr_show,
|
||||
.store = rdev_attr_store,
|
||||
};
|
||||
static struct kobj_type rdev_ktype = {
|
||||
static const struct kobj_type rdev_ktype = {
|
||||
.release = rdev_free,
|
||||
.sysfs_ops = &rdev_sysfs_ops,
|
||||
.default_groups = rdev_default_groups,
|
||||
@ -5558,7 +5558,7 @@ static const struct sysfs_ops md_sysfs_ops = {
|
||||
.show = md_attr_show,
|
||||
.store = md_attr_store,
|
||||
};
|
||||
static struct kobj_type md_ktype = {
|
||||
static const struct kobj_type md_ktype = {
|
||||
.release = md_kobj_release,
|
||||
.sysfs_ops = &md_sysfs_ops,
|
||||
.default_groups = md_attr_groups,
|
||||
@ -7974,6 +7974,9 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
return;
|
||||
mddev->pers->error_handler(mddev, rdev);
|
||||
|
||||
if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
|
||||
return;
|
||||
|
||||
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
|
||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
@ -8029,16 +8032,16 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
|
||||
} else if (resync > max_sectors) {
|
||||
resync = max_sectors;
|
||||
} else {
|
||||
resync -= atomic_read(&mddev->recovery_active);
|
||||
if (resync < MD_RESYNC_ACTIVE) {
|
||||
/*
|
||||
* Resync has started, but the subtraction has
|
||||
* yielded one of the special values. Force it
|
||||
* to active to ensure the status reports an
|
||||
* active resync.
|
||||
*/
|
||||
res = atomic_read(&mddev->recovery_active);
|
||||
/*
|
||||
* Resync has started, but the subtraction has overflowed or
|
||||
* yielded one of the special values. Force it to active to
|
||||
* ensure the status reports an active resync.
|
||||
*/
|
||||
if (resync < res || resync - res < MD_RESYNC_ACTIVE)
|
||||
resync = MD_RESYNC_ACTIVE;
|
||||
}
|
||||
else
|
||||
resync -= res;
|
||||
}
|
||||
|
||||
if (resync == MD_RESYNC_NONE) {
|
||||
|
@ -790,15 +790,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
|
||||
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
|
||||
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
|
||||
|
||||
static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
|
||||
static inline bool is_rdev_broken(struct md_rdev *rdev)
|
||||
{
|
||||
if (!disk_live(rdev->bdev->bd_disk)) {
|
||||
if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
|
||||
pr_warn("md: %s: %s array has a missing/failed member\n",
|
||||
mdname(rdev->mddev), md_type);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return !disk_live(rdev->bdev->bd_disk);
|
||||
}
|
||||
|
||||
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
|
||||
|
@ -569,8 +569,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||
return true;
|
||||
}
|
||||
|
||||
if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
|
||||
if (unlikely(is_rdev_broken(tmp_dev))) {
|
||||
bio_io_error(bio);
|
||||
md_error(mddev, tmp_dev);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -592,6 +593,16 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
|
||||
return;
|
||||
}
|
||||
|
||||
static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
|
||||
char *md_name = mdname(mddev);
|
||||
|
||||
pr_crit("md/raid0%s: Disk failure on %pg detected, failing array.\n",
|
||||
md_name, rdev->bdev);
|
||||
}
|
||||
}
|
||||
|
||||
static void *raid0_takeover_raid45(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
@ -767,6 +778,7 @@ static struct md_personality raid0_personality=
|
||||
.size = raid0_size,
|
||||
.takeover = raid0_takeover,
|
||||
.quiesce = raid0_quiesce,
|
||||
.error_handler = raid0_error,
|
||||
};
|
||||
|
||||
static int __init raid0_init (void)
|
||||
|
@ -952,7 +952,9 @@ static void flush_pending_writes(struct r10conf *conf)
|
||||
static void raise_barrier(struct r10conf *conf, int force)
|
||||
{
|
||||
write_seqlock_irq(&conf->resync_lock);
|
||||
BUG_ON(force && !conf->barrier);
|
||||
|
||||
if (WARN_ON_ONCE(force && !conf->barrier))
|
||||
force = false;
|
||||
|
||||
/* Wait until no block IO is waiting (unless 'force') */
|
||||
wait_event_barrier(conf, force || !conf->nr_waiting);
|
||||
@ -995,11 +997,15 @@ static bool stop_waiting_barrier(struct r10conf *conf)
|
||||
(!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
|
||||
return true;
|
||||
|
||||
/* move on if recovery thread is blocked by us */
|
||||
if (conf->mddev->thread->tsk == current &&
|
||||
test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
|
||||
conf->nr_queued > 0)
|
||||
/*
|
||||
* move on if io is issued from raid10d(), nr_pending is not released
|
||||
* from original io(see handle_read_error()). All raise barrier is
|
||||
* blocked until this io is done.
|
||||
*/
|
||||
if (conf->mddev->thread->tsk == current) {
|
||||
WARN_ON_ONCE(atomic_read(&conf->nr_pending) == 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -1244,7 +1250,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
}
|
||||
slot = r10_bio->read_slot;
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
if (!r10_bio->start_time &&
|
||||
blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r10_bio->start_time = bio_start_io_acct(bio);
|
||||
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
|
||||
|
||||
@ -1574,6 +1581,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
|
||||
r10_bio->sector = bio->bi_iter.bi_sector;
|
||||
r10_bio->state = 0;
|
||||
r10_bio->read_slot = -1;
|
||||
r10_bio->start_time = 0;
|
||||
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
|
||||
conf->geo.raid_disks);
|
||||
|
||||
@ -1626,7 +1634,7 @@ static void raid10_end_discard_request(struct bio *bio)
|
||||
/*
|
||||
* raid10_remove_disk uses smp_mb to make sure rdev is set to
|
||||
* replacement before setting replacement to NULL. It can read
|
||||
* rdev first without barrier protect even replacment is NULL
|
||||
* rdev first without barrier protect even replacement is NULL
|
||||
*/
|
||||
smp_rmb();
|
||||
rdev = conf->mirrors[dev].rdev;
|
||||
@ -2609,11 +2617,22 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
int d;
|
||||
struct bio *wbio, *wbio2;
|
||||
struct bio *wbio = r10_bio->devs[1].bio;
|
||||
struct bio *wbio2 = r10_bio->devs[1].repl_bio;
|
||||
|
||||
/* Need to test wbio2->bi_end_io before we call
|
||||
* submit_bio_noacct as if the former is NULL,
|
||||
* the latter is free to free wbio2.
|
||||
*/
|
||||
if (wbio2 && !wbio2->bi_end_io)
|
||||
wbio2 = NULL;
|
||||
|
||||
if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
|
||||
fix_recovery_read_error(r10_bio);
|
||||
end_sync_request(r10_bio);
|
||||
if (wbio->bi_end_io)
|
||||
end_sync_request(r10_bio);
|
||||
if (wbio2)
|
||||
end_sync_request(r10_bio);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2622,14 +2641,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
* and submit the write request
|
||||
*/
|
||||
d = r10_bio->devs[1].devnum;
|
||||
wbio = r10_bio->devs[1].bio;
|
||||
wbio2 = r10_bio->devs[1].repl_bio;
|
||||
/* Need to test wbio2->bi_end_io before we call
|
||||
* submit_bio_noacct as if the former is NULL,
|
||||
* the latter is free to free wbio2.
|
||||
*/
|
||||
if (wbio2 && !wbio2->bi_end_io)
|
||||
wbio2 = NULL;
|
||||
if (wbio->bi_end_io) {
|
||||
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
|
||||
md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
|
||||
@ -2978,9 +2989,13 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
md_error(mddev, rdev);
|
||||
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
allow_barrier(conf);
|
||||
r10_bio->state = 0;
|
||||
raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
|
||||
/*
|
||||
* allow_barrier after re-submit to ensure no sync io
|
||||
* can be issued while regular io pending.
|
||||
*/
|
||||
allow_barrier(conf);
|
||||
}
|
||||
|
||||
static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
@ -3289,10 +3304,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
sector_t chunk_mask = conf->geo.chunk_mask;
|
||||
int page_idx = 0;
|
||||
|
||||
if (!mempool_initialized(&conf->r10buf_pool))
|
||||
if (init_resync(conf))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Allow skipping a full rebuild for incremental assembly
|
||||
* of a clean array, like RAID1 does.
|
||||
@ -3308,6 +3319,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
return mddev->dev_sectors - sector_nr;
|
||||
}
|
||||
|
||||
if (!mempool_initialized(&conf->r10buf_pool))
|
||||
if (init_resync(conf))
|
||||
return 0;
|
||||
|
||||
skipped:
|
||||
max_sector = mddev->dev_sectors;
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
||||
@ -4004,6 +4019,20 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
|
||||
return nc*fc;
|
||||
}
|
||||
|
||||
static void raid10_free_conf(struct r10conf *conf)
|
||||
{
|
||||
if (!conf)
|
||||
return;
|
||||
|
||||
mempool_exit(&conf->r10bio_pool);
|
||||
kfree(conf->mirrors);
|
||||
kfree(conf->mirrors_old);
|
||||
kfree(conf->mirrors_new);
|
||||
safe_put_page(conf->tmppage);
|
||||
bioset_exit(&conf->bio_split);
|
||||
kfree(conf);
|
||||
}
|
||||
|
||||
static struct r10conf *setup_conf(struct mddev *mddev)
|
||||
{
|
||||
struct r10conf *conf = NULL;
|
||||
@ -4086,13 +4115,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
|
||||
return conf;
|
||||
|
||||
out:
|
||||
if (conf) {
|
||||
mempool_exit(&conf->r10bio_pool);
|
||||
kfree(conf->mirrors);
|
||||
safe_put_page(conf->tmppage);
|
||||
bioset_exit(&conf->bio_split);
|
||||
kfree(conf);
|
||||
}
|
||||
raid10_free_conf(conf);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
@ -4129,6 +4152,9 @@ static int raid10_run(struct mddev *mddev)
|
||||
if (!conf)
|
||||
goto out;
|
||||
|
||||
mddev->thread = conf->thread;
|
||||
conf->thread = NULL;
|
||||
|
||||
if (mddev_is_clustered(conf->mddev)) {
|
||||
int fc, fo;
|
||||
|
||||
@ -4141,9 +4167,6 @@ static int raid10_run(struct mddev *mddev)
|
||||
}
|
||||
}
|
||||
|
||||
mddev->thread = conf->thread;
|
||||
conf->thread = NULL;
|
||||
|
||||
if (mddev->queue) {
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
|
||||
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
|
||||
@ -4283,10 +4306,7 @@ static int raid10_run(struct mddev *mddev)
|
||||
|
||||
out_free_conf:
|
||||
md_unregister_thread(&mddev->thread);
|
||||
mempool_exit(&conf->r10bio_pool);
|
||||
safe_put_page(conf->tmppage);
|
||||
kfree(conf->mirrors);
|
||||
kfree(conf);
|
||||
raid10_free_conf(conf);
|
||||
mddev->private = NULL;
|
||||
out:
|
||||
return -EIO;
|
||||
@ -4294,15 +4314,7 @@ out:
|
||||
|
||||
static void raid10_free(struct mddev *mddev, void *priv)
|
||||
{
|
||||
struct r10conf *conf = priv;
|
||||
|
||||
mempool_exit(&conf->r10bio_pool);
|
||||
safe_put_page(conf->tmppage);
|
||||
kfree(conf->mirrors);
|
||||
kfree(conf->mirrors_old);
|
||||
kfree(conf->mirrors_new);
|
||||
bioset_exit(&conf->bio_split);
|
||||
kfree(conf);
|
||||
raid10_free_conf(priv);
|
||||
}
|
||||
|
||||
static void raid10_quiesce(struct mddev *mddev, int quiesce)
|
||||
|
@ -7716,7 +7716,6 @@ static void raid5_set_io_opt(struct r5conf *conf)
|
||||
static int raid5_run(struct mddev *mddev)
|
||||
{
|
||||
struct r5conf *conf;
|
||||
int working_disks = 0;
|
||||
int dirty_parity_disks = 0;
|
||||
struct md_rdev *rdev;
|
||||
struct md_rdev *journal_dev = NULL;
|
||||
@ -7912,10 +7911,8 @@ static int raid5_run(struct mddev *mddev)
|
||||
pr_warn("md: cannot handle concurrent replacement and reshape.\n");
|
||||
goto abort;
|
||||
}
|
||||
if (test_bit(In_sync, &rdev->flags)) {
|
||||
working_disks++;
|
||||
if (test_bit(In_sync, &rdev->flags))
|
||||
continue;
|
||||
}
|
||||
/* This disc is not fully in-sync. However if it
|
||||
* just stored parity (beyond the recovery_offset),
|
||||
* when we don't need to be concerned about the
|
||||
|
@ -209,16 +209,16 @@ static inline struct apple_nvme *queue_to_apple_nvme(struct apple_nvme_queue *q)
|
||||
{
|
||||
if (q->is_adminq)
|
||||
return container_of(q, struct apple_nvme, adminq);
|
||||
else
|
||||
return container_of(q, struct apple_nvme, ioq);
|
||||
|
||||
return container_of(q, struct apple_nvme, ioq);
|
||||
}
|
||||
|
||||
static unsigned int apple_nvme_queue_depth(struct apple_nvme_queue *q)
|
||||
{
|
||||
if (q->is_adminq)
|
||||
return APPLE_NVME_AQ_DEPTH;
|
||||
else
|
||||
return APPLE_ANS_MAX_QUEUE_DEPTH;
|
||||
|
||||
return APPLE_ANS_MAX_QUEUE_DEPTH;
|
||||
}
|
||||
|
||||
static void apple_nvme_rtkit_crashed(void *cookie)
|
||||
|
@ -450,8 +450,8 @@ bool nvme_cancel_request(struct request *req, void *data)
|
||||
dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
|
||||
"Cancelling I/O %d", req->tag);
|
||||
|
||||
/* don't abort one completed request */
|
||||
if (blk_mq_request_completed(req))
|
||||
/* don't abort one completed or idle request */
|
||||
if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
|
||||
return true;
|
||||
|
||||
nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
@ -4819,8 +4819,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
|
||||
u32 aer_notice_type = nvme_aer_subtype(result);
|
||||
bool requeue = true;
|
||||
|
||||
trace_nvme_async_event(ctrl, aer_notice_type);
|
||||
|
||||
switch (aer_notice_type) {
|
||||
case NVME_AER_NOTICE_NS_CHANGED:
|
||||
set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
|
||||
@ -4856,7 +4854,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
|
||||
|
||||
static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
trace_nvme_async_event(ctrl, NVME_AER_ERROR);
|
||||
dev_warn(ctrl->device, "resetting controller due to AER\n");
|
||||
nvme_reset_ctrl(ctrl);
|
||||
}
|
||||
@ -4872,6 +4869,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
|
||||
if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
|
||||
return;
|
||||
|
||||
trace_nvme_async_event(ctrl, result);
|
||||
switch (aer_type) {
|
||||
case NVME_AER_NOTICE:
|
||||
requeue = nvme_handle_aen_notice(ctrl, result);
|
||||
@ -4889,7 +4887,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
|
||||
case NVME_AER_SMART:
|
||||
case NVME_AER_CSS:
|
||||
case NVME_AER_VS:
|
||||
trace_nvme_async_event(ctrl, aer_type);
|
||||
ctrl->aen_result = result;
|
||||
break;
|
||||
default:
|
||||
|
@ -5,7 +5,6 @@
|
||||
*/
|
||||
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/aer.h>
|
||||
#include <linux/async.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
@ -2535,7 +2534,6 @@ static int nvme_pci_enable(struct nvme_dev *dev)
|
||||
|
||||
nvme_map_cmb(dev);
|
||||
|
||||
pci_enable_pcie_error_reporting(pdev);
|
||||
pci_save_state(pdev);
|
||||
|
||||
result = nvme_pci_configure_admin_queue(dev);
|
||||
@ -2600,10 +2598,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
|
||||
nvme_suspend_io_queues(dev);
|
||||
nvme_suspend_queue(dev, 0);
|
||||
pci_free_irq_vectors(pdev);
|
||||
if (pci_is_enabled(pdev)) {
|
||||
pci_disable_pcie_error_reporting(pdev);
|
||||
if (pci_is_enabled(pdev))
|
||||
pci_disable_device(pdev);
|
||||
}
|
||||
nvme_reap_pending_cqes(dev);
|
||||
|
||||
nvme_cancel_tagset(&dev->ctrl);
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-rdma.h>
|
||||
#include <linux/blk-integrity.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/list.h>
|
||||
@ -464,7 +463,6 @@ static int nvme_rdma_create_cq(struct ib_device *ibdev,
|
||||
struct nvme_rdma_queue *queue)
|
||||
{
|
||||
int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);
|
||||
enum ib_poll_context poll_ctx;
|
||||
|
||||
/*
|
||||
* Spread I/O queues completion vectors according their queue index.
|
||||
@ -473,15 +471,12 @@ static int nvme_rdma_create_cq(struct ib_device *ibdev,
|
||||
comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
|
||||
|
||||
/* Polling queues need direct cq polling context */
|
||||
if (nvme_rdma_poll_queue(queue)) {
|
||||
poll_ctx = IB_POLL_DIRECT;
|
||||
if (nvme_rdma_poll_queue(queue))
|
||||
queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
|
||||
comp_vector, poll_ctx);
|
||||
} else {
|
||||
poll_ctx = IB_POLL_SOFTIRQ;
|
||||
comp_vector, IB_POLL_DIRECT);
|
||||
else
|
||||
queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
|
||||
comp_vector, poll_ctx);
|
||||
}
|
||||
comp_vector, IB_POLL_SOFTIRQ);
|
||||
|
||||
if (IS_ERR(queue->ib_cq)) {
|
||||
ret = PTR_ERR(queue->ib_cq);
|
||||
@ -2163,10 +2158,8 @@ static void nvme_rdma_map_queues(struct blk_mq_tag_set *set)
|
||||
ctrl->io_queues[HCTX_TYPE_DEFAULT];
|
||||
set->map[HCTX_TYPE_READ].queue_offset = 0;
|
||||
}
|
||||
blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
|
||||
ctrl->device->dev, 0);
|
||||
blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
|
||||
ctrl->device->dev, 0);
|
||||
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
|
||||
blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
|
||||
|
||||
if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
|
||||
/* map dedicated poll queues only if we have queues left */
|
||||
|
@ -888,6 +888,9 @@ static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
|
||||
size_t consumed = len;
|
||||
int result;
|
||||
|
||||
if (unlikely(!queue->rd_enabled))
|
||||
return -EFAULT;
|
||||
|
||||
while (len) {
|
||||
switch (nvme_tcp_recv_state(queue)) {
|
||||
case NVME_TCP_RECV_PDU:
|
||||
|
@ -127,15 +127,12 @@ TRACE_EVENT(nvme_async_event,
|
||||
),
|
||||
TP_printk("nvme%d: NVME_AEN=%#08x [%s]",
|
||||
__entry->ctrl_id, __entry->result,
|
||||
__print_symbolic(__entry->result,
|
||||
aer_name(NVME_AER_NOTICE_NS_CHANGED),
|
||||
aer_name(NVME_AER_NOTICE_ANA),
|
||||
aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
|
||||
aer_name(NVME_AER_NOTICE_DISC_CHANGED),
|
||||
aer_name(NVME_AER_ERROR),
|
||||
aer_name(NVME_AER_SMART),
|
||||
aer_name(NVME_AER_CSS),
|
||||
aer_name(NVME_AER_VS))
|
||||
__print_symbolic(__entry->result & 0x7,
|
||||
aer_name(NVME_AER_ERROR),
|
||||
aer_name(NVME_AER_SMART),
|
||||
aer_name(NVME_AER_NOTICE),
|
||||
aer_name(NVME_AER_CSS),
|
||||
aer_name(NVME_AER_VS))
|
||||
)
|
||||
);
|
||||
|
||||
|
@ -668,21 +668,11 @@ out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static bool nvmet_handle_identify_desclist(struct nvmet_req *req)
|
||||
static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req *req)
|
||||
{
|
||||
switch (req->cmd->identify.csi) {
|
||||
case NVME_CSI_NVM:
|
||||
nvmet_execute_identify_desclist(req);
|
||||
return true;
|
||||
case NVME_CSI_ZNS:
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
nvmet_execute_identify_desclist(req);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
/* Not supported: return zeroes */
|
||||
nvmet_req_complete(req,
|
||||
nvmet_zero_sgl(req, 0, sizeof(struct nvme_id_ctrl_nvm)));
|
||||
}
|
||||
|
||||
static void nvmet_execute_identify(struct nvmet_req *req)
|
||||
@ -692,54 +682,49 @@ static void nvmet_execute_identify(struct nvmet_req *req)
|
||||
|
||||
switch (req->cmd->identify.cns) {
|
||||
case NVME_ID_CNS_NS:
|
||||
switch (req->cmd->identify.csi) {
|
||||
case NVME_CSI_NVM:
|
||||
return nvmet_execute_identify_ns(req);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case NVME_ID_CNS_CS_NS:
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
switch (req->cmd->identify.csi) {
|
||||
case NVME_CSI_ZNS:
|
||||
return nvmet_execute_identify_cns_cs_ns(req);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
nvmet_execute_identify_ns(req);
|
||||
return;
|
||||
case NVME_ID_CNS_CTRL:
|
||||
nvmet_execute_identify_ctrl(req);
|
||||
return;
|
||||
case NVME_ID_CNS_NS_ACTIVE_LIST:
|
||||
nvmet_execute_identify_nslist(req);
|
||||
return;
|
||||
case NVME_ID_CNS_NS_DESC_LIST:
|
||||
nvmet_execute_identify_desclist(req);
|
||||
return;
|
||||
case NVME_ID_CNS_CS_NS:
|
||||
switch (req->cmd->identify.csi) {
|
||||
case NVME_CSI_NVM:
|
||||
return nvmet_execute_identify_ctrl(req);
|
||||
/* Not supported */
|
||||
break;
|
||||
case NVME_CSI_ZNS:
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
nvmet_execute_identify_ns_zns(req);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case NVME_ID_CNS_CS_CTRL:
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
switch (req->cmd->identify.csi) {
|
||||
case NVME_CSI_ZNS:
|
||||
return nvmet_execute_identify_cns_cs_ctrl(req);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case NVME_ID_CNS_NS_ACTIVE_LIST:
|
||||
switch (req->cmd->identify.csi) {
|
||||
case NVME_CSI_NVM:
|
||||
return nvmet_execute_identify_nslist(req);
|
||||
default:
|
||||
nvmet_execute_identify_ctrl_nvm(req);
|
||||
return;
|
||||
case NVME_CSI_ZNS:
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
nvmet_execute_identify_ctrl_zns(req);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case NVME_ID_CNS_NS_DESC_LIST:
|
||||
if (nvmet_handle_identify_desclist(req) == true)
|
||||
return;
|
||||
break;
|
||||
}
|
||||
|
||||
nvmet_req_cns_error_complete(req);
|
||||
pr_debug("unhandled identify cns %d on qid %d\n",
|
||||
req->cmd->identify.cns, req->sq->qid);
|
||||
req->error_loc = offsetof(struct nvme_identify, cns);
|
||||
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -614,10 +614,11 @@ fcloop_fcp_recv_work(struct work_struct *work)
|
||||
struct fcloop_fcpreq *tfcp_req =
|
||||
container_of(work, struct fcloop_fcpreq, fcp_rcv_work);
|
||||
struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
bool aborted = false;
|
||||
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
switch (tfcp_req->inistate) {
|
||||
case INI_IO_START:
|
||||
tfcp_req->inistate = INI_IO_ACTIVE;
|
||||
@ -626,11 +627,11 @@ fcloop_fcp_recv_work(struct work_struct *work)
|
||||
aborted = true;
|
||||
break;
|
||||
default:
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
|
||||
if (unlikely(aborted))
|
||||
ret = -ECANCELED;
|
||||
@ -655,8 +656,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
|
||||
container_of(work, struct fcloop_fcpreq, abort_rcv_work);
|
||||
struct nvmefc_fcp_req *fcpreq;
|
||||
bool completed = false;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
fcpreq = tfcp_req->fcpreq;
|
||||
switch (tfcp_req->inistate) {
|
||||
case INI_IO_ABORTED:
|
||||
@ -665,11 +667,11 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
|
||||
completed = true;
|
||||
break;
|
||||
default:
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
|
||||
if (unlikely(completed)) {
|
||||
/* remove reference taken in original abort downcall */
|
||||
@ -681,9 +683,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
|
||||
nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport,
|
||||
&tfcp_req->tgt_fcp_req);
|
||||
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
tfcp_req->fcpreq = NULL;
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
|
||||
fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
|
||||
/* call_host_done releases reference for abort downcall */
|
||||
@ -699,11 +701,12 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work)
|
||||
struct fcloop_fcpreq *tfcp_req =
|
||||
container_of(work, struct fcloop_fcpreq, tio_done_work);
|
||||
struct nvmefc_fcp_req *fcpreq;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
fcpreq = tfcp_req->fcpreq;
|
||||
tfcp_req->inistate = INI_IO_COMPLETED;
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
|
||||
fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status);
|
||||
}
|
||||
@ -807,13 +810,14 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
|
||||
u32 rsplen = 0, xfrlen = 0;
|
||||
int fcp_err = 0, active, aborted;
|
||||
u8 op = tgt_fcpreq->op;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
fcpreq = tfcp_req->fcpreq;
|
||||
active = tfcp_req->active;
|
||||
aborted = tfcp_req->aborted;
|
||||
tfcp_req->active = true;
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
|
||||
if (unlikely(active))
|
||||
/* illegal - call while i/o active */
|
||||
@ -821,9 +825,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
|
||||
|
||||
if (unlikely(aborted)) {
|
||||
/* target transport has aborted i/o prior */
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
tfcp_req->active = false;
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
tgt_fcpreq->transferred_length = 0;
|
||||
tgt_fcpreq->fcp_error = -ECANCELED;
|
||||
tgt_fcpreq->done(tgt_fcpreq);
|
||||
@ -880,9 +884,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
|
||||
break;
|
||||
}
|
||||
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
tfcp_req->active = false;
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
|
||||
tgt_fcpreq->transferred_length = xfrlen;
|
||||
tgt_fcpreq->fcp_error = fcp_err;
|
||||
@ -896,15 +900,16 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
|
||||
struct nvmefc_tgt_fcp_req *tgt_fcpreq)
|
||||
{
|
||||
struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* mark aborted only in case there were 2 threads in transport
|
||||
* (one doing io, other doing abort) and only kills ops posted
|
||||
* after the abort request
|
||||
*/
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
tfcp_req->aborted = true;
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
|
||||
tfcp_req->status = NVME_SC_INTERNAL;
|
||||
|
||||
@ -946,6 +951,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
|
||||
struct fcloop_ini_fcpreq *inireq = fcpreq->private;
|
||||
struct fcloop_fcpreq *tfcp_req;
|
||||
bool abortio = true;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock(&inireq->inilock);
|
||||
tfcp_req = inireq->tfcp_req;
|
||||
@ -958,7 +964,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
|
||||
return;
|
||||
|
||||
/* break initiator/target relationship for io */
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
spin_lock_irqsave(&tfcp_req->reqlock, flags);
|
||||
switch (tfcp_req->inistate) {
|
||||
case INI_IO_START:
|
||||
case INI_IO_ACTIVE:
|
||||
@ -968,11 +974,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
|
||||
abortio = false;
|
||||
break;
|
||||
default:
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
|
||||
|
||||
if (abortio)
|
||||
/* leave the reference while the work item is scheduled */
|
||||
|
@ -581,8 +581,8 @@ bool nvmet_ns_revalidate(struct nvmet_ns *ns);
|
||||
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
|
||||
|
||||
bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
|
||||
void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req);
|
||||
void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req);
|
||||
void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req);
|
||||
void nvmet_execute_identify_ns_zns(struct nvmet_req *req);
|
||||
void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req);
|
||||
void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req);
|
||||
void nvmet_bdev_execute_zone_append(struct nvmet_req *req);
|
||||
@ -687,14 +687,6 @@ static inline bool nvmet_use_inline_bvec(struct nvmet_req *req)
|
||||
req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC;
|
||||
}
|
||||
|
||||
static inline void nvmet_req_cns_error_complete(struct nvmet_req *req)
|
||||
{
|
||||
pr_debug("unhandled identify cns %d on qid %d\n",
|
||||
req->cmd->identify.cns, req->sq->qid);
|
||||
req->error_loc = offsetof(struct nvme_identify, cns);
|
||||
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
|
||||
}
|
||||
|
||||
static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio)
|
||||
{
|
||||
if (bio != &req->b.inline_bio)
|
||||
|
@ -20,6 +20,31 @@
|
||||
|
||||
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
|
||||
|
||||
static int param_store_val(const char *str, int *val, int min, int max)
|
||||
{
|
||||
int ret, new_val;
|
||||
|
||||
ret = kstrtoint(str, 10, &new_val);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
if (new_val < min || new_val > max)
|
||||
return -EINVAL;
|
||||
|
||||
*val = new_val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_params(const char *str, const struct kernel_param *kp)
|
||||
{
|
||||
return param_store_val(str, kp->arg, 0, INT_MAX);
|
||||
}
|
||||
|
||||
static const struct kernel_param_ops set_param_ops = {
|
||||
.set = set_params,
|
||||
.get = param_get_int,
|
||||
};
|
||||
|
||||
/* Define the socket priority to use for connections were it is desirable
|
||||
* that the NIC consider performing optimized packet processing or filtering.
|
||||
* A non-zero value being sufficient to indicate general consideration of any
|
||||
@ -27,8 +52,8 @@
|
||||
* values that may be unique for some NIC implementations.
|
||||
*/
|
||||
static int so_priority;
|
||||
module_param(so_priority, int, 0644);
|
||||
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
|
||||
device_param_cb(so_priority, &set_param_ops, &so_priority, 0644);
|
||||
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority: Default 0");
|
||||
|
||||
/* Define a time period (in usecs) that io_work() shall sample an activated
|
||||
* queue before determining it to be idle. This optional module behavior
|
||||
@ -36,9 +61,10 @@ MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
|
||||
* using advanced interrupt moderation techniques.
|
||||
*/
|
||||
static int idle_poll_period_usecs;
|
||||
module_param(idle_poll_period_usecs, int, 0644);
|
||||
device_param_cb(idle_poll_period_usecs, &set_param_ops,
|
||||
&idle_poll_period_usecs, 0644);
|
||||
MODULE_PARM_DESC(idle_poll_period_usecs,
|
||||
"nvmet tcp io_work poll till idle time period in usecs");
|
||||
"nvmet tcp io_work poll till idle time period in usecs: Default 0");
|
||||
|
||||
#define NVMET_TCP_RECV_BUDGET 8
|
||||
#define NVMET_TCP_SEND_BUDGET 8
|
||||
|
@ -70,7 +70,7 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
|
||||
return true;
|
||||
}
|
||||
|
||||
void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
|
||||
void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req)
|
||||
{
|
||||
u8 zasl = req->sq->ctrl->subsys->zasl;
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
@ -95,9 +95,9 @@ out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
|
||||
void nvmet_execute_identify_ns_zns(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_id_ns_zns *id_zns;
|
||||
struct nvme_id_ns_zns *id_zns = NULL;
|
||||
u64 zsze;
|
||||
u16 status;
|
||||
u32 mar, mor;
|
||||
@ -118,16 +118,18 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
|
||||
if (status)
|
||||
goto done;
|
||||
|
||||
if (!bdev_is_zoned(req->ns->bdev)) {
|
||||
req->error_loc = offsetof(struct nvme_identify, nsid);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (nvmet_ns_revalidate(req->ns)) {
|
||||
mutex_lock(&req->ns->subsys->lock);
|
||||
nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
|
||||
mutex_unlock(&req->ns->subsys->lock);
|
||||
}
|
||||
|
||||
if (!bdev_is_zoned(req->ns->bdev)) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
|
||||
req->error_loc = offsetof(struct nvme_identify, nsid);
|
||||
goto out;
|
||||
}
|
||||
|
||||
zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
|
||||
req->ns->blksize_shift;
|
||||
id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
|
||||
@ -148,8 +150,8 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
|
||||
|
||||
done:
|
||||
status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
|
||||
kfree(id_zns);
|
||||
out:
|
||||
kfree(id_zns);
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
|
@ -73,7 +73,8 @@ static void dasd_profile_init(struct dasd_profile *, struct dentry *);
|
||||
static void dasd_profile_exit(struct dasd_profile *);
|
||||
static void dasd_hosts_init(struct dentry *, struct dasd_device *);
|
||||
static void dasd_hosts_exit(struct dasd_device *);
|
||||
|
||||
static int dasd_handle_autoquiesce(struct dasd_device *, struct dasd_ccw_req *,
|
||||
unsigned int);
|
||||
/*
|
||||
* SECTION: Operations on the device structure.
|
||||
*/
|
||||
@ -1451,6 +1452,8 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
|
||||
case -ENODEV:
|
||||
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
|
||||
"start_IO: -ENODEV device gone, retry");
|
||||
/* this is equivalent to CC=3 for SSCH report this to EER */
|
||||
dasd_handle_autoquiesce(device, cqr, DASD_EER_STARTIO);
|
||||
break;
|
||||
case -EIO:
|
||||
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
|
||||
@ -1952,6 +1955,16 @@ static void __dasd_device_process_final_queue(struct dasd_device *device,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* check if device should be autoquiesced due to too many timeouts
|
||||
*/
|
||||
static void __dasd_device_check_autoquiesce_timeout(struct dasd_device *device,
|
||||
struct dasd_ccw_req *cqr)
|
||||
{
|
||||
if ((device->default_retries - cqr->retries) >= device->aq_timeouts)
|
||||
dasd_handle_autoquiesce(device, cqr, DASD_EER_TIMEOUTS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Take a look at the first request on the ccw queue and check
|
||||
* if it reached its expire time. If so, terminate the IO.
|
||||
@ -1986,6 +1999,7 @@ static void __dasd_device_check_expire(struct dasd_device *device)
|
||||
"remaining\n", cqr, (cqr->expires/HZ),
|
||||
cqr->retries);
|
||||
}
|
||||
__dasd_device_check_autoquiesce_timeout(device, cqr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2325,7 +2339,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible)
|
||||
/* Non-temporary stop condition will trigger fail fast */
|
||||
if (device->stopped & ~DASD_STOPPED_PENDING &&
|
||||
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
|
||||
(!dasd_eer_enabled(device))) {
|
||||
!dasd_eer_enabled(device) && device->aq_mask == 0) {
|
||||
cqr->status = DASD_CQR_FAILED;
|
||||
cqr->intrc = -ENOLINK;
|
||||
continue;
|
||||
@ -2801,20 +2815,18 @@ restart:
|
||||
dasd_log_sense(cqr, &cqr->irb);
|
||||
}
|
||||
|
||||
/* First of all call extended error reporting. */
|
||||
if (dasd_eer_enabled(base) &&
|
||||
cqr->status == DASD_CQR_FAILED) {
|
||||
dasd_eer_write(base, cqr, DASD_EER_FATALERROR);
|
||||
|
||||
/* restart request */
|
||||
/*
|
||||
* First call extended error reporting and check for autoquiesce
|
||||
*/
|
||||
spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
|
||||
if (cqr->status == DASD_CQR_FAILED &&
|
||||
dasd_handle_autoquiesce(base, cqr, DASD_EER_FATALERROR)) {
|
||||
cqr->status = DASD_CQR_FILLED;
|
||||
cqr->retries = 255;
|
||||
spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
|
||||
dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE);
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(base->cdev),
|
||||
flags);
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
|
||||
goto restart;
|
||||
}
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
|
||||
|
||||
/* Process finished ERP request. */
|
||||
if (cqr->refers) {
|
||||
@ -2856,7 +2868,7 @@ static void __dasd_block_start_head(struct dasd_block *block)
|
||||
/* Non-temporary stop condition will trigger fail fast */
|
||||
if (block->base->stopped & ~DASD_STOPPED_PENDING &&
|
||||
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
|
||||
(!dasd_eer_enabled(block->base))) {
|
||||
!dasd_eer_enabled(block->base) && block->base->aq_mask == 0) {
|
||||
cqr->status = DASD_CQR_FAILED;
|
||||
cqr->intrc = -ENOLINK;
|
||||
dasd_schedule_block_bh(block);
|
||||
@ -2941,7 +2953,7 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
|
||||
return 0;
|
||||
spin_lock_irq(&cqr->dq->lock);
|
||||
req = (struct request *) cqr->callback_data;
|
||||
blk_mq_requeue_request(req, false);
|
||||
blk_mq_requeue_request(req, true);
|
||||
spin_unlock_irq(&cqr->dq->lock);
|
||||
|
||||
return 0;
|
||||
@ -3670,8 +3682,8 @@ int dasd_generic_last_path_gone(struct dasd_device *device)
|
||||
dev_warn(&device->cdev->dev, "No operational channel path is left "
|
||||
"for the device\n");
|
||||
DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last path gone");
|
||||
/* First of all call extended error reporting. */
|
||||
dasd_eer_write(device, NULL, DASD_EER_NOPATH);
|
||||
/* First call extended error reporting and check for autoquiesce. */
|
||||
dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH);
|
||||
|
||||
if (device->state < DASD_STATE_BASIC)
|
||||
return 0;
|
||||
@ -3803,7 +3815,8 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event)
|
||||
"No verified channel paths remain for the device\n");
|
||||
DBF_DEV_EVENT(DBF_WARNING, device,
|
||||
"%s", "last verified path gone");
|
||||
dasd_eer_write(device, NULL, DASD_EER_NOPATH);
|
||||
/* First call extended error reporting and check for autoquiesce. */
|
||||
dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH);
|
||||
dasd_device_set_stop_bits(device,
|
||||
DASD_STOPPED_DC_WAIT);
|
||||
}
|
||||
@ -3825,7 +3838,8 @@ EXPORT_SYMBOL_GPL(dasd_generic_verify_path);
|
||||
void dasd_generic_space_exhaust(struct dasd_device *device,
|
||||
struct dasd_ccw_req *cqr)
|
||||
{
|
||||
dasd_eer_write(device, NULL, DASD_EER_NOSPC);
|
||||
/* First call extended error reporting and check for autoquiesce. */
|
||||
dasd_handle_autoquiesce(device, NULL, DASD_EER_NOSPC);
|
||||
|
||||
if (device->state < DASD_STATE_BASIC)
|
||||
return;
|
||||
@ -3958,6 +3972,31 @@ void dasd_schedule_requeue(struct dasd_device *device)
|
||||
}
|
||||
EXPORT_SYMBOL(dasd_schedule_requeue);
|
||||
|
||||
static int dasd_handle_autoquiesce(struct dasd_device *device,
|
||||
struct dasd_ccw_req *cqr,
|
||||
unsigned int reason)
|
||||
{
|
||||
/* in any case write eer message with reason */
|
||||
if (dasd_eer_enabled(device))
|
||||
dasd_eer_write(device, cqr, reason);
|
||||
|
||||
if (!test_bit(reason, &device->aq_mask))
|
||||
return 0;
|
||||
|
||||
/* notify eer about autoquiesce */
|
||||
if (dasd_eer_enabled(device))
|
||||
dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE);
|
||||
|
||||
pr_info("%s: The DASD has been put in the quiesce state\n",
|
||||
dev_name(&device->cdev->dev));
|
||||
dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE);
|
||||
|
||||
if (device->features & DASD_FEATURE_REQUEUEQUIESCE)
|
||||
dasd_schedule_requeue(device);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
|
||||
int rdc_buffer_size,
|
||||
int magic)
|
||||
|
@ -50,6 +50,7 @@ struct dasd_devmap {
|
||||
unsigned short features;
|
||||
struct dasd_device *device;
|
||||
struct dasd_copy_relation *copy;
|
||||
unsigned int aq_mask;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1475,6 +1476,128 @@ dasd_eer_store(struct device *dev, struct device_attribute *attr,
|
||||
|
||||
static DEVICE_ATTR(eer_enabled, 0644, dasd_eer_show, dasd_eer_store);
|
||||
|
||||
/*
|
||||
* aq_mask controls if the DASD should be quiesced on certain triggers
|
||||
* The aq_mask attribute is interpreted as bitmap of the DASD_EER_* triggers.
|
||||
*/
|
||||
static ssize_t dasd_aq_mask_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct dasd_devmap *devmap;
|
||||
unsigned int aq_mask = 0;
|
||||
|
||||
devmap = dasd_find_busid(dev_name(dev));
|
||||
if (!IS_ERR(devmap))
|
||||
aq_mask = devmap->aq_mask;
|
||||
|
||||
return sysfs_emit(buf, "%d\n", aq_mask);
|
||||
}
|
||||
|
||||
static ssize_t dasd_aq_mask_store(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct dasd_devmap *devmap;
|
||||
unsigned int val;
|
||||
|
||||
if (kstrtouint(buf, 0, &val) || val > DASD_EER_VALID)
|
||||
return -EINVAL;
|
||||
|
||||
devmap = dasd_devmap_from_cdev(to_ccwdev(dev));
|
||||
if (IS_ERR(devmap))
|
||||
return PTR_ERR(devmap);
|
||||
|
||||
spin_lock(&dasd_devmap_lock);
|
||||
devmap->aq_mask = val;
|
||||
if (devmap->device)
|
||||
devmap->device->aq_mask = devmap->aq_mask;
|
||||
spin_unlock(&dasd_devmap_lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(aq_mask, 0644, dasd_aq_mask_show, dasd_aq_mask_store);
|
||||
|
||||
/*
|
||||
* aq_requeue controls if requests are returned to the blocklayer on quiesce
|
||||
* or if requests are only not started
|
||||
*/
|
||||
static ssize_t dasd_aqr_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct dasd_devmap *devmap;
|
||||
int flag;
|
||||
|
||||
devmap = dasd_find_busid(dev_name(dev));
|
||||
if (!IS_ERR(devmap))
|
||||
flag = (devmap->features & DASD_FEATURE_REQUEUEQUIESCE) != 0;
|
||||
else
|
||||
flag = (DASD_FEATURE_DEFAULT &
|
||||
DASD_FEATURE_REQUEUEQUIESCE) != 0;
|
||||
return sysfs_emit(buf, "%d\n", flag);
|
||||
}
|
||||
|
||||
static ssize_t dasd_aqr_store(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
bool val;
|
||||
int rc;
|
||||
|
||||
if (kstrtobool(buf, &val))
|
||||
return -EINVAL;
|
||||
|
||||
rc = dasd_set_feature(to_ccwdev(dev), DASD_FEATURE_REQUEUEQUIESCE, val);
|
||||
|
||||
return rc ? : count;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(aq_requeue, 0644, dasd_aqr_show, dasd_aqr_store);
|
||||
|
||||
/*
|
||||
* aq_timeouts controls how much retries have to time out until
|
||||
* a device gets autoquiesced
|
||||
*/
|
||||
static ssize_t
|
||||
dasd_aq_timeouts_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct dasd_device *device;
|
||||
int len;
|
||||
|
||||
device = dasd_device_from_cdev(to_ccwdev(dev));
|
||||
if (IS_ERR(device))
|
||||
return -ENODEV;
|
||||
len = sysfs_emit(buf, "%u\n", device->aq_timeouts);
|
||||
dasd_put_device(device);
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
dasd_aq_timeouts_store(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct dasd_device *device;
|
||||
unsigned int val;
|
||||
|
||||
device = dasd_device_from_cdev(to_ccwdev(dev));
|
||||
if (IS_ERR(device))
|
||||
return -ENODEV;
|
||||
|
||||
if ((kstrtouint(buf, 10, &val) != 0) ||
|
||||
val > DASD_RETRIES_MAX || val == 0) {
|
||||
dasd_put_device(device);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (val)
|
||||
device->aq_timeouts = val;
|
||||
|
||||
dasd_put_device(device);
|
||||
return count;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(aq_timeouts, 0644, dasd_aq_timeouts_show,
|
||||
dasd_aq_timeouts_store);
|
||||
|
||||
/*
|
||||
* expiration time for default requests
|
||||
*/
|
||||
@ -2324,6 +2447,9 @@ static struct attribute * dasd_attrs[] = {
|
||||
&dev_attr_copy_pair.attr,
|
||||
&dev_attr_copy_role.attr,
|
||||
&dev_attr_ping.attr,
|
||||
&dev_attr_aq_mask.attr,
|
||||
&dev_attr_aq_requeue.attr,
|
||||
&dev_attr_aq_timeouts.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -2109,6 +2109,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
|
||||
device->default_retries = DASD_RETRIES;
|
||||
device->path_thrhld = DASD_ECKD_PATH_THRHLD;
|
||||
device->path_interval = DASD_ECKD_PATH_INTERVAL;
|
||||
device->aq_timeouts = DASD_RETRIES_MAX;
|
||||
|
||||
if (private->conf.gneq) {
|
||||
value = 1;
|
||||
|
@ -387,6 +387,7 @@ void dasd_eer_write(struct dasd_device *device, struct dasd_ccw_req *cqr,
|
||||
break;
|
||||
case DASD_EER_NOPATH:
|
||||
case DASD_EER_NOSPC:
|
||||
case DASD_EER_AUTOQUIESCE:
|
||||
dasd_eer_write_standard_trigger(device, NULL, id);
|
||||
break;
|
||||
case DASD_EER_STATECHANGE:
|
||||
|
@ -444,22 +444,22 @@ struct dasd_discipline {
|
||||
|
||||
extern struct dasd_discipline *dasd_diag_discipline_pointer;
|
||||
|
||||
/*
|
||||
* Notification numbers for extended error reporting notifications:
|
||||
* The DASD_EER_DISABLE notification is sent before a dasd_device (and it's
|
||||
* eer pointer) is freed. The error reporting module needs to do all necessary
|
||||
* cleanup steps.
|
||||
* The DASD_EER_TRIGGER notification sends the actual error reports (triggers).
|
||||
*/
|
||||
#define DASD_EER_DISABLE 0
|
||||
#define DASD_EER_TRIGGER 1
|
||||
/* Trigger IDs for extended error reporting DASD EER and autoquiesce */
|
||||
enum eer_trigger {
|
||||
DASD_EER_FATALERROR = 1,
|
||||
DASD_EER_NOPATH,
|
||||
DASD_EER_STATECHANGE,
|
||||
DASD_EER_PPRCSUSPEND,
|
||||
DASD_EER_NOSPC,
|
||||
DASD_EER_TIMEOUTS,
|
||||
DASD_EER_STARTIO,
|
||||
|
||||
/* Trigger IDs for extended error reporting DASD_EER_TRIGGER notification */
|
||||
#define DASD_EER_FATALERROR 1
|
||||
#define DASD_EER_NOPATH 2
|
||||
#define DASD_EER_STATECHANGE 3
|
||||
#define DASD_EER_PPRCSUSPEND 4
|
||||
#define DASD_EER_NOSPC 5
|
||||
/* enum end marker, only add new trigger above */
|
||||
DASD_EER_MAX,
|
||||
DASD_EER_AUTOQUIESCE = 31, /* internal only */
|
||||
};
|
||||
|
||||
#define DASD_EER_VALID ((1U << DASD_EER_MAX) - 1)
|
||||
|
||||
/* DASD path handling */
|
||||
|
||||
@ -637,6 +637,8 @@ struct dasd_device {
|
||||
struct dasd_format_entry format_entry;
|
||||
struct kset *paths_info;
|
||||
struct dasd_copy_relation *copy;
|
||||
unsigned long aq_mask;
|
||||
unsigned int aq_timeouts;
|
||||
};
|
||||
|
||||
struct dasd_block {
|
||||
|
@ -95,8 +95,8 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
|
||||
int blk_crypto_start_using_key(struct block_device *bdev,
|
||||
const struct blk_crypto_key *key);
|
||||
|
||||
int blk_crypto_evict_key(struct block_device *bdev,
|
||||
const struct blk_crypto_key *key);
|
||||
void blk_crypto_evict_key(struct block_device *bdev,
|
||||
const struct blk_crypto_key *key);
|
||||
|
||||
bool blk_crypto_config_supported_natively(struct block_device *bdev,
|
||||
const struct blk_crypto_config *cfg);
|
||||
|
@ -1,11 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_BLK_MQ_RDMA_H
|
||||
#define _LINUX_BLK_MQ_RDMA_H
|
||||
|
||||
struct blk_mq_tag_set;
|
||||
struct ib_device;
|
||||
|
||||
void blk_mq_rdma_map_queues(struct blk_mq_queue_map *map,
|
||||
struct ib_device *dev, int first_vec);
|
||||
|
||||
#endif /* _LINUX_BLK_MQ_RDMA_H */
|
@ -57,8 +57,6 @@ typedef __u32 __bitwise req_flags_t;
|
||||
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
|
||||
/* The per-zone write lock is held for this request */
|
||||
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
|
||||
/* already slept for hybrid poll */
|
||||
#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
|
||||
/* ->timeout has been called, don't expire again */
|
||||
#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
|
||||
/* queue has elevator attached */
|
||||
|
@ -40,26 +40,26 @@ struct bio_crypt_ctx;
|
||||
struct block_device {
|
||||
sector_t bd_start_sect;
|
||||
sector_t bd_nr_sectors;
|
||||
struct gendisk * bd_disk;
|
||||
struct request_queue * bd_queue;
|
||||
struct disk_stats __percpu *bd_stats;
|
||||
unsigned long bd_stamp;
|
||||
bool bd_read_only; /* read-only policy */
|
||||
u8 bd_partno;
|
||||
bool bd_write_holder;
|
||||
bool bd_has_submit_bio;
|
||||
dev_t bd_dev;
|
||||
atomic_t bd_openers;
|
||||
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
|
||||
struct inode * bd_inode; /* will die */
|
||||
struct super_block * bd_super;
|
||||
void * bd_claiming;
|
||||
struct device bd_device;
|
||||
void * bd_holder;
|
||||
int bd_holders;
|
||||
bool bd_write_holder;
|
||||
struct kobject *bd_holder_dir;
|
||||
u8 bd_partno;
|
||||
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
|
||||
struct gendisk * bd_disk;
|
||||
struct request_queue * bd_queue;
|
||||
|
||||
/* The counter of freeze processes */
|
||||
int bd_fsfreeze_count;
|
||||
int bd_holders;
|
||||
struct kobject *bd_holder_dir;
|
||||
|
||||
/* Mutex for freeze */
|
||||
struct mutex bd_fsfreeze_mutex;
|
||||
struct super_block *bd_fsfreeze_sb;
|
||||
@ -68,6 +68,11 @@ struct block_device {
|
||||
#ifdef CONFIG_FAIL_MAKE_REQUEST
|
||||
bool bd_make_it_fail;
|
||||
#endif
|
||||
/*
|
||||
* keep this out-of-line as it's both big and not needed in the fast
|
||||
* path
|
||||
*/
|
||||
struct device bd_device;
|
||||
} __randomize_layout;
|
||||
|
||||
#define bdev_whole(_bdev) \
|
||||
|
@ -44,12 +44,6 @@ extern const struct device_type disk_type;
|
||||
extern struct device_type part_type;
|
||||
extern struct class block_class;
|
||||
|
||||
/* Must be consistent with blk_mq_poll_stats_bkt() */
|
||||
#define BLK_MQ_POLL_STATS_BKTS 16
|
||||
|
||||
/* Doing classic polling */
|
||||
#define BLK_MQ_POLL_CLASSIC -1
|
||||
|
||||
/*
|
||||
* Maximum number of blkcg policies allowed to be registered concurrently.
|
||||
* Defined here to simplify include dependency.
|
||||
@ -468,10 +462,6 @@ struct request_queue {
|
||||
#endif
|
||||
|
||||
unsigned int rq_timeout;
|
||||
int poll_nsec;
|
||||
|
||||
struct blk_stat_callback *poll_cb;
|
||||
struct blk_rq_stat *poll_stat;
|
||||
|
||||
struct timer_list timeout;
|
||||
struct work_struct timeout_work;
|
||||
@ -870,8 +860,6 @@ blk_status_t errno_to_blk_status(int errno);
|
||||
|
||||
/* only poll the hardware once, don't continue until a completion was found */
|
||||
#define BLK_POLL_ONESHOT (1 << 0)
|
||||
/* do not sleep to wait for the expected completion time */
|
||||
#define BLK_POLL_NOSLEEP (1 << 1)
|
||||
int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags);
|
||||
int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
|
||||
unsigned int flags);
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/configfs.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
@ -65,6 +66,27 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
|
||||
|
||||
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
|
||||
|
||||
#ifdef CONFIG_FAULT_INJECTION_CONFIGFS
|
||||
|
||||
struct fault_config {
|
||||
struct fault_attr attr;
|
||||
struct config_group group;
|
||||
};
|
||||
|
||||
void fault_config_init(struct fault_config *config, const char *name);
|
||||
|
||||
#else /* CONFIG_FAULT_INJECTION_CONFIGFS */
|
||||
|
||||
/* Empty placeholder used when CONFIG_FAULT_INJECTION_CONFIGFS is not set. */
struct fault_config {
};
|
||||
|
||||
/*
 * Stub for builds without CONFIG_FAULT_INJECTION_CONFIGFS: accepts the same
 * arguments as the real initializer and does nothing.
 */
static inline void fault_config_init(struct fault_config *config,
				     const char *name)
{
	/* intentionally empty */
}
|
||||
|
||||
#endif /* CONFIG_FAULT_INJECTION_CONFIGFS */
|
||||
|
||||
#endif /* CONFIG_FAULT_INJECTION */
|
||||
|
||||
struct kmem_cache;
|
||||
|
@ -209,7 +209,7 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \
|
||||
* Magic: define op number to op name mapping {{{1
|
||||
* {{{2
|
||||
*/
|
||||
const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
|
||||
static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
|
||||
{
|
||||
switch (cmd) {
|
||||
#undef GENL_op
|
||||
|
@ -45,6 +45,8 @@ static inline bool is_sed_ioctl(unsigned int cmd)
|
||||
case IOC_OPAL_WRITE_SHADOW_MBR:
|
||||
case IOC_OPAL_GENERIC_TABLE_RW:
|
||||
case IOC_OPAL_GET_STATUS:
|
||||
case IOC_OPAL_GET_LR_STATUS:
|
||||
case IOC_OPAL_GET_GEOMETRY:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -78,6 +78,16 @@ struct opal_user_lr_setup {
|
||||
struct opal_session_info session;
|
||||
};
|
||||
|
||||
struct opal_lr_status {
|
||||
struct opal_session_info session;
|
||||
__u64 range_start;
|
||||
__u64 range_length;
|
||||
__u32 RLE; /* Read Lock enabled */
|
||||
__u32 WLE; /* Write Lock Enabled */
|
||||
__u32 l_state;
|
||||
__u8 align[4];
|
||||
};
|
||||
|
||||
struct opal_lock_unlock {
|
||||
struct opal_session_info session;
|
||||
__u32 l_state;
|
||||
@ -151,6 +161,18 @@ struct opal_status {
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
/*
|
||||
* Geometry Reporting per TCG Storage OPAL SSC
|
||||
* section 3.1.1.4
|
||||
*/
|
||||
struct opal_geometry {
|
||||
__u8 align;
|
||||
__u32 logical_block_size;
|
||||
__u64 alignment_granularity;
|
||||
__u64 lowest_aligned_lba;
|
||||
__u8 __align[3];
|
||||
};
|
||||
|
||||
#define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock)
|
||||
#define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock)
|
||||
#define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key)
|
||||
@ -168,5 +190,7 @@ struct opal_status {
|
||||
#define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr)
|
||||
#define IOC_OPAL_GENERIC_TABLE_RW _IOW('p', 235, struct opal_read_write_table)
|
||||
#define IOC_OPAL_GET_STATUS _IOR('p', 236, struct opal_status)
|
||||
#define IOC_OPAL_GET_LR_STATUS _IOW('p', 237, struct opal_lr_status)
|
||||
#define IOC_OPAL_GET_GEOMETRY _IOR('p', 238, struct opal_geometry)
|
||||
|
||||
#endif /* _UAPI_SED_OPAL_H */
|
||||
|
@ -8,6 +8,9 @@
|
||||
|
||||
/*
|
||||
* Admin commands, issued by ublk server, and handled by ublk driver.
|
||||
*
|
||||
* Legacy command definitions: do not use them in new applications, and do
|
||||
* not add any more definitions of this kind.
|
||||
*/
|
||||
#define UBLK_CMD_GET_QUEUE_AFFINITY 0x01
|
||||
#define UBLK_CMD_GET_DEV_INFO 0x02
|
||||
@ -21,6 +24,30 @@
|
||||
#define UBLK_CMD_END_USER_RECOVERY 0x11
|
||||
#define UBLK_CMD_GET_DEV_INFO2 0x12
|
||||
|
||||
/* Any new ctrl command should be encoded with _IO*() */
|
||||
/*
 * ioctl-encoded control commands. Each one wraps the legacy command number
 * of the same name in an _IOR()/_IOWR() encoding with 'u' as the type and
 * struct ublksrv_ctrl_cmd as the payload.
 */
#define UBLK_U_CMD_GET_QUEUE_AFFINITY \
	_IOR('u', UBLK_CMD_GET_QUEUE_AFFINITY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_DEV_INFO \
	_IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_ADD_DEV \
	_IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_DEL_DEV \
	_IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_START_DEV \
	_IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_STOP_DEV \
	_IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_SET_PARAMS \
	_IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_PARAMS \
	_IOR('u', UBLK_CMD_GET_PARAMS, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_START_USER_RECOVERY \
	_IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_END_USER_RECOVERY \
	_IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_DEV_INFO2 \
	_IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd)
|
||||
|
||||
/*
|
||||
* IO commands, issued by ublk server, and handled by ublk driver.
|
||||
*
|
||||
@ -41,10 +68,23 @@
|
||||
* It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag
|
||||
* while starting a ublk device.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Legacy IO command definitions: do not use them in new applications, and
|
||||
* do not add any more definitions of this kind.
|
||||
*/
|
||||
#define UBLK_IO_FETCH_REQ 0x20
|
||||
#define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21
|
||||
#define UBLK_IO_NEED_GET_DATA 0x22
|
||||
|
||||
/* Any new IO command should be encoded with _IOWR() */
|
||||
/*
 * ioctl-encoded IO commands. Each one wraps the legacy IO command number of
 * the same name in an _IOWR() encoding with 'u' as the type and
 * struct ublksrv_io_cmd as the payload.
 */
#define UBLK_U_IO_FETCH_REQ \
	_IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd)
#define UBLK_U_IO_COMMIT_AND_FETCH_REQ \
	_IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
#define UBLK_U_IO_NEED_GET_DATA \
	_IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
|
||||
|
||||
/* only ABORT means that no re-fetch */
|
||||
#define UBLK_IO_RES_OK 0
|
||||
#define UBLK_IO_RES_NEED_GET_DATA 1
|
||||
@ -102,6 +142,9 @@
|
||||
*/
|
||||
#define UBLK_F_UNPRIVILEGED_DEV (1UL << 5)
|
||||
|
||||
/* use ioctl encoding for uring command */
|
||||
#define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6)
|
||||
|
||||
/* device state */
|
||||
#define UBLK_S_DEV_DEAD 0
|
||||
#define UBLK_S_DEV_LIVE 1
|
||||
|
@ -1001,7 +1001,7 @@ void io_rw_fail(struct io_kiocb *req)
|
||||
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
|
||||
{
|
||||
struct io_wq_work_node *pos, *start, *prev;
|
||||
unsigned int poll_flags = BLK_POLL_NOSLEEP;
|
||||
unsigned int poll_flags = 0;
|
||||
DEFINE_IO_COMP_BATCH(iob);
|
||||
int nr_events = 0;
|
||||
|
||||
|
@ -1958,9 +1958,21 @@ config FAIL_SUNRPC
|
||||
Provide fault-injection capability for SunRPC and
|
||||
its consumers.
|
||||
|
||||
config FAULT_INJECTION_CONFIGFS
|
||||
bool "Configfs interface for fault-injection capabilities"
|
||||
depends on FAULT_INJECTION
|
||||
select CONFIGFS_FS
|
||||
help
|
||||
This option allows configfs-based drivers to dynamically configure
|
||||
fault-injection via configfs. Each parameter for driver-specific
|
||||
fault-injection can be made visible as a configfs attribute in a
|
||||
configfs group.
|
||||
|
||||
|
||||
config FAULT_INJECTION_STACKTRACE_FILTER
|
||||
bool "stacktrace filter for fault-injection capabilities"
|
||||
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
|
||||
depends on FAULT_INJECTION
|
||||
depends on (FAULT_INJECTION_DEBUG_FS || FAULT_INJECTION_CONFIGFS) && STACKTRACE_SUPPORT
|
||||
select STACKTRACE
|
||||
depends on FRAME_POINTER || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86
|
||||
help
|
||||
|
@ -244,3 +244,194 @@ struct dentry *fault_create_debugfs_attr(const char *name,
|
||||
EXPORT_SYMBOL_GPL(fault_create_debugfs_attr);
|
||||
|
||||
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
|
||||
|
||||
#ifdef CONFIG_FAULT_INJECTION_CONFIGFS
|
||||
|
||||
/* These configfs attribute utilities are copied from drivers/block/null_blk/main.c */
|
||||
|
||||
static ssize_t fault_uint_attr_show(unsigned int val, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE, "%u\n", val);
|
||||
}
|
||||
|
||||
static ssize_t fault_ulong_attr_show(unsigned long val, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE, "%lu\n", val);
|
||||
}
|
||||
|
||||
static ssize_t fault_bool_attr_show(bool val, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE, "%u\n", val);
|
||||
}
|
||||
|
||||
/*
 * Write the current value of the atomic counter @val (passed by value) into
 * @page as a signed decimal number followed by a newline.
 */
static ssize_t fault_atomic_t_attr_show(atomic_t val, char *page)
{
	int snapshot = atomic_read(&val);

	return snprintf(page, PAGE_SIZE, "%d\n", snapshot);
}
|
||||
|
||||
/*
 * Parse @page with kstrtouint() (base 0) and store the result in *@val.
 *
 * Returns @count on success. On a parse error the negative error code from
 * kstrtouint() is returned and *@val is left untouched.
 */
static ssize_t fault_uint_attr_store(unsigned int *val, const char *page, size_t count)
{
	unsigned int parsed;
	int err = kstrtouint(page, 0, &parsed);

	if (err < 0)
		return err;

	*val = parsed;
	return count;
}
|
||||
|
||||
/*
 * Parse @page with kstrtoul() (base 0) and store the result in *@val.
 *
 * Returns @count on success. On a parse error the negative error code from
 * kstrtoul() is returned and *@val is left untouched.
 */
static ssize_t fault_ulong_attr_store(unsigned long *val, const char *page, size_t count)
{
	unsigned long parsed;
	int err = kstrtoul(page, 0, &parsed);

	if (err < 0)
		return err;

	*val = parsed;
	return count;
}
|
||||
|
||||
/*
 * Parse @page with kstrtobool() and store the result in *@val.
 *
 * Returns @count on success. On a parse error the negative error code from
 * kstrtobool() is returned and *@val is left untouched.
 */
static ssize_t fault_bool_attr_store(bool *val, const char *page, size_t count)
{
	bool parsed;
	int err = kstrtobool(page, &parsed);

	if (err < 0)
		return err;

	*val = parsed;
	return count;
}
|
||||
|
||||
/*
 * Parse @page with kstrtoint() (base 0) and atomically set *@val to the
 * result.
 *
 * Returns @count on success. On a parse error the negative error code from
 * kstrtoint() is returned and *@val is left untouched.
 */
static ssize_t fault_atomic_t_attr_store(atomic_t *val, const char *page, size_t count)
{
	int parsed;
	int err = kstrtoint(page, 0, &parsed);

	if (err < 0)
		return err;

	atomic_set(val, parsed);
	return count;
}
|
||||
|
||||
/*
 * Define a mode-0644 configfs attribute object named <_pfx>attr_<_name>.
 * The file name shown in configfs is _attr_name; the handlers are
 * <_pfx><_name>_show and <_pfx><_name>_store, which must already be defined.
 */
#define CONFIGFS_ATTR_NAMED(_pfx, _name, _attr_name) \
static struct configfs_attribute _pfx##attr_##_name = { \
	.ca_owner = THIS_MODULE, \
	.ca_name = _attr_name, \
	.ca_mode = 0644, \
	.show = _pfx##_name##_show, \
	.store = _pfx##_name##_store, \
}
|
||||
|
||||
static struct fault_config *to_fault_config(struct config_item *item)
|
||||
{
|
||||
return container_of(to_config_group(item), struct fault_config, group);
|
||||
}
|
||||
|
||||
/*
 * Generate fault_<NAME>_show() and fault_<NAME>_store() for the fault_attr
 * member MEMBER, delegating to the fault_<TYPE>_attr_show/store helpers,
 * and register them as the configfs attribute called ATTR_NAME.
 */
#define FAULT_CONFIGFS_ATTR_NAMED(NAME, ATTR_NAME, MEMBER, TYPE) \
static ssize_t fault_##NAME##_show(struct config_item *item, char *page) \
{ \
	struct fault_config *config = to_fault_config(item); \
 \
	return fault_##TYPE##_attr_show(config->attr.MEMBER, page); \
} \
static ssize_t fault_##NAME##_store(struct config_item *item, const char *page, size_t count) \
{ \
	return fault_##TYPE##_attr_store(&to_fault_config(item)->attr.MEMBER, \
					 page, count); \
} \
CONFIGFS_ATTR_NAMED(fault_, NAME, ATTR_NAME)
|
||||
|
||||
/*
 * Shorthand for attributes whose configfs file name and fault_attr member
 * are both spelled NAME.
 */
#define FAULT_CONFIGFS_ATTR(NAME, TYPE) \
	FAULT_CONFIGFS_ATTR_NAMED(NAME, __stringify(NAME), NAME, TYPE)
|
||||
|
||||
FAULT_CONFIGFS_ATTR(probability, ulong);
|
||||
FAULT_CONFIGFS_ATTR(interval, ulong);
|
||||
FAULT_CONFIGFS_ATTR(times, atomic_t);
|
||||
FAULT_CONFIGFS_ATTR(space, atomic_t);
|
||||
FAULT_CONFIGFS_ATTR(verbose, ulong);
|
||||
FAULT_CONFIGFS_ATTR_NAMED(ratelimit_interval, "verbose_ratelimit_interval_ms",
|
||||
ratelimit_state.interval, uint);
|
||||
FAULT_CONFIGFS_ATTR_NAMED(ratelimit_burst, "verbose_ratelimit_burst",
|
||||
ratelimit_state.burst, uint);
|
||||
FAULT_CONFIGFS_ATTR_NAMED(task_filter, "task-filter", task_filter, bool);
|
||||
|
||||
#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
|
||||
|
||||
static ssize_t fault_stacktrace_depth_show(struct config_item *item, char *page)
|
||||
{
|
||||
return fault_ulong_attr_show(to_fault_config(item)->attr.stacktrace_depth, page);
|
||||
}
|
||||
|
||||
static ssize_t fault_stacktrace_depth_store(struct config_item *item, const char *page,
|
||||
size_t count)
|
||||
{
|
||||
int result;
|
||||
unsigned long tmp;
|
||||
|
||||
result = kstrtoul(page, 0, &tmp);
|
||||
if (result < 0)
|
||||
return result;
|
||||
|
||||
to_fault_config(item)->attr.stacktrace_depth =
|
||||
min_t(unsigned long, tmp, MAX_STACK_TRACE_DEPTH);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
CONFIGFS_ATTR_NAMED(fault_, stacktrace_depth, "stacktrace-depth");
|
||||
|
||||
static ssize_t fault_xul_attr_show(unsigned long val, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE,
|
||||
sizeof(val) == sizeof(u32) ? "0x%08lx\n" : "0x%016lx\n", val);
|
||||
}
|
||||
|
||||
/*
 * Store handler for the hex-displayed unsigned long attributes. Parsing is
 * the same as for plain unsigned long values, so it forwards to
 * fault_ulong_attr_store().
 */
static ssize_t fault_xul_attr_store(unsigned long *val, const char *page, size_t count)
{
	ssize_t ret = fault_ulong_attr_store(val, page, count);

	return ret;
}
|
||||
|
||||
/* Address-range attributes, shown in hex through the xul helpers. */
FAULT_CONFIGFS_ATTR_NAMED(require_start, "require-start", require_start, xul);
FAULT_CONFIGFS_ATTR_NAMED(require_end, "require-end", require_end, xul);
FAULT_CONFIGFS_ATTR_NAMED(reject_start, "reject-start", reject_start, xul);
FAULT_CONFIGFS_ATTR_NAMED(reject_end, "reject-end", reject_end, xul);
|
||||
|
||||
#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
|
||||
|
||||
static struct configfs_attribute *fault_config_attrs[] = {
|
||||
&fault_attr_probability,
|
||||
&fault_attr_interval,
|
||||
&fault_attr_times,
|
||||
&fault_attr_space,
|
||||
&fault_attr_verbose,
|
||||
&fault_attr_ratelimit_interval,
|
||||
&fault_attr_ratelimit_burst,
|
||||
&fault_attr_task_filter,
|
||||
#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
|
||||
&fault_attr_stacktrace_depth,
|
||||
&fault_attr_require_start,
|
||||
&fault_attr_require_end,
|
||||
&fault_attr_reject_start,
|
||||
&fault_attr_reject_end,
|
||||
#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct config_item_type fault_config_type = {
|
||||
.ct_attrs = fault_config_attrs,
|
||||
.ct_owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
void fault_config_init(struct fault_config *config, const char *name)
|
||||
{
|
||||
config_group_init_type_name(&config->group, name, &fault_config_type);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(fault_config_init);
|
||||
|
||||
#endif /* CONFIG_FAULT_INJECTION_CONFIGFS */
|
||||
|
Loading…
Reference in New Issue
Block a user