Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A smaller collection of fixes that should go into -rc1. This contains:

   - A fix from Christoph, fixing a regression with the WRITE_SAME and
     partial completions. Caused a BUG() on ppc.

   - Fixup for __blk_mq_stop_hw_queues(), it should be static. From Colin.

   - Removal of dmesg error messages on elevator switching, when invoked
     from sysfs. From me.

   - Fix for blk-stat, using this_cpu_ptr() in a section only protected
     by rcu_read_lock(). This breaks when PREEMPT_RCU is enabled. From me.

   - Two fixes for BFQ from Paolo, one fixing a crash and one updating
     the documentation.

   - An error handling lightnvm memory leak, from Rakesh.

   - The previous blk-mq hot unplug lock reversal depends on the CPU
     hotplug rework that isn't in mainline yet. This caused a lockdep
     splat when people unplugged CPUs with blk-mq devices. From Wanpeng.

   - A regression fix for DIF/DIX on blk-mq. From Wen"

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: handle partial completions for special payload requests
  blk-mq: NVMe 512B/4K+T10 DIF/DIX format returns I/O error on dd with split op
  blk-stat: don't use this_cpu_ptr() in a preemptable section
  elevator: remove redundant warnings on IO scheduler switch
  block, bfq: stress that low_latency must be off to get max throughput
  block, bfq: use pointer entity->sched_data only if set
  nvme: lightnvm: fix memory leak
  blk-mq: make __blk_mq_stop_hw_queues static
  lightnvm: remove unused rq parameter of nvme_nvm_rqtocmd() to kill warning
  block/mq: fix potential deadlock during cpu hotplug
commit 55a1ab56c7
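
Background for the blk-stat change below: with CONFIG_PREEMPT_RCU enabled, rcu_read_lock() does not disable preemption, so calling this_cpu_ptr() in a section protected only by RCU can run preemptibly, trigger a "using smp_processor_id() in preemptible code" warning, and update another CPU's counters after a migration. The following is a minimal sketch of that pattern, not the upstream code; struct stat_cb and stat_cb_add() are made-up names standing in for the blk-stat callback machinery.

/* Sketch: per-CPU update inside an RCU read-side section. */
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/types.h>

struct stat_cb {
        u64 __percpu *cpu_stat;        /* hypothetical per-CPU counters */
};

static void stat_cb_add(struct stat_cb *cb, u64 value)
{
        u64 *stat;

        rcu_read_lock();               /* preemptible when PREEMPT_RCU=y */

        /* Broken: this_cpu_ptr() assumes preemption is already disabled. */
        /* stat = this_cpu_ptr(cb->cpu_stat); */

        /* Fixed: get_cpu_ptr()/put_cpu_ptr() disable preemption around the access. */
        stat = get_cpu_ptr(cb->cpu_stat);
        *stat += value;
        put_cpu_ptr(cb->cpu_stat);

        rcu_read_unlock();
}
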
@@ -11,6 +11,13 @@ controllers), BFQ's main features are:
   groups (switching back to time distribution when needed to keep
   throughput high).
 
+In its default configuration, BFQ privileges latency over
+throughput. So, when needed for achieving a lower latency, BFQ builds
+schedules that may lead to a lower throughput. If your main or only
+goal, for a given device, is to achieve the maximum-possible
+throughput at all times, then do switch off all low-latency heuristics
+for that device, by setting low_latency to 0. Full details in Section 3.
+
 On average CPUs, the current version of BFQ can handle devices
 performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
 reference, 30-50 KIOPS correspond to very high bandwidths with
@@ -375,11 +382,19 @@ default, low latency mode is enabled. If enabled, interactive and soft
 real-time applications are privileged and experience a lower latency,
 as explained in more detail in the description of how BFQ works.
 
-DO NOT enable this mode if you need full control on bandwidth
+DISABLE this mode if you need full control on bandwidth
 distribution. In fact, if it is enabled, then BFQ automatically
 increases the bandwidth share of privileged applications, as the main
 means to guarantee a lower latency to them.
 
+In addition, as already highlighted at the beginning of this document,
+DISABLE this mode if your only goal is to achieve a high throughput.
+In fact, privileging the I/O of some application over the rest may
+entail a lower throughput. To achieve the highest-possible throughput
+on a non-rotational device, setting slice_idle to 0 may be needed too
+(at the cost of giving up any strong guarantee on fairness and low
+latency).
+
 timeout_sync
 ------------
 
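
The low_latency and slice_idle tunables mentioned in the documentation above are exposed per device through sysfs while BFQ is the active scheduler. A minimal userspace sketch of switching one device to the maximum-throughput configuration follows; scheduler attributes live under /sys/block/<disk>/queue/iosched/, but "sda" is only an example device name, not something taken from this commit.

/* Illustrative only: write "0" to BFQ's low_latency and slice_idle attributes. */
#include <stdio.h>

static int write_attr(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                return -1;
        }
        fputs(val, f);
        fclose(f);
        return 0;
}

int main(void)
{
        /* Adjust "sda" to the target disk; requires root and BFQ as the active scheduler. */
        write_attr("/sys/block/sda/queue/iosched/low_latency", "0");
        write_attr("/sys/block/sda/queue/iosched/slice_idle", "0");
        return 0;
}
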
@@ -56,6 +56,11 @@
  * rotational or flash-based devices, and to get the job done quickly
  * for applications consisting in many I/O-bound processes.
  *
+ * NOTE: if the main or only goal, with a given device, is to achieve
+ * the maximum-possible throughput at all times, then do switch off
+ * all low-latency heuristics for that device, by setting low_latency
+ * to 0.
+ *
  * BFQ is described in [1], where also a reference to the initial, more
  * theoretical paper on BFQ can be found. The interested reader can find
  * in the latter paper full details on the main algorithm, as well as
@@ -1114,12 +1114,21 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
 bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 {
         struct bfq_sched_data *sd = entity->sched_data;
-        struct bfq_service_tree *st = bfq_entity_service_tree(entity);
-        int is_in_service = entity == sd->in_service_entity;
+        struct bfq_service_tree *st;
+        bool is_in_service;
 
         if (!entity->on_st) /* entity never activated, or already inactive */
                 return false;
 
+        /*
+         * If we get here, then entity is active, which implies that
+         * bfq_group_set_parent has already been invoked for the group
+         * represented by entity. Therefore, the field
+         * entity->sched_data has been set, and we can safely use it.
+         */
+        st = bfq_entity_service_tree(entity);
+        is_in_service = entity == sd->in_service_entity;
+
         if (is_in_service)
                 bfq_calc_finish(entity, entity->service);
 
@@ -2644,8 +2644,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                 return false;
         }
 
-        WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);
-
         req->__data_len -= total_bytes;
 
         /* update sector only for requests with clear definition of sector */
@@ -2658,17 +2656,19 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                 req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
         }
 
-        /*
-         * If total number of sectors is less than the first segment
-         * size, something has gone terribly wrong.
-         */
-        if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
-                blk_dump_rq_flags(req, "request botched");
-                req->__data_len = blk_rq_cur_bytes(req);
-        }
+        if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
+                /*
+                 * If total number of sectors is less than the first segment
+                 * size, something has gone terribly wrong.
+                 */
+                if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+                        blk_dump_rq_flags(req, "request botched");
+                        req->__data_len = blk_rq_cur_bytes(req);
+                }
 
-        /* recalculate the number of segments */
-        blk_recalc_rq_segments(req);
+                /* recalculate the number of segments */
+                blk_recalc_rq_segments(req);
+        }
 
         return true;
 }
@@ -1236,7 +1236,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 
-void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
 {
         struct blk_mq_hw_ctx *hctx;
         int i;
@@ -1554,13 +1554,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
         blk_queue_bounce(q, &bio);
 
+        blk_queue_split(q, &bio, q->bio_split);
+
         if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
                 bio_io_error(bio);
                 return BLK_QC_T_NONE;
         }
 
-        blk_queue_split(q, &bio, q->bio_split);
-
         if (!is_flush_fua && !blk_queue_nomerges(q) &&
             blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
                 return BLK_QC_T_NONE;
@@ -2341,15 +2341,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
         blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-        mutex_lock(&all_q_mutex);
         get_online_cpus();
+        mutex_lock(&all_q_mutex);
 
         list_add_tail(&q->all_q_node, &all_q_list);
         blk_mq_add_queue_tag_set(set, q);
         blk_mq_map_swqueue(q, cpu_online_mask);
 
-        put_online_cpus();
         mutex_unlock(&all_q_mutex);
+        put_online_cpus();
 
         if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
                 int ret;
@@ -96,13 +96,16 @@ void blk_stat_add(struct request *rq)
 
         rcu_read_lock();
         list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
-                if (blk_stat_is_active(cb)) {
-                        bucket = cb->bucket_fn(rq);
-                        if (bucket < 0)
-                                continue;
-                        stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
-                        __blk_stat_add(stat, value);
-                }
+                if (!blk_stat_is_active(cb))
+                        continue;
+
+                bucket = cb->bucket_fn(rq);
+                if (bucket < 0)
+                        continue;
+
+                stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
+                __blk_stat_add(stat, value);
+                put_cpu_ptr(cb->cpu_stat);
         }
         rcu_read_unlock();
 }
@@ -1062,10 +1062,8 @@ static int __elevator_change(struct request_queue *q, const char *name)
 
         strlcpy(elevator_name, name, sizeof(elevator_name));
         e = elevator_get(strstrip(elevator_name), true);
-        if (!e) {
-                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
+        if (!e)
                 return -EINVAL;
-        }
 
         if (q->elevator &&
             !strcmp(elevator_name, q->elevator->type->elevator_name)) {
@@ -1105,7 +1103,6 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
         if (!ret)
                 return count;
 
-        printk(KERN_ERR "elevator: switch to %s failed\n", name);
         return ret;
 }
 
@@ -367,7 +367,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 
                 if (unlikely(elba > nvmdev->total_secs)) {
                         pr_err("nvm: L2P data from device is out of bounds!\n");
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto out;
                 }
 
                 /* Transform physical address to target address space */
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
|
static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
|
||||||
struct nvme_ns *ns, struct nvme_nvm_command *c)
|
struct nvme_nvm_command *c)
|
||||||
{
|
{
|
||||||
c->ph_rw.opcode = rqd->opcode;
|
c->ph_rw.opcode = rqd->opcode;
|
||||||
c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
|
c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
|
||||||
@ -503,7 +504,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
|
|||||||
if (!cmd)
|
if (!cmd)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
|
nvme_nvm_rqtocmd(rqd, ns, cmd);
|
||||||
|
|
||||||
rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
|
rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
|
||||||
if (IS_ERR(rq)) {
|
if (IS_ERR(rq)) {
|
||||||
|