block: move blk-throtl fast path inline
Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
079a2e3e86
commit
a7b36ee6ba
@ -32,6 +32,7 @@
|
||||
#include <linux/psi.h>
|
||||
#include "blk.h"
|
||||
#include "blk-ioprio.h"
|
||||
#include "blk-throttle.h"
|
||||
|
||||
/*
|
||||
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-pm.h"
|
||||
#include "blk-throttle.h"
|
||||
|
||||
struct dentry *blk_debugfs_root;
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-rq-qos.h"
|
||||
#include "blk-throttle.h"
|
||||
|
||||
static inline bool bio_will_gap(struct request_queue *q,
|
||||
struct request *prev_rq, struct bio *prev, struct bio *next)
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-wbt.h"
|
||||
#include "blk-throttle.h"
|
||||
|
||||
struct queue_sysfs_entry {
|
||||
struct attribute attr;
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include "blk.h"
|
||||
#include "blk-cgroup-rwstat.h"
|
||||
#include "blk-throttle.h"
|
||||
|
||||
/* Max dispatch from a group in 1 round */
|
||||
#define THROTL_GRP_QUANTUM 8
|
||||
@ -37,60 +38,9 @@
|
||||
*/
|
||||
#define LATENCY_FILTERED_HD (1000L) /* 1ms */
|
||||
|
||||
static struct blkcg_policy blkcg_policy_throtl;
|
||||
|
||||
/* A workqueue to queue throttle related work */
|
||||
static struct workqueue_struct *kthrotld_workqueue;
|
||||
|
||||
/*
|
||||
* To implement hierarchical throttling, throtl_grps form a tree and bios
|
||||
* are dispatched upwards level by level until they reach the top and get
|
||||
* issued. When dispatching bios from the children and local group at each
|
||||
* level, if the bios are dispatched into a single bio_list, there's a risk
|
||||
* of a local or child group which can queue many bios at once filling up
|
||||
* the list starving others.
|
||||
*
|
||||
* To avoid such starvation, dispatched bios are queued separately
|
||||
* according to where they came from. When they are again dispatched to
|
||||
* the parent, they're popped in round-robin order so that no single source
|
||||
* hogs the dispatch window.
|
||||
*
|
||||
* throtl_qnode is used to keep the queued bios separated by their sources.
|
||||
* Bios are queued to throtl_qnode which in turn is queued to
|
||||
* throtl_service_queue and then dispatched in round-robin order.
|
||||
*
|
||||
* It's also used to track the reference counts on blkg's. A qnode always
|
||||
* belongs to a throtl_grp and gets queued on itself or the parent, so
|
||||
* incrementing the reference of the associated throtl_grp when a qnode is
|
||||
* queued and decrementing when dequeued is enough to keep the whole blkg
|
||||
* tree pinned while bios are in flight.
|
||||
*/
|
||||
struct throtl_qnode {
|
||||
struct list_head node; /* service_queue->queued[] */
|
||||
struct bio_list bios; /* queued bios */
|
||||
struct throtl_grp *tg; /* tg this qnode belongs to */
|
||||
};
|
||||
|
||||
struct throtl_service_queue {
|
||||
struct throtl_service_queue *parent_sq; /* the parent service_queue */
|
||||
|
||||
/*
|
||||
* Bios queued directly to this service_queue or dispatched from
|
||||
* children throtl_grp's.
|
||||
*/
|
||||
struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */
|
||||
unsigned int nr_queued[2]; /* number of queued bios */
|
||||
|
||||
/*
|
||||
* RB tree of active children throtl_grp's, which are sorted by
|
||||
* their ->disptime.
|
||||
*/
|
||||
struct rb_root_cached pending_tree; /* RB tree of active tgs */
|
||||
unsigned int nr_pending; /* # queued in the tree */
|
||||
unsigned long first_pending_disptime; /* disptime of the first tg */
|
||||
struct timer_list pending_timer; /* fires on first_pending_disptime */
|
||||
};
|
||||
|
||||
enum tg_state_flags {
|
||||
THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
|
||||
THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */
|
||||
@ -98,93 +48,6 @@ enum tg_state_flags {
|
||||
|
||||
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
|
||||
|
||||
enum {
|
||||
LIMIT_LOW,
|
||||
LIMIT_MAX,
|
||||
LIMIT_CNT,
|
||||
};
|
||||
|
||||
struct throtl_grp {
|
||||
/* must be the first member */
|
||||
struct blkg_policy_data pd;
|
||||
|
||||
/* active throtl group service_queue member */
|
||||
struct rb_node rb_node;
|
||||
|
||||
/* throtl_data this group belongs to */
|
||||
struct throtl_data *td;
|
||||
|
||||
/* this group's service queue */
|
||||
struct throtl_service_queue service_queue;
|
||||
|
||||
/*
|
||||
* qnode_on_self is used when bios are directly queued to this
|
||||
* throtl_grp so that local bios compete fairly with bios
|
||||
* dispatched from children. qnode_on_parent is used when bios are
|
||||
* dispatched from this throtl_grp into its parent and will compete
|
||||
* with the sibling qnode_on_parents and the parent's
|
||||
* qnode_on_self.
|
||||
*/
|
||||
struct throtl_qnode qnode_on_self[2];
|
||||
struct throtl_qnode qnode_on_parent[2];
|
||||
|
||||
/*
|
||||
* Dispatch time in jiffies. This is the estimated time when group
|
||||
* will unthrottle and is ready to dispatch more bio. It is used as
|
||||
* key to sort active groups in service tree.
|
||||
*/
|
||||
unsigned long disptime;
|
||||
|
||||
unsigned int flags;
|
||||
|
||||
/* are there any throtl rules between this group and td? */
|
||||
bool has_rules[2];
|
||||
|
||||
/* internally used bytes per second rate limits */
|
||||
uint64_t bps[2][LIMIT_CNT];
|
||||
/* user configured bps limits */
|
||||
uint64_t bps_conf[2][LIMIT_CNT];
|
||||
|
||||
/* internally used IOPS limits */
|
||||
unsigned int iops[2][LIMIT_CNT];
|
||||
/* user configured IOPS limits */
|
||||
unsigned int iops_conf[2][LIMIT_CNT];
|
||||
|
||||
/* Number of bytes dispatched in current slice */
|
||||
uint64_t bytes_disp[2];
|
||||
/* Number of bio's dispatched in current slice */
|
||||
unsigned int io_disp[2];
|
||||
|
||||
unsigned long last_low_overflow_time[2];
|
||||
|
||||
uint64_t last_bytes_disp[2];
|
||||
unsigned int last_io_disp[2];
|
||||
|
||||
unsigned long last_check_time;
|
||||
|
||||
unsigned long latency_target; /* us */
|
||||
unsigned long latency_target_conf; /* us */
|
||||
/* When did we start a new slice */
|
||||
unsigned long slice_start[2];
|
||||
unsigned long slice_end[2];
|
||||
|
||||
unsigned long last_finish_time; /* ns / 1024 */
|
||||
unsigned long checked_last_finish_time; /* ns / 1024 */
|
||||
unsigned long avg_idletime; /* ns / 1024 */
|
||||
unsigned long idletime_threshold; /* us */
|
||||
unsigned long idletime_threshold_conf; /* us */
|
||||
|
||||
unsigned int bio_cnt; /* total bios */
|
||||
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
|
||||
unsigned long bio_cnt_reset_time;
|
||||
|
||||
atomic_t io_split_cnt[2];
|
||||
atomic_t last_io_split_cnt[2];
|
||||
|
||||
struct blkg_rwstat stat_bytes;
|
||||
struct blkg_rwstat stat_ios;
|
||||
};
|
||||
|
||||
/* We measure latency for request size from <= 4k to >= 1M */
|
||||
#define LATENCY_BUCKET_SIZE 9
|
||||
|
||||
@ -231,16 +94,6 @@ struct throtl_data
|
||||
|
||||
static void throtl_pending_timer_fn(struct timer_list *t);
|
||||
|
||||
static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
|
||||
{
|
||||
return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
|
||||
}
|
||||
|
||||
static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
|
||||
{
|
||||
return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
|
||||
}
|
||||
|
||||
static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg)
|
||||
{
|
||||
return pd_to_blkg(&tg->pd);
|
||||
@ -1794,7 +1647,7 @@ static void throtl_shutdown_wq(struct request_queue *q)
|
||||
cancel_work_sync(&td->dispatch_work);
|
||||
}
|
||||
|
||||
static struct blkcg_policy blkcg_policy_throtl = {
|
||||
struct blkcg_policy blkcg_policy_throtl = {
|
||||
.dfl_cftypes = throtl_files,
|
||||
.legacy_cftypes = throtl_legacy_files,
|
||||
|
||||
@ -2208,7 +2061,7 @@ void blk_throtl_charge_bio_split(struct bio *bio)
|
||||
} while (parent);
|
||||
}
|
||||
|
||||
bool blk_throtl_bio(struct bio *bio)
|
||||
bool __blk_throtl_bio(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
struct blkcg_gq *blkg = bio->bi_blkg;
|
||||
@ -2221,19 +2074,12 @@ bool blk_throtl_bio(struct bio *bio)
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/* see throtl_charge_bio() */
|
||||
if (bio_flagged(bio, BIO_THROTTLED))
|
||||
goto out;
|
||||
|
||||
if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
|
||||
blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
|
||||
bio->bi_iter.bi_size);
|
||||
blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
|
||||
}
|
||||
|
||||
if (!tg->has_rules[rw])
|
||||
goto out;
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
|
||||
throtl_update_latency_buckets(td);
|
||||
@ -2317,7 +2163,6 @@ again:
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
out:
|
||||
bio_set_flag(bio, BIO_THROTTLED);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
|
182
block/blk-throttle.h
Normal file
182
block/blk-throttle.h
Normal file
@ -0,0 +1,182 @@
|
||||
#ifndef BLK_THROTTLE_H
|
||||
#define BLK_THROTTLE_H
|
||||
|
||||
#include "blk-cgroup-rwstat.h"
|
||||
|
||||
/*
|
||||
* To implement hierarchical throttling, throtl_grps form a tree and bios
|
||||
* are dispatched upwards level by level until they reach the top and get
|
||||
* issued. When dispatching bios from the children and local group at each
|
||||
* level, if the bios are dispatched into a single bio_list, there's a risk
|
||||
* of a local or child group which can queue many bios at once filling up
|
||||
* the list starving others.
|
||||
*
|
||||
* To avoid such starvation, dispatched bios are queued separately
|
||||
* according to where they came from. When they are again dispatched to
|
||||
* the parent, they're popped in round-robin order so that no single source
|
||||
* hogs the dispatch window.
|
||||
*
|
||||
* throtl_qnode is used to keep the queued bios separated by their sources.
|
||||
* Bios are queued to throtl_qnode which in turn is queued to
|
||||
* throtl_service_queue and then dispatched in round-robin order.
|
||||
*
|
||||
* It's also used to track the reference counts on blkg's. A qnode always
|
||||
* belongs to a throtl_grp and gets queued on itself or the parent, so
|
||||
* incrementing the reference of the associated throtl_grp when a qnode is
|
||||
* queued and decrementing when dequeued is enough to keep the whole blkg
|
||||
* tree pinned while bios are in flight.
|
||||
*/
|
||||
struct throtl_qnode {
|
||||
struct list_head node; /* service_queue->queued[] */
|
||||
struct bio_list bios; /* queued bios */
|
||||
struct throtl_grp *tg; /* tg this qnode belongs to */
|
||||
};
|
||||
|
||||
struct throtl_service_queue {
|
||||
struct throtl_service_queue *parent_sq; /* the parent service_queue */
|
||||
|
||||
/*
|
||||
* Bios queued directly to this service_queue or dispatched from
|
||||
* children throtl_grp's.
|
||||
*/
|
||||
struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */
|
||||
unsigned int nr_queued[2]; /* number of queued bios */
|
||||
|
||||
/*
|
||||
* RB tree of active children throtl_grp's, which are sorted by
|
||||
* their ->disptime.
|
||||
*/
|
||||
struct rb_root_cached pending_tree; /* RB tree of active tgs */
|
||||
unsigned int nr_pending; /* # queued in the tree */
|
||||
unsigned long first_pending_disptime; /* disptime of the first tg */
|
||||
struct timer_list pending_timer; /* fires on first_pending_disptime */
|
||||
};
|
||||
|
||||
enum {
|
||||
LIMIT_LOW,
|
||||
LIMIT_MAX,
|
||||
LIMIT_CNT,
|
||||
};
|
||||
|
||||
struct throtl_grp {
|
||||
/* must be the first member */
|
||||
struct blkg_policy_data pd;
|
||||
|
||||
/* active throtl group service_queue member */
|
||||
struct rb_node rb_node;
|
||||
|
||||
/* throtl_data this group belongs to */
|
||||
struct throtl_data *td;
|
||||
|
||||
/* this group's service queue */
|
||||
struct throtl_service_queue service_queue;
|
||||
|
||||
/*
|
||||
* qnode_on_self is used when bios are directly queued to this
|
||||
* throtl_grp so that local bios compete fairly with bios
|
||||
* dispatched from children. qnode_on_parent is used when bios are
|
||||
* dispatched from this throtl_grp into its parent and will compete
|
||||
* with the sibling qnode_on_parents and the parent's
|
||||
* qnode_on_self.
|
||||
*/
|
||||
struct throtl_qnode qnode_on_self[2];
|
||||
struct throtl_qnode qnode_on_parent[2];
|
||||
|
||||
/*
|
||||
* Dispatch time in jiffies. This is the estimated time when group
|
||||
* will unthrottle and is ready to dispatch more bio. It is used as
|
||||
* key to sort active groups in service tree.
|
||||
*/
|
||||
unsigned long disptime;
|
||||
|
||||
unsigned int flags;
|
||||
|
||||
/* are there any throtl rules between this group and td? */
|
||||
bool has_rules[2];
|
||||
|
||||
/* internally used bytes per second rate limits */
|
||||
uint64_t bps[2][LIMIT_CNT];
|
||||
/* user configured bps limits */
|
||||
uint64_t bps_conf[2][LIMIT_CNT];
|
||||
|
||||
/* internally used IOPS limits */
|
||||
unsigned int iops[2][LIMIT_CNT];
|
||||
/* user configured IOPS limits */
|
||||
unsigned int iops_conf[2][LIMIT_CNT];
|
||||
|
||||
/* Number of bytes dispatched in current slice */
|
||||
uint64_t bytes_disp[2];
|
||||
/* Number of bio's dispatched in current slice */
|
||||
unsigned int io_disp[2];
|
||||
|
||||
unsigned long last_low_overflow_time[2];
|
||||
|
||||
uint64_t last_bytes_disp[2];
|
||||
unsigned int last_io_disp[2];
|
||||
|
||||
unsigned long last_check_time;
|
||||
|
||||
unsigned long latency_target; /* us */
|
||||
unsigned long latency_target_conf; /* us */
|
||||
/* When did we start a new slice */
|
||||
unsigned long slice_start[2];
|
||||
unsigned long slice_end[2];
|
||||
|
||||
unsigned long last_finish_time; /* ns / 1024 */
|
||||
unsigned long checked_last_finish_time; /* ns / 1024 */
|
||||
unsigned long avg_idletime; /* ns / 1024 */
|
||||
unsigned long idletime_threshold; /* us */
|
||||
unsigned long idletime_threshold_conf; /* us */
|
||||
|
||||
unsigned int bio_cnt; /* total bios */
|
||||
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
|
||||
unsigned long bio_cnt_reset_time;
|
||||
|
||||
atomic_t io_split_cnt[2];
|
||||
atomic_t last_io_split_cnt[2];
|
||||
|
||||
struct blkg_rwstat stat_bytes;
|
||||
struct blkg_rwstat stat_ios;
|
||||
};
|
||||
|
||||
extern struct blkcg_policy blkcg_policy_throtl;
|
||||
|
||||
static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
|
||||
{
|
||||
return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
|
||||
}
|
||||
|
||||
static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
|
||||
{
|
||||
return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal throttling interface
|
||||
*/
|
||||
#ifndef CONFIG_BLK_DEV_THROTTLING
|
||||
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
|
||||
static inline void blk_throtl_exit(struct request_queue *q) { }
|
||||
static inline void blk_throtl_register_queue(struct request_queue *q) { }
|
||||
static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
|
||||
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
|
||||
#else /* CONFIG_BLK_DEV_THROTTLING */
|
||||
int blk_throtl_init(struct request_queue *q);
|
||||
void blk_throtl_exit(struct request_queue *q);
|
||||
void blk_throtl_register_queue(struct request_queue *q);
|
||||
void blk_throtl_charge_bio_split(struct bio *bio);
|
||||
bool __blk_throtl_bio(struct bio *bio);
|
||||
static inline bool blk_throtl_bio(struct bio *bio)
|
||||
{
|
||||
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
|
||||
|
||||
if (bio_flagged(bio, BIO_THROTTLED))
|
||||
return false;
|
||||
if (!tg->has_rules[bio_data_dir(bio)])
|
||||
return false;
|
||||
|
||||
return __blk_throtl_bio(bio);
|
||||
}
|
||||
#endif /* CONFIG_BLK_DEV_THROTTLING */
|
||||
|
||||
#endif
|
16
block/blk.h
16
block/blk.h
@ -325,22 +325,6 @@ void ioc_clear_queue(struct request_queue *q);
|
||||
|
||||
int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
|
||||
|
||||
/*
|
||||
* Internal throttling interface
|
||||
*/
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING
|
||||
extern int blk_throtl_init(struct request_queue *q);
|
||||
extern void blk_throtl_exit(struct request_queue *q);
|
||||
extern void blk_throtl_register_queue(struct request_queue *q);
|
||||
extern void blk_throtl_charge_bio_split(struct bio *bio);
|
||||
bool blk_throtl_bio(struct bio *bio);
|
||||
#else /* CONFIG_BLK_DEV_THROTTLING */
|
||||
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
|
||||
static inline void blk_throtl_exit(struct request_queue *q) { }
|
||||
static inline void blk_throtl_register_queue(struct request_queue *q) { }
|
||||
static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
|
||||
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
|
||||
#endif /* CONFIG_BLK_DEV_THROTTLING */
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
|
||||
extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
|
||||
|
Loading…
Reference in New Issue
Block a user