#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H

#include "blk-cgroup-rwstat.h"
/*
 * To implement hierarchical throttling, throtl_grps form a tree and bios
 * are dispatched upwards level by level until they reach the top and get
 * issued.  When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * that a local or child group which queues many bios at once fills up the
 * list and starves the others.
 *
 * To avoid such starvation, dispatched bios are queued separately
 * according to where they came from.  When they are again dispatched to
 * the parent, they're popped in round-robin order so that no single source
 * hogs the dispatch window.
 *
 * throtl_qnode is used to keep the queued bios separated by their sources.
 * Bios are queued to throtl_qnode which in turn is queued to
 * throtl_service_queue and then dispatched in round-robin order.
 * (See the illustrative sketch after struct throtl_service_queue below.)
 *
 * It's also used to track the reference counts on blkg's.  A qnode always
 * belongs to a throtl_grp and gets queued on itself or the parent, so
 * incrementing the reference of the associated throtl_grp when a qnode is
 * queued and decrementing when dequeued is enough to keep the whole blkg
 * tree pinned while bios are in flight.
 */
struct throtl_qnode {
	struct list_head	node;		/* service_queue->queued[] */
	struct bio_list		bios;		/* queued bios */
	struct throtl_grp	*tg;		/* tg this qnode belongs to */
};
struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};
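
/*
 * Illustrative sketch only, not part of the kernel interface: how bios
 * queued through qnodes are popped in round-robin order from a
 * service_queue's queued[] list (the real logic lives in
 * throtl_pop_queued() in blk-throttle.c).  One bio is taken from the
 * first qnode; a qnode that still holds bios is rotated to the tail so
 * that no single source hogs the dispatch window, while an emptied
 * qnode is dropped from the list.  Reference counting on the owning
 * throtl_grp/blkg is omitted for brevity.
 */
static inline struct bio *throtl_example_pop_queued(struct list_head *queued)
{
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
	bio = bio_list_pop(&qn->bios);

	if (bio_list_empty(&qn->bios))
		list_del_init(&qn->node);		/* source exhausted */
	else
		list_move_tail(&qn->node, queued);	/* round-robin rotation */

	return bio;
}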
enum tg_state_flags {
	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
	THROTL_TG_CANCELING	= 1 << 2,	/* starts to cancel bio */
};
enum {
	LIMIT_LOW,	/* io.low soft limit */
	LIMIT_MAX,	/* io.max hard limit */
	LIMIT_CNT,	/* number of limit states */
};
struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children.  qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.  (See the illustrative sketch after this
	 * structure.)
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies.  This is the estimated time when the
	 * group will unthrottle and be ready to dispatch more bios.  It is
	 * used as the key to sort active groups in the service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules_bps[2];
	bool has_rules_iops[2];
	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];
	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];
	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];
	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
	/* Number of bio's dispatched in current slice */
	unsigned int io_disp[2];

	unsigned long last_low_overflow_time[2];
	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];
	/*
	 * The following two fields are updated when a new configuration is
	 * submitted while some bios are still throttled; they record how many
	 * bytes/ios have already been waited for under the previous
	 * configuration, and they are used to calculate the wait time under
	 * the new configuration.
	 */
	long long carryover_bytes[2];
	int carryover_ios[2];
	unsigned long last_check_time;

	unsigned long latency_target;		/* us */
	unsigned long latency_target_conf;	/* us */

	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	unsigned long last_finish_time;		/* ns / 1024 */
	unsigned long checked_last_finish_time;	/* ns / 1024 */
	unsigned long avg_idletime;		/* ns / 1024 */
	unsigned long idletime_threshold;	/* us */
	unsigned long idletime_threshold_conf;	/* us */

	unsigned int bio_cnt;		/* total bios */
	unsigned int bad_bio_cnt;	/* bios exceeding latency threshold */
	unsigned long bio_cnt_reset_time;

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};
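
/*
 * Illustrative sketch only, not part of the kernel interface (modelled
 * on throtl_add_bio_tg() in blk-throttle.c): queueing a bio on a
 * throtl_grp.  A bio entering this group directly goes through the
 * group's own qnode_on_self[], while a bio handed up from a child
 * arrives through that child's qnode_on_parent[], so local and child
 * traffic stay on separate qnodes and are later popped in round-robin
 * order.  Pending-tree, timer and reference-count updates are omitted.
 */
static inline void throtl_example_queue_bio(struct throtl_grp *tg,
					    struct throtl_qnode *qn,
					    struct bio *bio)
{
	int rw = bio_data_dir(bio);

	if (!qn)				/* queued directly to this group */
		qn = &tg->qnode_on_self[rw];

	bio_list_add(&qn->bios, bio);
	if (list_empty(&qn->node))		/* first bio: activate the qnode */
		list_add_tail(&qn->node, &tg->service_queue.queued[rw]);

	tg->service_queue.nr_queued[rw]++;
}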
extern struct blkcg_policy blkcg_policy_throtl;

static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}
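
/*
 * Illustrative sketch only, not part of the kernel interface: given a
 * bio whose blkg association is already set up, this is how the
 * converters above reach the matching throtl_grp, and how the
 * [READ/WRITE][LIMIT_*] indexing of the bps[]/iops[] arrays is used.
 * The helper name is made up for the example.
 */
static inline uint64_t throtl_example_read_bps(struct bio *bio)
{
	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);

	if (!tg)
		return U64_MAX;			/* no throttling policy data */

	return tg->bps[READ][LIMIT_MAX];	/* effective read bps limit */
}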
/*
 * Internal throttling interface
 */
#ifndef CONFIG_BLK_DEV_THROTTLING
static inline int blk_throtl_init(struct gendisk *disk) { return 0; }
static inline void blk_throtl_exit(struct gendisk *disk) { }
static inline void blk_throtl_register(struct gendisk *disk) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
static inline void blk_throtl_cancel_bios(struct gendisk *disk) { }
#else /* CONFIG_BLK_DEV_THROTTLING */
int blk_throtl_init(struct gendisk *disk);
void blk_throtl_exit(struct gendisk *disk);
void blk_throtl_register(struct gendisk *disk);
bool __blk_throtl_bio(struct bio *bio);
void blk_throtl_cancel_bios(struct gendisk *disk);

static inline bool blk_should_throtl(struct bio *bio)
{
	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
	int rw = bio_data_dir(bio);

	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
			bio_set_flag(bio, BIO_CGROUP_ACCT);
			blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
					bio->bi_iter.bi_size);
		}
		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
	}

	/* iops limit is always counted */
	if (tg->has_rules_iops[rw])
		return true;

	if (tg->has_rules_bps[rw] && !bio_flagged(bio, BIO_BPS_THROTTLED))
		return true;

	return false;
}

static inline bool blk_throtl_bio(struct bio *bio)
{
	if (!blk_should_throtl(bio))
		return false;

	return __blk_throtl_bio(bio);
}
#endif /* CONFIG_BLK_DEV_THROTTLING */
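
/*
 * Illustrative sketch only, not part of the kernel interface: how a
 * caller in the bio submission path is expected to use
 * blk_throtl_bio().  A true return means the bio has been queued by the
 * throttling code and will be reissued later when the group's pending
 * timer fires, so the caller must stop processing it.  The function
 * name below is hypothetical.
 */
static inline bool throtl_example_submit_check(struct bio *bio)
{
	if (blk_throtl_bio(bio))
		return false;	/* throttled: bio is now owned by blk-throttle */

	return true;		/* continue normal submission */
}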
#endif