Merge branch 'sched/eevdf' into sched/core

Pick up the EEVDF work into the main branch - it's looking good so far.

 Conflicts:
	kernel/sched/features.h

Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit b41bbb33cf
Ingo Molnar <mingo@kernel.org>, 2023-08-10 09:05:43 +02:00
7 changed files with 560 additions and 646 deletions

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h

@@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node,
 	rb_insert_augmented(node, &root->rb_root, augment);
 }
 
+static __always_inline struct rb_node *
+rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree,
+			bool (*less)(struct rb_node *, const struct rb_node *),
+			const struct rb_augment_callbacks *augment)
+{
+	struct rb_node **link = &tree->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	bool leftmost = true;
+
+	while (*link) {
+		parent = *link;
+		if (less(node, parent)) {
+			link = &parent->rb_left;
+		} else {
+			link = &parent->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(node, parent, link);
+	augment->propagate(parent, NULL); /* suboptimal */
+	rb_insert_augmented_cached(node, tree, leftmost, augment);
+
+	return leftmost ? node : NULL;
+}
+
 /*
  * Template for declaring augmented rbtree callbacks (generic case)
  *
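
An illustrative caller of the new helper (not part of this commit; demo_node,
DEMO_KEY, demo_less and demo_cb are made-up names) might look like the
following. The suppressed kernel/sched/fair.c change calls the same helper
with its own augment callbacks to keep se->min_deadline up to date:

#include <linux/rbtree_augmented.h>

struct demo_node {
	struct rb_node	rb;
	u64		key;
	u64		subtree_max;	/* max of .key over this node's subtree */
};

#define DEMO_KEY(n) ((n)->key)

/* Generates demo_cb with propagate/copy/rotate callbacks for ->subtree_max. */
RB_DECLARE_CALLBACKS_MAX(static, demo_cb, struct demo_node, rb,
			 u64, subtree_max, DEMO_KEY)

static bool demo_less(struct rb_node *a, const struct rb_node *b)
{
	return rb_entry(a, struct demo_node, rb)->key <
	       rb_entry(b, struct demo_node, rb)->key;
}

static void demo_insert(struct rb_root_cached *tree, struct demo_node *dn)
{
	dn->subtree_max = dn->key;
	/* Returns the node iff it became the new leftmost entry. */
	rb_add_augmented_cached(&dn->rb, tree, demo_less, &demo_cb);
}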

diff --git a/include/linux/sched.h b/include/linux/sched.h

@@ -549,13 +549,18 @@ struct sched_entity {
 	/* For load-balancing: */
 	struct load_weight		load;
 	struct rb_node			run_node;
+	u64				deadline;
+	u64				min_deadline;
+
 	struct list_head		group_node;
 	unsigned int			on_rq;
 
 	u64				exec_start;
 	u64				sum_exec_runtime;
-	u64				vruntime;
 	u64				prev_sum_exec_runtime;
+	u64				vruntime;
+	s64				vlag;
+	u64				slice;
 
 	u64				nr_migrations;

diff --git a/kernel/sched/core.c b/kernel/sched/core.c

@@ -4527,6 +4527,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
+	p->se.vlag			= 0;
+	p->se.slice			= sysctl_sched_base_slice;
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c

@@ -347,10 +347,7 @@ static __init int sched_init_debug(void)
 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
 #endif
 
-	debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency);
-	debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
-	debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity);
-	debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity);
+	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
 
 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
@@ -582,9 +579,13 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	else
 		SEQ_printf(m, " %c", task_state_to_char(p));
 
-	SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
+	SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
 		p->comm, task_pid_nr(p),
 		SPLIT_NS(p->se.vruntime),
+		entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N',
+		SPLIT_NS(p->se.deadline),
+		SPLIT_NS(p->se.slice),
+		SPLIT_NS(p->se.sum_exec_runtime),
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
@@ -627,10 +628,9 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
-	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
-		spread, rq0_min_vruntime, spread0;
+	s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, spread;
+	struct sched_entity *last, *first;
 	struct rq *rq = cpu_rq(cpu);
-	struct sched_entity *last;
 	unsigned long flags;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -644,26 +644,25 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			SPLIT_NS(cfs_rq->exec_clock));
 
 	raw_spin_rq_lock_irqsave(rq, flags);
-	if (rb_first_cached(&cfs_rq->tasks_timeline))
-		MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
+	first = __pick_first_entity(cfs_rq);
+	if (first)
+		left_vruntime = first->vruntime;
 	last = __pick_last_entity(cfs_rq);
 	if (last)
-		max_vruntime = last->vruntime;
+		right_vruntime = last->vruntime;
 	min_vruntime = cfs_rq->min_vruntime;
-	rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
 	raw_spin_rq_unlock_irqrestore(rq, flags);
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
-			SPLIT_NS(MIN_vruntime));
+
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime",
+			SPLIT_NS(left_vruntime));
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
 			SPLIT_NS(min_vruntime));
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
-			SPLIT_NS(max_vruntime));
-	spread = max_vruntime - MIN_vruntime;
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
-			SPLIT_NS(spread));
-	spread0 = min_vruntime - rq0_min_vruntime;
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
-			SPLIT_NS(spread0));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
+			SPLIT_NS(avg_vruntime(cfs_rq)));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime",
+			SPLIT_NS(right_vruntime));
+	spread = right_vruntime - left_vruntime;
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
 	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
 			cfs_rq->nr_spread_over);
 	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
@@ -864,10 +863,7 @@ static void sched_debug_header(struct seq_file *m)
 	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 #define PN(x) \
 	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
-	PN(sysctl_sched_latency);
-	PN(sysctl_sched_min_granularity);
-	PN(sysctl_sched_idle_min_granularity);
-	PN(sysctl_sched_wakeup_granularity);
+	PN(sysctl_sched_base_slice);
 	P(sysctl_sched_child_runs_first);
 	P(sysctl_sched_features);
 #undef PN

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
(diff suppressed because it is too large)
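
The suppressed kernel/sched/fair.c diff is where most of the EEVDF work lands.
As a rough guide only, here is a simplified sketch (not the actual fair.c code)
of the two helpers it exports, avg_vruntime() and entity_eligible(), whose
declarations and backing fields (cfs_rq->avg_vruntime, cfs_rq->avg_load) appear
in the kernel/sched/sched.h diff below. The real implementation additionally
folds in the currently running entity, which is not kept in the rbtree.

/*
 * Sketch: cfs_rq->avg_vruntime caches \Sum (v_i - min_vruntime) * w_i and
 * cfs_rq->avg_load caches \Sum w_i, so the zero-lag point is
 *
 *	V = min_vruntime + avg_vruntime / avg_load
 *
 * and an entity is "eligible" when its vruntime is at or before V.
 */
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
	s64 avg = cfs_rq->avg_vruntime;
	long load = cfs_rq->avg_load;

	if (load)
		avg = div_s64(avg, load);

	return cfs_rq->min_vruntime + avg;
}

int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	/* Compare in the scaled domain to avoid the division above. */
	s64 key = (s64)(se->vruntime - cfs_rq->min_vruntime);

	return cfs_rq->avg_vruntime >= key * (s64)cfs_rq->avg_load;
}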

diff --git a/kernel/sched/features.h b/kernel/sched/features.h

@@ -1,16 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Only give sleepers 50% of their service deficit. This allows
- * them to run sooner, but does not allow tons of sleepers to
- * rip the spread apart.
- */
-SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
-
 /*
- * Place new tasks ahead so that they do not starve already running
- * tasks
+ * Using the avg_vruntime, do the right thing and preserve lag across
+ * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
  */
-SCHED_FEAT(START_DEBIT, true)
+SCHED_FEAT(PLACE_LAG, true)
+SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
 
 /*
  * Prefer to schedule the task we woke last (assuming it failed
@@ -19,13 +14,6 @@ SCHED_FEAT(START_DEBIT, true)
  */
 SCHED_FEAT(NEXT_BUDDY, false)
 
-/*
- * Prefer to schedule the task that ran last (when we did
- * wake-preempt) as that likely will touch the same data, increases
- * cache locality.
- */
-SCHED_FEAT(LAST_BUDDY, true)
-
 /*
  * Consider buddies to be cache hot, decreases the likeliness of a
  * cache buddy being migrated away, increases cache locality.
@@ -99,7 +87,4 @@ SCHED_FEAT(UTIL_EST_FASTUP, true)
 SCHED_FEAT(LATENCY_WARN, false)
 
-SCHED_FEAT(ALT_PERIOD, true)
-SCHED_FEAT(BASE_SLICE, true)
-
 SCHED_FEAT(HZ_BW, true)
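
What the two new placement features gate is easiest to see in a sketch. The
code below is illustrative only: the real place_entity() lives in the
suppressed kernel/sched/fair.c diff, converts the slice into virtual time and
rescales the preserved lag for the weight the waking entity adds back; those
steps are omitted here.

static void place_entity_sketch(struct cfs_rq *cfs_rq, struct sched_entity *se,
				int flags)
{
	u64 vslice = se->slice;	/* the real code scales this into virtual time */
	s64 lag = 0;

	/* Placement strategy #1: resume with the lag we had when dequeued. */
	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running)
		lag = se->vlag;

	se->vruntime = avg_vruntime(cfs_rq) - lag;

	/* New tasks get only half a slice before their first deadline. */
	if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
		vslice /= 2;

	/* EEVDF: virtual deadline = eligible time + request/weight. */
	se->deadline = se->vruntime + vslice;
}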

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

@@ -550,6 +550,9 @@ struct cfs_rq {
 	unsigned int		idle_nr_running;   /* SCHED_IDLE */
 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
+	s64			avg_vruntime;
+	u64			avg_load;
+
 	u64			exec_clock;
 	u64			min_vruntime;
 #ifdef CONFIG_SCHED_CORE
@@ -569,8 +572,6 @@
 	 */
 	struct sched_entity	*curr;
 	struct sched_entity	*next;
-	struct sched_entity	*last;
-	struct sched_entity	*skip;
 
 #ifdef CONFIG_SCHED_DEBUG
 	unsigned int		nr_spread_over;
@@ -2200,6 +2201,7 @@ extern const u32 sched_prio_to_wmult[40];
 #else
 #define ENQUEUE_MIGRATED	0x00
 #endif
+#define ENQUEUE_INITIAL		0x80
 
 #define RETRY_TASK		((void *)-1UL)
@@ -2504,11 +2506,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
+extern unsigned int sysctl_sched_base_slice;
+
 #ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_idle_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
 extern int sysctl_resched_latency_warn_ms;
 extern int sysctl_resched_latency_warn_once;
@@ -3485,4 +3485,7 @@ static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
 static inline void init_sched_mm_cid(struct task_struct *t) { }
 #endif
 
+extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
+extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
+
 #endif /* _KERNEL_SCHED_SCHED_H */