Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "The main scheduler changes in this cycle were: - NUMA balancing improvements (Mel Gorman) - Further load tracking improvements (Patrick Bellasi) - Various NOHZ balancing cleanups and optimizations (Peter Zijlstra) - Improve blocked load handling, in particular we can now reduce and eventually stop periodic load updates on 'very idle' CPUs. (Vincent Guittot) - On isolated CPUs offload the final 1Hz scheduler tick as well, plus related cleanups and reorganization. (Frederic Weisbecker) - Core scheduler code cleanups (Ingo Molnar)" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits) sched/core: Update preempt_notifier_key to modern API sched/cpufreq: Rate limits for SCHED_DEADLINE sched/fair: Update util_est only on util_avg updates sched/cpufreq/schedutil: Use util_est for OPP selection sched/fair: Use util_est in LB and WU paths sched/fair: Add util_est on top of PELT sched/core: Remove TASK_ALL sched/completions: Use bool in try_wait_for_completion() sched/fair: Update blocked load when newly idle sched/fair: Move idle_balance() sched/nohz: Merge CONFIG_NO_HZ_COMMON blocks sched/fair: Move rebalance_domains() sched/nohz: Optimize nohz_idle_balance() sched/fair: Reduce the periodic update duration sched/nohz: Stop NOHZ stats when decayed sched/cpufreq: Provide migration hint sched/nohz: Clean up nohz enter/exit sched/fair: Update blocked load from NEWIDLE sched/fair: Add NOHZ stats balancing sched/fair: Restructure nohz_balance_kick() ...
This commit is contained in:
commit
46e0d28bdb
@ -1766,6 +1766,17 @@
|
||||
|
||||
nohz
|
||||
Disable the tick when a single task runs.
|
||||
|
||||
A residual 1Hz tick is offloaded to workqueues, which you
|
||||
need to affine to housekeeping through the global
|
||||
workqueue's affinity configured via the
|
||||
/sys/devices/virtual/workqueue/cpumask sysfs file, or
|
||||
by using the 'domain' flag described below.
|
||||
|
||||
NOTE: by default the global workqueue runs on all CPUs,
|
||||
so to protect individual CPUs the 'cpumask' file has to
|
||||
be configured manually after bootup.
|
||||
|
||||
domain
|
||||
Isolate from the general SMP balancing and scheduling
|
||||
algorithms. Note that performing domain isolation this way
|
||||
|
@ -93,7 +93,6 @@ struct task_group;
|
||||
|
||||
/* Convenience macros for the sake of wake_up(): */
|
||||
#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
|
||||
#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
|
||||
|
||||
/* get_task_state(): */
|
||||
#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
|
||||
@ -275,6 +274,34 @@ struct load_weight {
|
||||
u32 inv_weight;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct util_est - Estimation utilization of FAIR tasks
|
||||
* @enqueued: instantaneous estimated utilization of a task/cpu
|
||||
* @ewma: the Exponential Weighted Moving Average (EWMA)
|
||||
* utilization of a task
|
||||
*
|
||||
* Support data structure to track an Exponential Weighted Moving Average
|
||||
* (EWMA) of a FAIR task's utilization. New samples are added to the moving
|
||||
* average each time a task completes an activation. Sample's weight is chosen
|
||||
* so that the EWMA will be relatively insensitive to transient changes to the
|
||||
* task's workload.
|
||||
*
|
||||
* The enqueued attribute has a slightly different meaning for tasks and cpus:
|
||||
* - task: the task's util_avg at last task dequeue time
|
||||
* - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU
|
||||
* Thus, the util_est.enqueued of a task represents the contribution on the
|
||||
* estimated utilization of the CPU where that task is currently enqueued.
|
||||
*
|
||||
* Only for tasks we track a moving average of the past instantaneous
|
||||
* estimated utilization. This allows to absorb sporadic drops in utilization
|
||||
* of an otherwise almost periodic task.
|
||||
*/
|
||||
struct util_est {
|
||||
unsigned int enqueued;
|
||||
unsigned int ewma;
|
||||
#define UTIL_EST_WEIGHT_SHIFT 2
|
||||
};
|
||||
|
||||
/*
|
||||
* The load_avg/util_avg accumulates an infinite geometric series
|
||||
* (see __update_load_avg() in kernel/sched/fair.c).
|
||||
@ -336,6 +363,7 @@ struct sched_avg {
|
||||
unsigned long load_avg;
|
||||
unsigned long runnable_load_avg;
|
||||
unsigned long util_avg;
|
||||
struct util_est util_est;
|
||||
};
|
||||
|
||||
struct sched_statistics {
|
||||
|
@ -8,9 +8,8 @@
|
||||
* Interface between cpufreq drivers and the scheduler:
|
||||
*/
|
||||
|
||||
#define SCHED_CPUFREQ_RT (1U << 0)
|
||||
#define SCHED_CPUFREQ_DL (1U << 1)
|
||||
#define SCHED_CPUFREQ_IOWAIT (1U << 2)
|
||||
#define SCHED_CPUFREQ_IOWAIT (1U << 0)
|
||||
#define SCHED_CPUFREQ_MIGRATION (1U << 1)
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ
|
||||
struct update_util_data {
|
||||
|
@ -1,8 +1,4 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_SCHED_DEADLINE_H
|
||||
#define _LINUX_SCHED_DEADLINE_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
|
||||
/*
|
||||
* SCHED_DEADLINE tasks has negative priorities, reflecting
|
||||
@ -28,5 +24,3 @@ static inline bool dl_time_before(u64 a, u64 b)
|
||||
{
|
||||
return (s64)(a - b) < 0;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_SCHED_DEADLINE_H */
|
||||
|
@ -12,6 +12,7 @@ enum hk_flags {
|
||||
HK_FLAG_SCHED = (1 << 3),
|
||||
HK_FLAG_TICK = (1 << 4),
|
||||
HK_FLAG_DOMAIN = (1 << 5),
|
||||
HK_FLAG_WQ = (1 << 6),
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CPU_ISOLATION
|
||||
|
@ -16,11 +16,9 @@ static inline void cpu_load_update_nohz_stop(void) { }
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
|
||||
extern void nohz_balance_enter_idle(int cpu);
|
||||
extern void set_cpu_sd_state_idle(void);
|
||||
extern int get_nohz_timer_target(void);
|
||||
#else
|
||||
static inline void nohz_balance_enter_idle(int cpu) { }
|
||||
static inline void set_cpu_sd_state_idle(void) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
@ -37,8 +35,4 @@ extern void wake_up_nohz_cpu(int cpu);
|
||||
static inline void wake_up_nohz_cpu(int cpu) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
extern u64 scheduler_tick_max_deferment(void);
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_SCHED_NOHZ_H */
|
||||
|
@ -113,7 +113,8 @@ enum tick_dep_bits {
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
extern bool tick_nohz_enabled;
|
||||
extern int tick_nohz_tick_stopped(void);
|
||||
extern bool tick_nohz_tick_stopped(void);
|
||||
extern bool tick_nohz_tick_stopped_cpu(int cpu);
|
||||
extern void tick_nohz_idle_enter(void);
|
||||
extern void tick_nohz_idle_exit(void);
|
||||
extern void tick_nohz_irq_exit(void);
|
||||
@ -125,6 +126,7 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
|
||||
#else /* !CONFIG_NO_HZ_COMMON */
|
||||
#define tick_nohz_enabled (0)
|
||||
static inline int tick_nohz_tick_stopped(void) { return 0; }
|
||||
static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
|
||||
static inline void tick_nohz_idle_enter(void) { }
|
||||
static inline void tick_nohz_idle_exit(void) { }
|
||||
|
||||
|
@ -17,8 +17,9 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
|
||||
endif
|
||||
|
||||
obj-y += core.o loadavg.o clock.o cputime.o
|
||||
obj-y += idle_task.o fair.o rt.o deadline.o
|
||||
obj-y += wait.o wait_bit.o swait.o completion.o idle.o
|
||||
obj-y += idle.o fair.o rt.o deadline.o
|
||||
obj-y += wait.o wait_bit.o swait.o completion.o
|
||||
|
||||
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
|
||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
||||
|
@ -1,10 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
/*
|
||||
* Auto-group scheduling implementation:
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
|
||||
@ -168,18 +165,19 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
|
||||
autogroup_kref_put(prev);
|
||||
}
|
||||
|
||||
/* Allocates GFP_KERNEL, cannot be called under any spinlock */
|
||||
/* Allocates GFP_KERNEL, cannot be called under any spinlock: */
|
||||
void sched_autogroup_create_attach(struct task_struct *p)
|
||||
{
|
||||
struct autogroup *ag = autogroup_create();
|
||||
|
||||
autogroup_move_group(p, ag);
|
||||
/* drop extra reference added by autogroup_create() */
|
||||
|
||||
/* Drop extra reference added by autogroup_create(): */
|
||||
autogroup_kref_put(ag);
|
||||
}
|
||||
EXPORT_SYMBOL(sched_autogroup_create_attach);
|
||||
|
||||
/* Cannot be called under siglock. Currently has no users */
|
||||
/* Cannot be called under siglock. Currently has no users: */
|
||||
void sched_autogroup_detach(struct task_struct *p)
|
||||
{
|
||||
autogroup_move_group(p, &autogroup_default);
|
||||
@ -202,7 +200,6 @@ static int __init setup_autogroup(char *str)
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("noautogroup", setup_autogroup);
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
@ -224,7 +221,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
|
||||
if (nice < 0 && !can_nice(current, nice))
|
||||
return -EPERM;
|
||||
|
||||
/* this is a heavy operation taking global locks.. */
|
||||
/* This is a heavy operation, taking global locks.. */
|
||||
if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
|
||||
return -EAGAIN;
|
||||
|
||||
@ -267,4 +264,4 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen)
|
||||
|
||||
return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
|
||||
}
|
||||
#endif /* CONFIG_SCHED_DEBUG */
|
||||
#endif
|
||||
|
@ -1,15 +1,11 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||
|
||||
#include <linux/kref.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/sched/autogroup.h>
|
||||
|
||||
struct autogroup {
|
||||
/*
|
||||
* reference doesn't mean how many thread attach to this
|
||||
* autogroup now. It just stands for the number of task
|
||||
* could use this autogroup.
|
||||
* Reference doesn't mean how many threads attach to this
|
||||
* autogroup now. It just stands for the number of tasks
|
||||
* which could use this autogroup.
|
||||
*/
|
||||
struct kref kref;
|
||||
struct task_group *tg;
|
||||
@ -56,11 +52,9 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg)
|
||||
return tg;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_SCHED_AUTOGROUP */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* sched_clock for unstable cpu clocks
|
||||
* sched_clock() for unstable CPU clocks
|
||||
*
|
||||
* Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
|
||||
*
|
||||
@ -11,7 +11,7 @@
|
||||
* Guillaume Chazarain <guichaz@gmail.com>
|
||||
*
|
||||
*
|
||||
* What:
|
||||
* What this file implements:
|
||||
*
|
||||
* cpu_clock(i) provides a fast (execution time) high resolution
|
||||
* clock with bounded drift between CPUs. The value of cpu_clock(i)
|
||||
@ -26,11 +26,11 @@
|
||||
* at 0 on boot (but people really shouldn't rely on that).
|
||||
*
|
||||
* cpu_clock(i) -- can be used from any context, including NMI.
|
||||
* local_clock() -- is cpu_clock() on the current cpu.
|
||||
* local_clock() -- is cpu_clock() on the current CPU.
|
||||
*
|
||||
* sched_clock_cpu(i)
|
||||
*
|
||||
* How:
|
||||
* How it is implemented:
|
||||
*
|
||||
* The implementation either uses sched_clock() when
|
||||
* !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
|
||||
@ -52,19 +52,7 @@
|
||||
* that is otherwise invisible (TSC gets stopped).
|
||||
*
|
||||
*/
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/static_key.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/init.h>
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* Scheduler clock - returns current time in nanosec units.
|
||||
@ -302,21 +290,21 @@ again:
|
||||
* cmpxchg64 below only protects one readout.
|
||||
*
|
||||
* We must reread via sched_clock_local() in the retry case on
|
||||
* 32bit as an NMI could use sched_clock_local() via the
|
||||
* 32-bit kernels as an NMI could use sched_clock_local() via the
|
||||
* tracer and hit between the readout of
|
||||
* the low32bit and the high 32bit portion.
|
||||
* the low 32-bit and the high 32-bit portion.
|
||||
*/
|
||||
this_clock = sched_clock_local(my_scd);
|
||||
/*
|
||||
* We must enforce atomic readout on 32bit, otherwise the
|
||||
* update on the remote cpu can hit inbetween the readout of
|
||||
* the low32bit and the high 32bit portion.
|
||||
* We must enforce atomic readout on 32-bit, otherwise the
|
||||
* update on the remote CPU can hit inbetween the readout of
|
||||
* the low 32-bit and the high 32-bit portion.
|
||||
*/
|
||||
remote_clock = cmpxchg64(&scd->clock, 0, 0);
|
||||
#else
|
||||
/*
|
||||
* On 64bit the read of [my]scd->clock is atomic versus the
|
||||
* update, so we can avoid the above 32bit dance.
|
||||
* On 64-bit kernels the read of [my]scd->clock is atomic versus the
|
||||
* update, so we can avoid the above 32-bit dance.
|
||||
*/
|
||||
sched_clock_local(my_scd);
|
||||
again:
|
||||
|
@ -11,10 +11,7 @@
|
||||
* typically be used for exclusion which gives rise to priority inversion.
|
||||
* Waiting for completion is a typically sync point, but not an exclusion point.
|
||||
*/
|
||||
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/completion.h>
|
||||
#include "sched.h"
|
||||
|
||||
/**
|
||||
* complete: - signals a single thread waiting on this completion
|
||||
@ -283,7 +280,7 @@ EXPORT_SYMBOL(wait_for_completion_killable_timeout);
|
||||
bool try_wait_for_completion(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
bool ret = true;
|
||||
|
||||
/*
|
||||
* Since x->done will need to be locked only
|
||||
@ -292,11 +289,11 @@ bool try_wait_for_completion(struct completion *x)
|
||||
* return early in the blocking case.
|
||||
*/
|
||||
if (!READ_ONCE(x->done))
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
if (!x->done)
|
||||
ret = 0;
|
||||
ret = false;
|
||||
else if (x->done != UINT_MAX)
|
||||
x->done--;
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
|
@ -5,37 +5,11 @@
|
||||
*
|
||||
* Copyright (C) 1991-2002 Linus Torvalds
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <uapi/linux/sched/types.h>
|
||||
#include <linux/sched/loadavg.h>
|
||||
#include <linux/sched/hotplug.h>
|
||||
#include <linux/wait_bit.h>
|
||||
#include <linux/cpuset.h>
|
||||
#include <linux/delayacct.h>
|
||||
#include <linux/init_task.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/rcupdate_wait.h>
|
||||
#include <linux/compat.h>
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include "sched.h"
|
||||
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/tlb.h>
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
#include <asm/paravirt.h>
|
||||
#endif
|
||||
|
||||
#include "sched.h"
|
||||
#include "../workqueue_internal.h"
|
||||
#include "../smpboot.h"
|
||||
|
||||
@ -135,7 +109,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
|
||||
* [L] ->on_rq
|
||||
* RELEASE (rq->lock)
|
||||
*
|
||||
* If we observe the old cpu in task_rq_lock, the acquire of
|
||||
* If we observe the old CPU in task_rq_lock, the acquire of
|
||||
* the old rq->lock will fully serialize against the stores.
|
||||
*
|
||||
* If we observe the new CPU in task_rq_lock, the acquire will
|
||||
@ -333,7 +307,7 @@ void hrtick_start(struct rq *rq, u64 delay)
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static void init_rq_hrtick(struct rq *rq)
|
||||
static void hrtick_rq_init(struct rq *rq)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
rq->hrtick_csd_pending = 0;
|
||||
@ -351,7 +325,7 @@ static inline void hrtick_clear(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void init_rq_hrtick(struct rq *rq)
|
||||
static inline void hrtick_rq_init(struct rq *rq)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SCHED_HRTICK */
|
||||
@ -609,7 +583,7 @@ static inline bool got_nohz_idle_kick(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
if (!test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
|
||||
if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
|
||||
return false;
|
||||
|
||||
if (idle_cpu(cpu) && !need_resched())
|
||||
@ -619,7 +593,7 @@ static inline bool got_nohz_idle_kick(void)
|
||||
* We can't run Idle Load Balance on this CPU for this time so we
|
||||
* cancel it and clear NOHZ_BALANCE_KICK
|
||||
*/
|
||||
clear_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
|
||||
atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1457,7 +1431,7 @@ EXPORT_SYMBOL_GPL(kick_process);
|
||||
*
|
||||
* - cpu_active must be a subset of cpu_online
|
||||
*
|
||||
* - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
|
||||
* - on CPU-up we allow per-CPU kthreads on the online && !active CPU,
|
||||
* see __set_cpus_allowed_ptr(). At this point the newly online
|
||||
* CPU isn't yet part of the sched domains, and balancing will not
|
||||
* see it.
|
||||
@ -2488,17 +2462,17 @@ void wake_up_new_task(struct task_struct *p)
|
||||
|
||||
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
||||
|
||||
static struct static_key preempt_notifier_key = STATIC_KEY_INIT_FALSE;
|
||||
static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key);
|
||||
|
||||
void preempt_notifier_inc(void)
|
||||
{
|
||||
static_key_slow_inc(&preempt_notifier_key);
|
||||
static_branch_inc(&preempt_notifier_key);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(preempt_notifier_inc);
|
||||
|
||||
void preempt_notifier_dec(void)
|
||||
{
|
||||
static_key_slow_dec(&preempt_notifier_key);
|
||||
static_branch_dec(&preempt_notifier_key);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(preempt_notifier_dec);
|
||||
|
||||
@ -2508,7 +2482,7 @@ EXPORT_SYMBOL_GPL(preempt_notifier_dec);
|
||||
*/
|
||||
void preempt_notifier_register(struct preempt_notifier *notifier)
|
||||
{
|
||||
if (!static_key_false(&preempt_notifier_key))
|
||||
if (!static_branch_unlikely(&preempt_notifier_key))
|
||||
WARN(1, "registering preempt_notifier while notifiers disabled\n");
|
||||
|
||||
hlist_add_head(¬ifier->link, ¤t->preempt_notifiers);
|
||||
@ -2537,7 +2511,7 @@ static void __fire_sched_in_preempt_notifiers(struct task_struct *curr)
|
||||
|
||||
static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr)
|
||||
{
|
||||
if (static_key_false(&preempt_notifier_key))
|
||||
if (static_branch_unlikely(&preempt_notifier_key))
|
||||
__fire_sched_in_preempt_notifiers(curr);
|
||||
}
|
||||
|
||||
@ -2555,7 +2529,7 @@ static __always_inline void
|
||||
fire_sched_out_preempt_notifiers(struct task_struct *curr,
|
||||
struct task_struct *next)
|
||||
{
|
||||
if (static_key_false(&preempt_notifier_key))
|
||||
if (static_branch_unlikely(&preempt_notifier_key))
|
||||
__fire_sched_out_preempt_notifiers(curr, next);
|
||||
}
|
||||
|
||||
@ -2629,6 +2603,18 @@ static inline void finish_lock_switch(struct rq *rq)
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* NOP if the arch has not defined these:
|
||||
*/
|
||||
|
||||
#ifndef prepare_arch_switch
|
||||
# define prepare_arch_switch(next) do { } while (0)
|
||||
#endif
|
||||
|
||||
#ifndef finish_arch_post_lock_switch
|
||||
# define finish_arch_post_lock_switch() do { } while (0)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* prepare_task_switch - prepare to switch tasks
|
||||
* @rq: the runqueue preparing to switch
|
||||
@ -3037,7 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
|
||||
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
|
||||
/*
|
||||
* 64-bit doesn't need locks to atomically read a 64bit value.
|
||||
* 64-bit doesn't need locks to atomically read a 64-bit value.
|
||||
* So we have a optimization chance when the task's delta_exec is 0.
|
||||
* Reading ->on_cpu is racy, but this is ok.
|
||||
*
|
||||
@ -3096,35 +3082,99 @@ void scheduler_tick(void)
|
||||
rq->idle_balance = idle_cpu(cpu);
|
||||
trigger_load_balance(rq);
|
||||
#endif
|
||||
rq_last_tick_reset(rq);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
/**
|
||||
* scheduler_tick_max_deferment
|
||||
*
|
||||
* Keep at least one tick per second when a single
|
||||
* active task is running because the scheduler doesn't
|
||||
* yet completely support full dynticks environment.
|
||||
*
|
||||
* This makes sure that uptime, CFS vruntime, load
|
||||
* balancing, etc... continue to move forward, even
|
||||
* with a very low granularity.
|
||||
*
|
||||
* Return: Maximum deferment in nanoseconds.
|
||||
*/
|
||||
u64 scheduler_tick_max_deferment(void)
|
||||
|
||||
struct tick_work {
|
||||
int cpu;
|
||||
struct delayed_work work;
|
||||
};
|
||||
|
||||
static struct tick_work __percpu *tick_work_cpu;
|
||||
|
||||
static void sched_tick_remote(struct work_struct *work)
|
||||
{
|
||||
struct rq *rq = this_rq();
|
||||
unsigned long next, now = READ_ONCE(jiffies);
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
struct tick_work *twork = container_of(dwork, struct tick_work, work);
|
||||
int cpu = twork->cpu;
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct rq_flags rf;
|
||||
|
||||
next = rq->last_sched_tick + HZ;
|
||||
/*
|
||||
* Handle the tick only if it appears the remote CPU is running in full
|
||||
* dynticks mode. The check is racy by nature, but missing a tick or
|
||||
* having one too much is no big deal because the scheduler tick updates
|
||||
* statistics and checks timeslices in a time-independent way, regardless
|
||||
* of when exactly it is running.
|
||||
*/
|
||||
if (!idle_cpu(cpu) && tick_nohz_tick_stopped_cpu(cpu)) {
|
||||
struct task_struct *curr;
|
||||
u64 delta;
|
||||
|
||||
if (time_before_eq(next, now))
|
||||
return 0;
|
||||
rq_lock_irq(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
curr = rq->curr;
|
||||
delta = rq_clock_task(rq) - curr->se.exec_start;
|
||||
|
||||
return jiffies_to_nsecs(next - now);
|
||||
/*
|
||||
* Make sure the next tick runs within a reasonable
|
||||
* amount of time.
|
||||
*/
|
||||
WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
|
||||
curr->sched_class->task_tick(rq, curr, 0);
|
||||
rq_unlock_irq(rq, &rf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Run the remote tick once per second (1Hz). This arbitrary
|
||||
* frequency is large enough to avoid overload but short enough
|
||||
* to keep scheduler internal stats reasonably up to date.
|
||||
*/
|
||||
queue_delayed_work(system_unbound_wq, dwork, HZ);
|
||||
}
|
||||
|
||||
static void sched_tick_start(int cpu)
|
||||
{
|
||||
struct tick_work *twork;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_FLAG_TICK))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!tick_work_cpu);
|
||||
|
||||
twork = per_cpu_ptr(tick_work_cpu, cpu);
|
||||
twork->cpu = cpu;
|
||||
INIT_DELAYED_WORK(&twork->work, sched_tick_remote);
|
||||
queue_delayed_work(system_unbound_wq, &twork->work, HZ);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static void sched_tick_stop(int cpu)
|
||||
{
|
||||
struct tick_work *twork;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_FLAG_TICK))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!tick_work_cpu);
|
||||
|
||||
twork = per_cpu_ptr(tick_work_cpu, cpu);
|
||||
cancel_delayed_work_sync(&twork->work);
|
||||
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
int __init sched_tick_offload_init(void)
|
||||
{
|
||||
tick_work_cpu = alloc_percpu(struct tick_work);
|
||||
BUG_ON(!tick_work_cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_NO_HZ_FULL */
|
||||
static inline void sched_tick_start(int cpu) { }
|
||||
static inline void sched_tick_stop(int cpu) { }
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
|
||||
@ -5786,6 +5836,7 @@ int sched_cpu_starting(unsigned int cpu)
|
||||
{
|
||||
set_cpu_rq_start_time(cpu);
|
||||
sched_rq_cpu_starting(cpu);
|
||||
sched_tick_start(cpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -5797,6 +5848,7 @@ int sched_cpu_dying(unsigned int cpu)
|
||||
|
||||
/* Handle pending wakeups and then migrate everything off */
|
||||
sched_ttwu_pending();
|
||||
sched_tick_stop(cpu);
|
||||
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
if (rq->rd) {
|
||||
@ -5809,7 +5861,7 @@ int sched_cpu_dying(unsigned int cpu)
|
||||
|
||||
calc_load_migrate(rq);
|
||||
update_max_interval();
|
||||
nohz_balance_exit_idle(cpu);
|
||||
nohz_balance_exit_idle(rq);
|
||||
hrtick_clear(rq);
|
||||
return 0;
|
||||
}
|
||||
@ -6022,13 +6074,11 @@ void __init sched_init(void)
|
||||
rq_attach_root(rq, &def_root_domain);
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
rq->last_load_update_tick = jiffies;
|
||||
rq->nohz_flags = 0;
|
||||
#endif
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
rq->last_sched_tick = 0;
|
||||
rq->last_blocked_load_update_tick = jiffies;
|
||||
atomic_set(&rq->nohz_flags, 0);
|
||||
#endif
|
||||
#endif /* CONFIG_SMP */
|
||||
init_rq_hrtick(rq);
|
||||
hrtick_rq_init(rq);
|
||||
atomic_set(&rq->nr_iowait, 0);
|
||||
}
|
||||
|
||||
@ -7027,3 +7077,5 @@ const u32 sched_prio_to_wmult[40] = {
|
||||
/* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
|
||||
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
|
||||
};
|
||||
|
||||
#undef CREATE_TRACE_POINTS
|
||||
|
@ -1,24 +1,13 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* CPU accounting code for task groups.
|
||||
*
|
||||
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
|
||||
* (balbir@in.ibm.com).
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
/* Time spent by the tasks of the cpu accounting group executing in ... */
|
||||
/* Time spent by the tasks of the CPU accounting group executing in ... */
|
||||
enum cpuacct_stat_index {
|
||||
CPUACCT_STAT_USER, /* ... user mode */
|
||||
CPUACCT_STAT_SYSTEM, /* ... kernel mode */
|
||||
@ -35,12 +24,12 @@ struct cpuacct_usage {
|
||||
u64 usages[CPUACCT_STAT_NSTATS];
|
||||
};
|
||||
|
||||
/* track cpu usage of a group of tasks and its child groups */
|
||||
/* track CPU usage of a group of tasks and its child groups */
|
||||
struct cpuacct {
|
||||
struct cgroup_subsys_state css;
|
||||
/* cpuusage holds pointer to a u64-type object on every cpu */
|
||||
struct cpuacct_usage __percpu *cpuusage;
|
||||
struct kernel_cpustat __percpu *cpustat;
|
||||
struct cgroup_subsys_state css;
|
||||
/* cpuusage holds pointer to a u64-type object on every CPU */
|
||||
struct cpuacct_usage __percpu *cpuusage;
|
||||
struct kernel_cpustat __percpu *cpustat;
|
||||
};
|
||||
|
||||
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
|
||||
@ -48,7 +37,7 @@ static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
|
||||
return css ? container_of(css, struct cpuacct, css) : NULL;
|
||||
}
|
||||
|
||||
/* return cpu accounting group to which this task belongs */
|
||||
/* Return CPU accounting group to which this task belongs */
|
||||
static inline struct cpuacct *task_ca(struct task_struct *tsk)
|
||||
{
|
||||
return css_ca(task_css(tsk, cpuacct_cgrp_id));
|
||||
@ -65,7 +54,7 @@ static struct cpuacct root_cpuacct = {
|
||||
.cpuusage = &root_cpuacct_cpuusage,
|
||||
};
|
||||
|
||||
/* create a new cpu accounting group */
|
||||
/* Create a new CPU accounting group */
|
||||
static struct cgroup_subsys_state *
|
||||
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
{
|
||||
@ -96,7 +85,7 @@ out:
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* destroy an existing cpu accounting group */
|
||||
/* Destroy an existing CPU accounting group */
|
||||
static void cpuacct_css_free(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct cpuacct *ca = css_ca(css);
|
||||
@ -162,7 +151,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return total cpu usage (in nanoseconds) of a group */
|
||||
/* Return total CPU usage (in nanoseconds) of a group */
|
||||
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
|
||||
enum cpuacct_stat_index index)
|
||||
{
|
||||
|
@ -10,11 +10,7 @@
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include "cpudeadline.h"
|
||||
#include "sched.h"
|
||||
|
||||
static inline int parent(int i)
|
||||
{
|
||||
@ -42,8 +38,9 @@ static void cpudl_heapify_down(struct cpudl *cp, int idx)
|
||||
return;
|
||||
|
||||
/* adapted from lib/prio_heap.c */
|
||||
while(1) {
|
||||
while (1) {
|
||||
u64 largest_dl;
|
||||
|
||||
l = left_child(idx);
|
||||
r = right_child(idx);
|
||||
largest = idx;
|
||||
@ -131,6 +128,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
|
||||
return 1;
|
||||
} else {
|
||||
int best_cpu = cpudl_maximum(cp);
|
||||
|
||||
WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
|
||||
|
||||
if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
|
||||
@ -145,9 +143,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
|
||||
}
|
||||
|
||||
/*
|
||||
* cpudl_clear - remove a cpu from the cpudl max-heap
|
||||
* cpudl_clear - remove a CPU from the cpudl max-heap
|
||||
* @cp: the cpudl max-heap context
|
||||
* @cpu: the target cpu
|
||||
* @cpu: the target CPU
|
||||
*
|
||||
* Notes: assumes cpu_rq(cpu)->lock is locked
|
||||
*
|
||||
@ -186,8 +184,8 @@ void cpudl_clear(struct cpudl *cp, int cpu)
|
||||
/*
|
||||
* cpudl_set - update the cpudl max-heap
|
||||
* @cp: the cpudl max-heap context
|
||||
* @cpu: the target cpu
|
||||
* @dl: the new earliest deadline for this cpu
|
||||
* @cpu: the target CPU
|
||||
* @dl: the new earliest deadline for this CPU
|
||||
*
|
||||
* Notes: assumes cpu_rq(cpu)->lock is locked
|
||||
*
|
||||
@ -205,6 +203,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
|
||||
old_idx = cp->elements[cpu].idx;
|
||||
if (old_idx == IDX_INVALID) {
|
||||
int new_idx = cp->size++;
|
||||
|
||||
cp->elements[new_idx].dl = dl;
|
||||
cp->elements[new_idx].cpu = cpu;
|
||||
cp->elements[cpu].idx = new_idx;
|
||||
@ -221,7 +220,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
|
||||
/*
|
||||
* cpudl_set_freecpu - Set the cpudl.free_cpus
|
||||
* @cp: the cpudl max-heap context
|
||||
* @cpu: rd attached cpu
|
||||
* @cpu: rd attached CPU
|
||||
*/
|
||||
void cpudl_set_freecpu(struct cpudl *cp, int cpu)
|
||||
{
|
||||
@ -231,7 +230,7 @@ void cpudl_set_freecpu(struct cpudl *cp, int cpu)
|
||||
/*
|
||||
* cpudl_clear_freecpu - Clear the cpudl.free_cpus
|
||||
* @cp: the cpudl max-heap context
|
||||
* @cpu: rd attached cpu
|
||||
* @cpu: rd attached CPU
|
||||
*/
|
||||
void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
|
||||
{
|
||||
|
@ -1,35 +1,26 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_CPUDL_H
|
||||
#define _LINUX_CPUDL_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/deadline.h>
|
||||
|
||||
#define IDX_INVALID -1
|
||||
#define IDX_INVALID -1
|
||||
|
||||
struct cpudl_item {
|
||||
u64 dl;
|
||||
int cpu;
|
||||
int idx;
|
||||
u64 dl;
|
||||
int cpu;
|
||||
int idx;
|
||||
};
|
||||
|
||||
struct cpudl {
|
||||
raw_spinlock_t lock;
|
||||
int size;
|
||||
cpumask_var_t free_cpus;
|
||||
struct cpudl_item *elements;
|
||||
raw_spinlock_t lock;
|
||||
int size;
|
||||
cpumask_var_t free_cpus;
|
||||
struct cpudl_item *elements;
|
||||
};
|
||||
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
int cpudl_find(struct cpudl *cp, struct task_struct *p,
|
||||
struct cpumask *later_mask);
|
||||
int cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask);
|
||||
void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
|
||||
void cpudl_clear(struct cpudl *cp, int cpu);
|
||||
int cpudl_init(struct cpudl *cp);
|
||||
int cpudl_init(struct cpudl *cp);
|
||||
void cpudl_set_freecpu(struct cpudl *cp, int cpu);
|
||||
void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
|
||||
void cpudl_cleanup(struct cpudl *cp);
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#endif /* _LINUX_CPUDL_H */
|
||||
|
@ -8,7 +8,6 @@
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
|
||||
|
@ -11,61 +11,56 @@
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <uapi/linux/sched/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <trace/events/power.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
#include <trace/events/power.h>
|
||||
|
||||
struct sugov_tunables {
|
||||
struct gov_attr_set attr_set;
|
||||
unsigned int rate_limit_us;
|
||||
struct gov_attr_set attr_set;
|
||||
unsigned int rate_limit_us;
|
||||
};
|
||||
|
||||
struct sugov_policy {
|
||||
struct cpufreq_policy *policy;
|
||||
struct cpufreq_policy *policy;
|
||||
|
||||
struct sugov_tunables *tunables;
|
||||
struct list_head tunables_hook;
|
||||
struct sugov_tunables *tunables;
|
||||
struct list_head tunables_hook;
|
||||
|
||||
raw_spinlock_t update_lock; /* For shared policies */
|
||||
u64 last_freq_update_time;
|
||||
s64 freq_update_delay_ns;
|
||||
unsigned int next_freq;
|
||||
unsigned int cached_raw_freq;
|
||||
raw_spinlock_t update_lock; /* For shared policies */
|
||||
u64 last_freq_update_time;
|
||||
s64 freq_update_delay_ns;
|
||||
unsigned int next_freq;
|
||||
unsigned int cached_raw_freq;
|
||||
|
||||
/* The next fields are only needed if fast switch cannot be used. */
|
||||
struct irq_work irq_work;
|
||||
struct kthread_work work;
|
||||
struct mutex work_lock;
|
||||
struct kthread_worker worker;
|
||||
struct task_struct *thread;
|
||||
bool work_in_progress;
|
||||
/* The next fields are only needed if fast switch cannot be used: */
|
||||
struct irq_work irq_work;
|
||||
struct kthread_work work;
|
||||
struct mutex work_lock;
|
||||
struct kthread_worker worker;
|
||||
struct task_struct *thread;
|
||||
bool work_in_progress;
|
||||
|
||||
bool need_freq_update;
|
||||
bool need_freq_update;
|
||||
};
|
||||
|
||||
struct sugov_cpu {
|
||||
struct update_util_data update_util;
|
||||
struct sugov_policy *sg_policy;
|
||||
unsigned int cpu;
|
||||
struct update_util_data update_util;
|
||||
struct sugov_policy *sg_policy;
|
||||
unsigned int cpu;
|
||||
|
||||
bool iowait_boost_pending;
|
||||
unsigned int iowait_boost;
|
||||
unsigned int iowait_boost_max;
|
||||
bool iowait_boost_pending;
|
||||
unsigned int iowait_boost;
|
||||
unsigned int iowait_boost_max;
|
||||
u64 last_update;
|
||||
|
||||
/* The fields below are only needed when sharing a policy. */
|
||||
unsigned long util_cfs;
|
||||
unsigned long util_dl;
|
||||
unsigned long max;
|
||||
unsigned int flags;
|
||||
/* The fields below are only needed when sharing a policy: */
|
||||
unsigned long util_cfs;
|
||||
unsigned long util_dl;
|
||||
unsigned long max;
|
||||
|
||||
/* The field below is for single-CPU policies only. */
|
||||
/* The field below is for single-CPU policies only: */
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
unsigned long saved_idle_calls;
|
||||
unsigned long saved_idle_calls;
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -79,9 +74,9 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
|
||||
|
||||
/*
|
||||
* Since cpufreq_update_util() is called with rq->lock held for
|
||||
* the @target_cpu, our per-cpu data is fully serialized.
|
||||
* the @target_cpu, our per-CPU data is fully serialized.
|
||||
*
|
||||
* However, drivers cannot in general deal with cross-cpu
|
||||
* However, drivers cannot in general deal with cross-CPU
|
||||
* requests, so while get_next_freq() will work, our
|
||||
* sugov_update_commit() call may not for the fast switching platforms.
|
||||
*
|
||||
@ -111,6 +106,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
|
||||
}
|
||||
|
||||
delta_ns = time - sg_policy->last_freq_update_time;
|
||||
|
||||
return delta_ns >= sg_policy->freq_update_delay_ns;
|
||||
}
|
||||
|
||||
@ -186,17 +182,28 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
|
||||
|
||||
static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(sg_cpu->cpu);
|
||||
unsigned long util;
|
||||
|
||||
if (rq->rt.rt_nr_running) {
|
||||
util = sg_cpu->max;
|
||||
} else {
|
||||
util = sg_cpu->util_dl;
|
||||
if (rq->cfs.h_nr_running)
|
||||
util += sg_cpu->util_cfs;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ideally we would like to set util_dl as min/guaranteed freq and
|
||||
* util_cfs + util_dl as requested freq. However, cpufreq is not yet
|
||||
* ready for such an interface. So, we only do the latter for now.
|
||||
*/
|
||||
return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
|
||||
return min(util, sg_cpu->max);
|
||||
}
|
||||
|
||||
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
|
||||
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
|
||||
{
|
||||
if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
|
||||
if (flags & SCHED_CPUFREQ_IOWAIT) {
|
||||
if (sg_cpu->iowait_boost_pending)
|
||||
return;
|
||||
|
||||
@ -260,43 +267,51 @@ static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
|
||||
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
|
||||
#endif /* CONFIG_NO_HZ_COMMON */
|
||||
|
||||
/*
|
||||
* Make sugov_should_update_freq() ignore the rate limit when DL
|
||||
* has increased the utilization.
|
||||
*/
|
||||
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
|
||||
{
|
||||
if (cpu_util_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->util_dl)
|
||||
sg_policy->need_freq_update = true;
|
||||
}
|
||||
|
||||
static void sugov_update_single(struct update_util_data *hook, u64 time,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
|
||||
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
|
||||
struct cpufreq_policy *policy = sg_policy->policy;
|
||||
unsigned long util, max;
|
||||
unsigned int next_f;
|
||||
bool busy;
|
||||
|
||||
sugov_set_iowait_boost(sg_cpu, time);
|
||||
sugov_set_iowait_boost(sg_cpu, time, flags);
|
||||
sg_cpu->last_update = time;
|
||||
|
||||
ignore_dl_rate_limit(sg_cpu, sg_policy);
|
||||
|
||||
if (!sugov_should_update_freq(sg_policy, time))
|
||||
return;
|
||||
|
||||
busy = sugov_cpu_is_busy(sg_cpu);
|
||||
|
||||
if (flags & SCHED_CPUFREQ_RT) {
|
||||
next_f = policy->cpuinfo.max_freq;
|
||||
} else {
|
||||
sugov_get_util(sg_cpu);
|
||||
max = sg_cpu->max;
|
||||
util = sugov_aggregate_util(sg_cpu);
|
||||
sugov_iowait_boost(sg_cpu, &util, &max);
|
||||
next_f = get_next_freq(sg_policy, util, max);
|
||||
/*
|
||||
* Do not reduce the frequency if the CPU has not been idle
|
||||
* recently, as the reduction is likely to be premature then.
|
||||
*/
|
||||
if (busy && next_f < sg_policy->next_freq) {
|
||||
next_f = sg_policy->next_freq;
|
||||
sugov_get_util(sg_cpu);
|
||||
max = sg_cpu->max;
|
||||
util = sugov_aggregate_util(sg_cpu);
|
||||
sugov_iowait_boost(sg_cpu, &util, &max);
|
||||
next_f = get_next_freq(sg_policy, util, max);
|
||||
/*
|
||||
* Do not reduce the frequency if the CPU has not been idle
|
||||
* recently, as the reduction is likely to be premature then.
|
||||
*/
|
||||
if (busy && next_f < sg_policy->next_freq) {
|
||||
next_f = sg_policy->next_freq;
|
||||
|
||||
/* Reset cached freq as next_freq has changed */
|
||||
sg_policy->cached_raw_freq = 0;
|
||||
}
|
||||
/* Reset cached freq as next_freq has changed */
|
||||
sg_policy->cached_raw_freq = 0;
|
||||
}
|
||||
|
||||
sugov_update_commit(sg_policy, time, next_f);
|
||||
}
|
||||
|
||||
@ -312,6 +327,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
|
||||
unsigned long j_util, j_max;
|
||||
s64 delta_ns;
|
||||
|
||||
sugov_get_util(j_sg_cpu);
|
||||
|
||||
/*
|
||||
* If the CFS CPU utilization was last updated before the
|
||||
* previous frequency update and the time elapsed between the
|
||||
@ -325,28 +342,22 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
|
||||
if (delta_ns > TICK_NSEC) {
|
||||
j_sg_cpu->iowait_boost = 0;
|
||||
j_sg_cpu->iowait_boost_pending = false;
|
||||
j_sg_cpu->util_cfs = 0;
|
||||
if (j_sg_cpu->util_dl == 0)
|
||||
continue;
|
||||
}
|
||||
if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
|
||||
return policy->cpuinfo.max_freq;
|
||||
|
||||
j_max = j_sg_cpu->max;
|
||||
j_util = sugov_aggregate_util(j_sg_cpu);
|
||||
sugov_iowait_boost(j_sg_cpu, &j_util, &j_max);
|
||||
if (j_util * max > j_max * util) {
|
||||
util = j_util;
|
||||
max = j_max;
|
||||
}
|
||||
|
||||
sugov_iowait_boost(j_sg_cpu, &util, &max);
|
||||
}
|
||||
|
||||
return get_next_freq(sg_policy, util, max);
|
||||
}
|
||||
|
||||
static void sugov_update_shared(struct update_util_data *hook, u64 time,
|
||||
unsigned int flags)
|
||||
static void
|
||||
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
|
||||
{
|
||||
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
|
||||
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
|
||||
@ -354,18 +365,13 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
|
||||
|
||||
raw_spin_lock(&sg_policy->update_lock);
|
||||
|
||||
sugov_get_util(sg_cpu);
|
||||
sg_cpu->flags = flags;
|
||||
|
||||
sugov_set_iowait_boost(sg_cpu, time);
|
||||
sugov_set_iowait_boost(sg_cpu, time, flags);
|
||||
sg_cpu->last_update = time;
|
||||
|
||||
if (sugov_should_update_freq(sg_policy, time)) {
|
||||
if (flags & SCHED_CPUFREQ_RT)
|
||||
next_f = sg_policy->policy->cpuinfo.max_freq;
|
||||
else
|
||||
next_f = sugov_next_freq_shared(sg_cpu, time);
|
||||
ignore_dl_rate_limit(sg_cpu, sg_policy);
|
||||
|
||||
if (sugov_should_update_freq(sg_policy, time)) {
|
||||
next_f = sugov_next_freq_shared(sg_cpu, time);
|
||||
sugov_update_commit(sg_policy, time, next_f);
|
||||
}
|
||||
|
||||
@ -423,8 +429,8 @@ static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
|
||||
return sprintf(buf, "%u\n", tunables->rate_limit_us);
|
||||
}
|
||||
|
||||
static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
|
||||
size_t count)
|
||||
static ssize_t
|
||||
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
|
||||
{
|
||||
struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
|
||||
struct sugov_policy *sg_policy;
|
||||
@ -479,11 +485,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
|
||||
{
|
||||
struct task_struct *thread;
|
||||
struct sched_attr attr = {
|
||||
.size = sizeof(struct sched_attr),
|
||||
.sched_policy = SCHED_DEADLINE,
|
||||
.sched_flags = SCHED_FLAG_SUGOV,
|
||||
.sched_nice = 0,
|
||||
.sched_priority = 0,
|
||||
.size = sizeof(struct sched_attr),
|
||||
.sched_policy = SCHED_DEADLINE,
|
||||
.sched_flags = SCHED_FLAG_SUGOV,
|
||||
.sched_nice = 0,
|
||||
.sched_priority = 0,
|
||||
/*
|
||||
* Fake (unused) bandwidth; workaround to "fix"
|
||||
* priority inheritance.
|
||||
@ -663,21 +669,20 @@ static int sugov_start(struct cpufreq_policy *policy)
|
||||
struct sugov_policy *sg_policy = policy->governor_data;
|
||||
unsigned int cpu;
|
||||
|
||||
sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
|
||||
sg_policy->last_freq_update_time = 0;
|
||||
sg_policy->next_freq = UINT_MAX;
|
||||
sg_policy->work_in_progress = false;
|
||||
sg_policy->need_freq_update = false;
|
||||
sg_policy->cached_raw_freq = 0;
|
||||
sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
|
||||
sg_policy->last_freq_update_time = 0;
|
||||
sg_policy->next_freq = UINT_MAX;
|
||||
sg_policy->work_in_progress = false;
|
||||
sg_policy->need_freq_update = false;
|
||||
sg_policy->cached_raw_freq = 0;
|
||||
|
||||
for_each_cpu(cpu, policy->cpus) {
|
||||
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
|
||||
|
||||
memset(sg_cpu, 0, sizeof(*sg_cpu));
|
||||
sg_cpu->cpu = cpu;
|
||||
sg_cpu->sg_policy = sg_policy;
|
||||
sg_cpu->flags = 0;
|
||||
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
|
||||
sg_cpu->cpu = cpu;
|
||||
sg_cpu->sg_policy = sg_policy;
|
||||
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
|
||||
}
|
||||
|
||||
for_each_cpu(cpu, policy->cpus) {
|
||||
@ -721,14 +726,14 @@ static void sugov_limits(struct cpufreq_policy *policy)
|
||||
}
|
||||
|
||||
static struct cpufreq_governor schedutil_gov = {
|
||||
.name = "schedutil",
|
||||
.owner = THIS_MODULE,
|
||||
.dynamic_switching = true,
|
||||
.init = sugov_init,
|
||||
.exit = sugov_exit,
|
||||
.start = sugov_start,
|
||||
.stop = sugov_stop,
|
||||
.limits = sugov_limits,
|
||||
.name = "schedutil",
|
||||
.owner = THIS_MODULE,
|
||||
.dynamic_switching = true,
|
||||
.init = sugov_init,
|
||||
.exit = sugov_exit,
|
||||
.start = sugov_start,
|
||||
.stop = sugov_stop,
|
||||
.limits = sugov_limits,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
|
||||
|
@ -14,7 +14,7 @@
|
||||
*
|
||||
* going from the lowest priority to the highest. CPUs in the INVALID state
|
||||
* are not eligible for routing. The system maintains this state with
|
||||
* a 2 dimensional bitmap (the first for priority class, the second for cpus
|
||||
* a 2 dimensional bitmap (the first for priority class, the second for CPUs
|
||||
* in that class). Therefore a typical application without affinity
|
||||
* restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
|
||||
* searches). For tasks with affinity restrictions, the algorithm has a
|
||||
@ -26,12 +26,7 @@
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/rt.h>
|
||||
#include <linux/slab.h>
|
||||
#include "cpupri.h"
|
||||
#include "sched.h"
|
||||
|
||||
/* Convert between a 140 based task->prio, and our 102 based cpupri */
|
||||
static int convert_prio(int prio)
|
||||
@ -128,9 +123,9 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
|
||||
}
|
||||
|
||||
/**
|
||||
* cpupri_set - update the cpu priority setting
|
||||
* cpupri_set - update the CPU priority setting
|
||||
* @cp: The cpupri context
|
||||
* @cpu: The target cpu
|
||||
* @cpu: The target CPU
|
||||
* @newpri: The priority (INVALID-RT99) to assign to this CPU
|
||||
*
|
||||
* Note: Assumes cpu_rq(cpu)->lock is locked
|
||||
@ -151,7 +146,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the cpu was currently mapped to a different value, we
|
||||
* If the CPU was currently mapped to a different value, we
|
||||
* need to map it to the new value then remove the old value.
|
||||
* Note, we must add the new value first, otherwise we risk the
|
||||
* cpu being missed by the priority loop in cpupri_find.
|
||||
|
@ -1,32 +1,25 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_CPUPRI_H
|
||||
#define _LINUX_CPUPRI_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
|
||||
#define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2)
|
||||
|
||||
#define CPUPRI_INVALID -1
|
||||
#define CPUPRI_IDLE 0
|
||||
#define CPUPRI_NORMAL 1
|
||||
#define CPUPRI_INVALID -1
|
||||
#define CPUPRI_IDLE 0
|
||||
#define CPUPRI_NORMAL 1
|
||||
/* values 2-101 are RT priorities 0-99 */
|
||||
|
||||
struct cpupri_vec {
|
||||
atomic_t count;
|
||||
cpumask_var_t mask;
|
||||
atomic_t count;
|
||||
cpumask_var_t mask;
|
||||
};
|
||||
|
||||
struct cpupri {
|
||||
struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
|
||||
int *cpu_to_pri;
|
||||
struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
|
||||
int *cpu_to_pri;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
int cpupri_find(struct cpupri *cp,
|
||||
struct task_struct *p, struct cpumask *lowest_mask);
|
||||
int cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask);
|
||||
void cpupri_set(struct cpupri *cp, int cpu, int pri);
|
||||
int cpupri_init(struct cpupri *cp);
|
||||
int cpupri_init(struct cpupri *cp);
|
||||
void cpupri_cleanup(struct cpupri *cp);
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_CPUPRI_H */
|
||||
|
@ -1,10 +1,6 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/tsacct_kern.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/static_key.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
/*
|
||||
* Simple CPU accounting cgroup controller
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
@ -113,9 +109,9 @@ static inline void task_group_account_field(struct task_struct *p, int index,
|
||||
}
|
||||
|
||||
/*
|
||||
* Account user cpu time to a process.
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
* @cputime: the cpu time spent in user space since the last update
|
||||
* Account user CPU time to a process.
|
||||
* @p: the process that the CPU time gets accounted to
|
||||
* @cputime: the CPU time spent in user space since the last update
|
||||
*/
|
||||
void account_user_time(struct task_struct *p, u64 cputime)
|
||||
{
|
||||
@ -135,9 +131,9 @@ void account_user_time(struct task_struct *p, u64 cputime)
|
||||
}
|
||||
|
||||
/*
|
||||
* Account guest cpu time to a process.
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
* @cputime: the cpu time spent in virtual machine since the last update
|
||||
* Account guest CPU time to a process.
|
||||
* @p: the process that the CPU time gets accounted to
|
||||
* @cputime: the CPU time spent in virtual machine since the last update
|
||||
*/
|
||||
void account_guest_time(struct task_struct *p, u64 cputime)
|
||||
{
|
||||
@ -159,9 +155,9 @@ void account_guest_time(struct task_struct *p, u64 cputime)
|
||||
}
|
||||
|
||||
/*
|
||||
* Account system cpu time to a process and desired cpustat field
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
* @cputime: the cpu time spent in kernel space since the last update
|
||||
* Account system CPU time to a process and desired cpustat field
|
||||
* @p: the process that the CPU time gets accounted to
|
||||
* @cputime: the CPU time spent in kernel space since the last update
|
||||
* @index: pointer to cpustat field that has to be updated
|
||||
*/
|
||||
void account_system_index_time(struct task_struct *p,
|
||||
@ -179,10 +175,10 @@ void account_system_index_time(struct task_struct *p,
|
||||
}
|
||||
|
||||
/*
|
||||
* Account system cpu time to a process.
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
* Account system CPU time to a process.
|
||||
* @p: the process that the CPU time gets accounted to
|
||||
* @hardirq_offset: the offset to subtract from hardirq_count()
|
||||
* @cputime: the cpu time spent in kernel space since the last update
|
||||
* @cputime: the CPU time spent in kernel space since the last update
|
||||
*/
|
||||
void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
|
||||
{
|
||||
@ -205,7 +201,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
|
||||
|
||||
/*
|
||||
* Account for involuntary wait time.
|
||||
* @cputime: the cpu time spent in involuntary wait
|
||||
* @cputime: the CPU time spent in involuntary wait
|
||||
*/
|
||||
void account_steal_time(u64 cputime)
|
||||
{
|
||||
@ -216,7 +212,7 @@ void account_steal_time(u64 cputime)
|
||||
|
||||
/*
|
||||
* Account for idle time.
|
||||
* @cputime: the cpu time spent in idle wait
|
||||
* @cputime: the CPU time spent in idle wait
|
||||
*/
|
||||
void account_idle_time(u64 cputime)
|
||||
{
|
||||
@ -338,7 +334,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
/*
|
||||
* Account a tick to a process and cpustat
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
* @p: the process that the CPU time gets accounted to
|
||||
* @user_tick: is the tick from userspace
|
||||
* @rq: the pointer to rq
|
||||
*
|
||||
@ -400,17 +396,16 @@ static void irqtime_account_idle_ticks(int ticks)
|
||||
irqtime_account_process_tick(current, 0, rq, ticks);
|
||||
}
|
||||
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
|
||||
static inline void irqtime_account_idle_ticks(int ticks) {}
|
||||
static inline void irqtime_account_idle_ticks(int ticks) { }
|
||||
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
|
||||
struct rq *rq, int nr_ticks) {}
|
||||
struct rq *rq, int nr_ticks) { }
|
||||
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
|
||||
|
||||
/*
|
||||
* Use precise platform statistics if available:
|
||||
*/
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
|
||||
#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
|
||||
# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
|
||||
void vtime_common_task_switch(struct task_struct *prev)
|
||||
{
|
||||
if (is_idle_task(prev))
|
||||
@ -421,8 +416,7 @@ void vtime_common_task_switch(struct task_struct *prev)
|
||||
vtime_flush(prev);
|
||||
arch_vtime_task_switch(prev);
|
||||
}
|
||||
#endif
|
||||
|
||||
# endif
|
||||
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
|
||||
|
||||
|
||||
@ -469,10 +463,12 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
|
||||
*ut = cputime.utime;
|
||||
*st = cputime.stime;
|
||||
}
|
||||
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
|
||||
|
||||
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */
|
||||
|
||||
/*
|
||||
* Account a single tick of cpu time.
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
* Account a single tick of CPU time.
|
||||
* @p: the process that the CPU time gets accounted to
|
||||
* @user_tick: indicates if the tick is a user or a system tick
|
||||
*/
|
||||
void account_process_tick(struct task_struct *p, int user_tick)
|
||||
|
@ -17,9 +17,6 @@
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <uapi/linux/sched/types.h>
|
||||
|
||||
struct dl_bandwidth def_dl_bandwidth;
|
||||
|
||||
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
|
||||
@ -87,7 +84,7 @@ void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
|
||||
SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
|
||||
SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
|
||||
/* kick cpufreq (see the comment in kernel/sched/sched.h). */
|
||||
cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
|
||||
cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
@ -101,7 +98,7 @@ void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
|
||||
if (dl_rq->running_bw > old)
|
||||
dl_rq->running_bw = 0;
|
||||
/* kick cpufreq (see the comment in kernel/sched/sched.h). */
|
||||
cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
|
||||
cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
@ -514,7 +511,7 @@ static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
|
||||
static void push_dl_tasks(struct rq *);
|
||||
static void pull_dl_task(struct rq *);
|
||||
|
||||
static inline void queue_push_tasks(struct rq *rq)
|
||||
static inline void deadline_queue_push_tasks(struct rq *rq)
|
||||
{
|
||||
if (!has_pushable_dl_tasks(rq))
|
||||
return;
|
||||
@ -522,7 +519,7 @@ static inline void queue_push_tasks(struct rq *rq)
|
||||
queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
|
||||
}
|
||||
|
||||
static inline void queue_pull_task(struct rq *rq)
|
||||
static inline void deadline_queue_pull_task(struct rq *rq)
|
||||
{
|
||||
queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
|
||||
}
|
||||
@ -539,12 +536,12 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
|
||||
|
||||
/*
|
||||
* If we cannot preempt any rq, fall back to pick any
|
||||
* online cpu.
|
||||
* online CPU:
|
||||
*/
|
||||
cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
|
||||
if (cpu >= nr_cpu_ids) {
|
||||
/*
|
||||
* Fail to find any suitable cpu.
|
||||
* Failed to find any suitable CPU.
|
||||
* The task will never come back!
|
||||
*/
|
||||
BUG_ON(dl_bandwidth_enabled());
|
||||
@ -597,19 +594,18 @@ static inline void pull_dl_task(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void queue_push_tasks(struct rq *rq)
|
||||
static inline void deadline_queue_push_tasks(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void queue_pull_task(struct rq *rq)
|
||||
static inline void deadline_queue_pull_task(struct rq *rq)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
|
||||
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
|
||||
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
|
||||
int flags);
|
||||
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
|
||||
|
||||
/*
|
||||
* We are being explicitly informed that a new instance is starting,
|
||||
@ -1763,7 +1759,7 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
if (hrtick_enabled(rq))
|
||||
start_hrtick_dl(rq, p);
|
||||
|
||||
queue_push_tasks(rq);
|
||||
deadline_queue_push_tasks(rq);
|
||||
|
||||
return p;
|
||||
}
|
||||
@ -1776,6 +1772,14 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
|
||||
enqueue_pushable_dl_task(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* scheduler tick hitting a task of our scheduling class.
|
||||
*
|
||||
* NOTE: This function can be called remotely by the tick offload that
|
||||
* goes along full dynticks. Therefore no local assumption can be made
|
||||
* and everything must be accessed through the @rq and @curr passed in
|
||||
* parameters.
|
||||
*/
|
||||
static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
|
||||
{
|
||||
update_curr_dl(rq);
|
||||
@ -1865,7 +1869,7 @@ static int find_later_rq(struct task_struct *task)
|
||||
|
||||
/*
|
||||
* We have to consider system topology and task affinity
|
||||
* first, then we can look for a suitable cpu.
|
||||
* first, then we can look for a suitable CPU.
|
||||
*/
|
||||
if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
|
||||
return -1;
|
||||
@ -1879,7 +1883,7 @@ static int find_later_rq(struct task_struct *task)
|
||||
* Now we check how well this matches with task's
|
||||
* affinity and system topology.
|
||||
*
|
||||
* The last cpu where the task run is our first
|
||||
* The last CPU where the task run is our first
|
||||
* guess, since it is most likely cache-hot there.
|
||||
*/
|
||||
if (cpumask_test_cpu(cpu, later_mask))
|
||||
@ -1909,9 +1913,9 @@ static int find_later_rq(struct task_struct *task)
|
||||
best_cpu = cpumask_first_and(later_mask,
|
||||
sched_domain_span(sd));
|
||||
/*
|
||||
* Last chance: if a cpu being in both later_mask
|
||||
* Last chance: if a CPU being in both later_mask
|
||||
* and current sd span is valid, that becomes our
|
||||
* choice. Of course, the latest possible cpu is
|
||||
* choice. Of course, the latest possible CPU is
|
||||
* already under consideration through later_mask.
|
||||
*/
|
||||
if (best_cpu < nr_cpu_ids) {
|
||||
@ -2067,7 +2071,7 @@ retry:
|
||||
if (task == next_task) {
|
||||
/*
|
||||
* The task is still there. We don't try
|
||||
* again, some other cpu will pull it when ready.
|
||||
* again, some other CPU will pull it when ready.
|
||||
*/
|
||||
goto out;
|
||||
}
|
||||
@ -2300,12 +2304,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
|
||||
/*
|
||||
* Since this might be the only -deadline task on the rq,
|
||||
* this is the right place to try to pull some other one
|
||||
* from an overloaded cpu, if any.
|
||||
* from an overloaded CPU, if any.
|
||||
*/
|
||||
if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
|
||||
return;
|
||||
|
||||
queue_pull_task(rq);
|
||||
deadline_queue_pull_task(rq);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2327,7 +2331,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
|
||||
if (rq->curr != p) {
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
|
||||
queue_push_tasks(rq);
|
||||
deadline_queue_push_tasks(rq);
|
||||
#endif
|
||||
if (dl_task(rq->curr))
|
||||
check_preempt_curr_dl(rq, p, 0);
|
||||
@ -2352,7 +2356,7 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
|
||||
* or lowering its prio, so...
|
||||
*/
|
||||
if (!rq->dl.overloaded)
|
||||
queue_pull_task(rq);
|
||||
deadline_queue_pull_task(rq);
|
||||
|
||||
/*
|
||||
* If we now have a earlier deadline task than p,
|
||||
@ -2626,17 +2630,17 @@ void __dl_clear_params(struct task_struct *p)
|
||||
{
|
||||
struct sched_dl_entity *dl_se = &p->dl;
|
||||
|
||||
dl_se->dl_runtime = 0;
|
||||
dl_se->dl_deadline = 0;
|
||||
dl_se->dl_period = 0;
|
||||
dl_se->flags = 0;
|
||||
dl_se->dl_bw = 0;
|
||||
dl_se->dl_density = 0;
|
||||
dl_se->dl_runtime = 0;
|
||||
dl_se->dl_deadline = 0;
|
||||
dl_se->dl_period = 0;
|
||||
dl_se->flags = 0;
|
||||
dl_se->dl_bw = 0;
|
||||
dl_se->dl_density = 0;
|
||||
|
||||
dl_se->dl_throttled = 0;
|
||||
dl_se->dl_yielded = 0;
|
||||
dl_se->dl_non_contending = 0;
|
||||
dl_se->dl_overrun = 0;
|
||||
dl_se->dl_throttled = 0;
|
||||
dl_se->dl_yielded = 0;
|
||||
dl_se->dl_non_contending = 0;
|
||||
dl_se->dl_overrun = 0;
|
||||
}
|
||||
|
||||
bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
|
||||
@ -2655,21 +2659,22 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
|
||||
#ifdef CONFIG_SMP
|
||||
int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
|
||||
{
|
||||
unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
|
||||
cs_cpus_allowed);
|
||||
unsigned int dest_cpu;
|
||||
struct dl_bw *dl_b;
|
||||
bool overflow;
|
||||
int cpus, ret;
|
||||
unsigned long flags;
|
||||
|
||||
dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
|
||||
|
||||
rcu_read_lock_sched();
|
||||
dl_b = dl_bw_of(dest_cpu);
|
||||
raw_spin_lock_irqsave(&dl_b->lock, flags);
|
||||
cpus = dl_bw_cpus(dest_cpu);
|
||||
overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
|
||||
if (overflow)
|
||||
if (overflow) {
|
||||
ret = -EBUSY;
|
||||
else {
|
||||
} else {
|
||||
/*
|
||||
* We reserve space for this task in the destination
|
||||
* root_domain, as we can't fail after this point.
|
||||
@ -2681,6 +2686,7 @@ int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allo
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
|
||||
rcu_read_unlock_sched();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2701,6 +2707,7 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||
ret = 0;
|
||||
raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
|
||||
rcu_read_unlock_sched();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2718,6 +2725,7 @@ bool dl_cpu_busy(unsigned int cpu)
|
||||
overflow = __dl_overflow(dl_b, cpus, 0, 0);
|
||||
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
|
||||
rcu_read_unlock_sched();
|
||||
|
||||
return overflow;
|
||||
}
|
||||
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* kernel/sched/debug.c
|
||||
*
|
||||
* Print the CFS rbtree
|
||||
* Print the CFS rbtree and other debugging details
|
||||
*
|
||||
* Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
|
||||
*
|
||||
@ -9,16 +9,6 @@
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
static DEFINE_SPINLOCK(sched_debug_lock);
|
||||
@ -274,34 +264,19 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
set_table_entry(&table[0], "min_interval", &sd->min_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[1], "max_interval", &sd->max_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[9], "cache_nice_tries",
|
||||
&sd->cache_nice_tries,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[10], "flags", &sd->flags,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[11], "max_newidle_lb_cost",
|
||||
&sd->max_newidle_lb_cost,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[12], "name", sd->name,
|
||||
CORENAME_MAX_SIZE, 0444, proc_dostring, false);
|
||||
set_table_entry(&table[0] , "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[1] , "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[2] , "busy_idx", &sd->busy_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[3] , "idle_idx", &sd->idle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[4] , "newidle_idx", &sd->newidle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[5] , "wake_idx", &sd->wake_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[6] , "forkexec_idx", &sd->forkexec_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[7] , "busy_factor", &sd->busy_factor, sizeof(int) , 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[8] , "imbalance_pct", &sd->imbalance_pct, sizeof(int) , 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[9] , "cache_nice_tries", &sd->cache_nice_tries, sizeof(int) , 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[10], "flags", &sd->flags, sizeof(int) , 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false);
|
||||
/* &table[13] is terminator */
|
||||
|
||||
return table;
|
||||
@ -332,8 +307,8 @@ static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
|
||||
return table;
|
||||
}
|
||||
|
||||
static cpumask_var_t sd_sysctl_cpus;
|
||||
static struct ctl_table_header *sd_sysctl_header;
|
||||
static cpumask_var_t sd_sysctl_cpus;
|
||||
static struct ctl_table_header *sd_sysctl_header;
|
||||
|
||||
void register_sched_domain_sysctl(void)
|
||||
{
|
||||
@ -413,14 +388,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
|
||||
{
|
||||
struct sched_entity *se = tg->se[cpu];
|
||||
|
||||
#define P(F) \
|
||||
SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
|
||||
#define P_SCHEDSTAT(F) \
|
||||
SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
|
||||
#define PN(F) \
|
||||
SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
|
||||
#define PN_SCHEDSTAT(F) \
|
||||
SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
|
||||
#define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
|
||||
#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
|
||||
#define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
|
||||
#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
|
||||
|
||||
if (!se)
|
||||
return;
|
||||
@ -428,6 +399,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
|
||||
PN(se->exec_start);
|
||||
PN(se->vruntime);
|
||||
PN(se->sum_exec_runtime);
|
||||
|
||||
if (schedstat_enabled()) {
|
||||
PN_SCHEDSTAT(se->statistics.wait_start);
|
||||
PN_SCHEDSTAT(se->statistics.sleep_start);
|
||||
@ -440,6 +412,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
|
||||
PN_SCHEDSTAT(se->statistics.wait_sum);
|
||||
P_SCHEDSTAT(se->statistics.wait_count);
|
||||
}
|
||||
|
||||
P(se->load.weight);
|
||||
P(se->runnable_weight);
|
||||
#ifdef CONFIG_SMP
|
||||
@ -464,6 +437,7 @@ static char *task_group_path(struct task_group *tg)
|
||||
return group_path;
|
||||
|
||||
cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
|
||||
|
||||
return group_path;
|
||||
}
|
||||
#endif
|
||||
@ -569,6 +543,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
cfs_rq->avg.runnable_load_avg);
|
||||
SEQ_printf(m, " .%-30s: %lu\n", "util_avg",
|
||||
cfs_rq->avg.util_avg);
|
||||
SEQ_printf(m, " .%-30s: %u\n", "util_est_enqueued",
|
||||
cfs_rq->avg.util_est.enqueued);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "removed.load_avg",
|
||||
cfs_rq->removed.load_avg);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "removed.util_avg",
|
||||
@ -804,9 +780,9 @@ void sysrq_sched_debug_show(void)
|
||||
/*
|
||||
* This itererator needs some explanation.
|
||||
* It returns 1 for the header position.
|
||||
* This means 2 is cpu 0.
|
||||
* In a hotplugged system some cpus, including cpu 0, may be missing so we have
|
||||
* to use cpumask_* to iterate over the cpus.
|
||||
* This means 2 is CPU 0.
|
||||
* In a hotplugged system some CPUs, including CPU 0, may be missing so we have
|
||||
* to use cpumask_* to iterate over the CPUs.
|
||||
*/
|
||||
static void *sched_debug_start(struct seq_file *file, loff_t *offset)
|
||||
{
|
||||
@ -826,6 +802,7 @@ static void *sched_debug_start(struct seq_file *file, loff_t *offset)
|
||||
|
||||
if (n < nr_cpu_ids)
|
||||
return (void *)(unsigned long)(n + 2);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -840,10 +817,10 @@ static void sched_debug_stop(struct seq_file *file, void *data)
|
||||
}
|
||||
|
||||
static const struct seq_operations sched_debug_sops = {
|
||||
.start = sched_debug_start,
|
||||
.next = sched_debug_next,
|
||||
.stop = sched_debug_stop,
|
||||
.show = sched_debug_show,
|
||||
.start = sched_debug_start,
|
||||
.next = sched_debug_next,
|
||||
.stop = sched_debug_stop,
|
||||
.show = sched_debug_show,
|
||||
};
|
||||
|
||||
static int sched_debug_release(struct inode *inode, struct file *file)
|
||||
@ -881,14 +858,10 @@ static int __init init_sched_debug_procfs(void)
|
||||
|
||||
__initcall(init_sched_debug_procfs);
|
||||
|
||||
#define __P(F) \
|
||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
|
||||
#define P(F) \
|
||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
|
||||
#define __PN(F) \
|
||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
|
||||
#define PN(F) \
|
||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
|
||||
#define __P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
|
||||
#define P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
|
||||
#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
|
||||
#define PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
|
||||
|
||||
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
@ -1023,6 +996,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||
P(se.avg.runnable_load_avg);
|
||||
P(se.avg.util_avg);
|
||||
P(se.avg.last_update_time);
|
||||
P(se.avg.util_est.ewma);
|
||||
P(se.avg.util_est.enqueued);
|
||||
#endif
|
||||
P(policy);
|
||||
P(prio);
|
||||
|
1417
kernel/sched/fair.c
1417
kernel/sched/fair.c
File diff suppressed because it is too large
Load Diff
@ -85,3 +85,8 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true)
|
||||
SCHED_FEAT(WA_IDLE, true)
|
||||
SCHED_FEAT(WA_WEIGHT, true)
|
||||
SCHED_FEAT(WA_BIAS, true)
|
||||
|
||||
/*
|
||||
* UtilEstimation. Use estimated CPU utilization.
|
||||
*/
|
||||
SCHED_FEAT(UTIL_EST, true)
|
||||
|
@ -1,23 +1,14 @@
|
||||
/*
|
||||
* Generic entry point for the idle threads
|
||||
* Generic entry points for the idle threads and
|
||||
* implementation of the idle task scheduling class.
|
||||
*
|
||||
* (NOTE: these are not related to SCHED_IDLE batch scheduled
|
||||
* tasks which are handled in sched/fair.c )
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/idle.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/cpuhotplug.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/stackprotector.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/livepatch.h>
|
||||
|
||||
#include <asm/tlb.h>
|
||||
#include "sched.h"
|
||||
|
||||
#include <trace/events/power.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
/* Linker adds these: start and end of __cpuidle functions */
|
||||
extern char __cpuidle_text_start[], __cpuidle_text_end[];
|
||||
|
||||
@ -46,6 +37,7 @@ void cpu_idle_poll_ctrl(bool enable)
|
||||
static int __init cpu_idle_poll_setup(char *__unused)
|
||||
{
|
||||
cpu_idle_force_poll = 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
__setup("nohlt", cpu_idle_poll_setup);
|
||||
@ -53,6 +45,7 @@ __setup("nohlt", cpu_idle_poll_setup);
|
||||
static int __init cpu_idle_nopoll_setup(char *__unused)
|
||||
{
|
||||
cpu_idle_force_poll = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
__setup("hlt", cpu_idle_nopoll_setup);
|
||||
@ -64,12 +57,14 @@ static noinline int __cpuidle cpu_idle_poll(void)
|
||||
trace_cpu_idle_rcuidle(0, smp_processor_id());
|
||||
local_irq_enable();
|
||||
stop_critical_timings();
|
||||
|
||||
while (!tif_need_resched() &&
|
||||
(cpu_idle_force_poll || tick_check_broadcast_expired()))
|
||||
cpu_relax();
|
||||
start_critical_timings();
|
||||
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
|
||||
rcu_idle_exit();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -332,8 +327,8 @@ void cpu_startup_entry(enum cpuhp_state state)
|
||||
{
|
||||
/*
|
||||
* This #ifdef needs to die, but it's too late in the cycle to
|
||||
* make this generic (arm and sh have never invoked the canary
|
||||
* init for the non boot cpus!). Will be fixed in 3.11
|
||||
* make this generic (ARM and SH have never invoked the canary
|
||||
* init for the non boot CPUs!). Will be fixed in 3.11
|
||||
*/
|
||||
#ifdef CONFIG_X86
|
||||
/*
|
||||
@ -350,3 +345,116 @@ void cpu_startup_entry(enum cpuhp_state state)
|
||||
while (1)
|
||||
do_idle();
|
||||
}
|
||||
|
||||
/*
|
||||
* idle-task scheduling class.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
{
|
||||
return task_cpu(p); /* IDLE tasks as never migrated */
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Idle tasks are unconditionally rescheduled:
|
||||
*/
|
||||
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
resched_curr(rq);
|
||||
}
|
||||
|
||||
static struct task_struct *
|
||||
pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
{
|
||||
put_prev_task(rq, prev);
|
||||
update_idle_core(rq);
|
||||
schedstat_inc(rq->sched_goidle);
|
||||
|
||||
return rq->idle;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is not legal to sleep in the idle task - print a warning
|
||||
* message if some code attempts to do it:
|
||||
*/
|
||||
static void
|
||||
dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
|
||||
dump_stack();
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
}
|
||||
|
||||
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* scheduler tick hitting a task of our scheduling class.
|
||||
*
|
||||
* NOTE: This function can be called remotely by the tick offload that
|
||||
* goes along full dynticks. Therefore no local assumption can be made
|
||||
* and everything must be accessed through the @rq and @curr passed in
|
||||
* parameters.
|
||||
*/
|
||||
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
}
|
||||
|
||||
static void set_curr_task_idle(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static void switched_to_idle(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
BUG();
|
||||
}
|
||||
|
||||
static void
|
||||
prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
|
||||
{
|
||||
BUG();
|
||||
}
|
||||
|
||||
static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_curr_idle(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple, special scheduling class for the per-CPU idle tasks:
|
||||
*/
|
||||
const struct sched_class idle_sched_class = {
|
||||
/* .next is NULL */
|
||||
/* no enqueue/yield_task for idle tasks */
|
||||
|
||||
/* dequeue is not valid, we print a debug message there: */
|
||||
.dequeue_task = dequeue_task_idle,
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_idle,
|
||||
|
||||
.pick_next_task = pick_next_task_idle,
|
||||
.put_prev_task = put_prev_task_idle,
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_idle,
|
||||
.set_cpus_allowed = set_cpus_allowed_common,
|
||||
#endif
|
||||
|
||||
.set_curr_task = set_curr_task_idle,
|
||||
.task_tick = task_tick_idle,
|
||||
|
||||
.get_rr_interval = get_rr_interval_idle,
|
||||
|
||||
.prio_changed = prio_changed_idle,
|
||||
.switched_to = switched_to_idle,
|
||||
.update_curr = update_curr_idle,
|
||||
};
|
||||
|
@ -1,110 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* idle-task scheduling class.
|
||||
*
|
||||
* (NOTE: these are not related to SCHED_IDLE tasks which are
|
||||
* handled in sched/fair.c)
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
{
|
||||
return task_cpu(p); /* IDLE tasks as never migrated */
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* Idle tasks are unconditionally rescheduled:
|
||||
*/
|
||||
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
resched_curr(rq);
|
||||
}
|
||||
|
||||
static struct task_struct *
|
||||
pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
{
|
||||
put_prev_task(rq, prev);
|
||||
update_idle_core(rq);
|
||||
schedstat_inc(rq->sched_goidle);
|
||||
return rq->idle;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is not legal to sleep in the idle task - print a warning
|
||||
* message if some code attempts to do it:
|
||||
*/
|
||||
static void
|
||||
dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
|
||||
dump_stack();
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
}
|
||||
|
||||
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
|
||||
{
|
||||
rq_last_tick_reset(rq);
|
||||
}
|
||||
|
||||
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
}
|
||||
|
||||
static void set_curr_task_idle(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static void switched_to_idle(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
BUG();
|
||||
}
|
||||
|
||||
static void
|
||||
prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
|
||||
{
|
||||
BUG();
|
||||
}
|
||||
|
||||
static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_curr_idle(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple, special scheduling class for the per-CPU idle tasks:
|
||||
*/
|
||||
const struct sched_class idle_sched_class = {
|
||||
/* .next is NULL */
|
||||
/* no enqueue/yield_task for idle tasks */
|
||||
|
||||
/* dequeue is not valid, we print a debug message there: */
|
||||
.dequeue_task = dequeue_task_idle,
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_idle,
|
||||
|
||||
.pick_next_task = pick_next_task_idle,
|
||||
.put_prev_task = put_prev_task_idle,
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_idle,
|
||||
.set_cpus_allowed = set_cpus_allowed_common,
|
||||
#endif
|
||||
|
||||
.set_curr_task = set_curr_task_idle,
|
||||
.task_tick = task_tick_idle,
|
||||
|
||||
.get_rr_interval = get_rr_interval_idle,
|
||||
|
||||
.prio_changed = prio_changed_idle,
|
||||
.switched_to = switched_to_idle,
|
||||
.update_curr = update_curr_idle,
|
||||
};
|
@ -3,15 +3,10 @@
|
||||
* any CPU: unbound workqueues, timers, kthreads and any offloadable work.
|
||||
*
|
||||
* Copyright (C) 2017 Red Hat, Inc., Frederic Weisbecker
|
||||
* Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/static_key.h>
|
||||
#include <linux/ctype.h>
|
||||
#include "sched.h"
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(housekeeping_overriden);
|
||||
EXPORT_SYMBOL_GPL(housekeeping_overriden);
|
||||
@ -60,6 +55,9 @@ void __init housekeeping_init(void)
|
||||
|
||||
static_branch_enable(&housekeeping_overriden);
|
||||
|
||||
if (housekeeping_flags & HK_FLAG_TICK)
|
||||
sched_tick_offload_init();
|
||||
|
||||
/* We need at least one CPU to handle housekeeping work */
|
||||
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
|
||||
}
|
||||
@ -119,7 +117,7 @@ static int __init housekeeping_nohz_full_setup(char *str)
|
||||
{
|
||||
unsigned int flags;
|
||||
|
||||
flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
|
||||
flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
|
||||
|
||||
return housekeeping_setup(str, flags);
|
||||
}
|
||||
|
@ -6,10 +6,6 @@
|
||||
* figure. Its a silly number but people think its important. We go through
|
||||
* great pains to make it work on big machines and tickless kernels.
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/sched/loadavg.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
@ -32,29 +28,29 @@
|
||||
* Due to a number of reasons the above turns in the mess below:
|
||||
*
|
||||
* - for_each_possible_cpu() is prohibitively expensive on machines with
|
||||
* serious number of cpus, therefore we need to take a distributed approach
|
||||
* serious number of CPUs, therefore we need to take a distributed approach
|
||||
* to calculating nr_active.
|
||||
*
|
||||
* \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
|
||||
* = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
|
||||
*
|
||||
* So assuming nr_active := 0 when we start out -- true per definition, we
|
||||
* can simply take per-cpu deltas and fold those into a global accumulate
|
||||
* can simply take per-CPU deltas and fold those into a global accumulate
|
||||
* to obtain the same result. See calc_load_fold_active().
|
||||
*
|
||||
* Furthermore, in order to avoid synchronizing all per-cpu delta folding
|
||||
* Furthermore, in order to avoid synchronizing all per-CPU delta folding
|
||||
* across the machine, we assume 10 ticks is sufficient time for every
|
||||
* cpu to have completed this task.
|
||||
* CPU to have completed this task.
|
||||
*
|
||||
* This places an upper-bound on the IRQ-off latency of the machine. Then
|
||||
* again, being late doesn't loose the delta, just wrecks the sample.
|
||||
*
|
||||
* - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
|
||||
* this would add another cross-cpu cacheline miss and atomic operation
|
||||
* to the wakeup path. Instead we increment on whatever cpu the task ran
|
||||
* when it went into uninterruptible state and decrement on whatever cpu
|
||||
* - cpu_rq()->nr_uninterruptible isn't accurately tracked per-CPU because
|
||||
* this would add another cross-CPU cacheline miss and atomic operation
|
||||
* to the wakeup path. Instead we increment on whatever CPU the task ran
|
||||
* when it went into uninterruptible state and decrement on whatever CPU
|
||||
* did the wakeup. This means that only the sum of nr_uninterruptible over
|
||||
* all cpus yields the correct result.
|
||||
* all CPUs yields the correct result.
|
||||
*
|
||||
* This covers the NO_HZ=n code, for extra head-aches, see the comment below.
|
||||
*/
|
||||
@ -115,11 +111,11 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||
* Handle NO_HZ for the global load-average.
|
||||
*
|
||||
* Since the above described distributed algorithm to compute the global
|
||||
* load-average relies on per-cpu sampling from the tick, it is affected by
|
||||
* load-average relies on per-CPU sampling from the tick, it is affected by
|
||||
* NO_HZ.
|
||||
*
|
||||
* The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon
|
||||
* entering NO_HZ state such that we can include this as an 'extra' cpu delta
|
||||
* entering NO_HZ state such that we can include this as an 'extra' CPU delta
|
||||
* when we read the global state.
|
||||
*
|
||||
* Obviously reality has to ruin such a delightfully simple scheme:
|
||||
@ -146,9 +142,9 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||
* busy state.
|
||||
*
|
||||
* This is solved by pushing the window forward, and thus skipping the
|
||||
* sample, for this cpu (effectively using the NO_HZ-delta for this cpu which
|
||||
* sample, for this CPU (effectively using the NO_HZ-delta for this CPU which
|
||||
* was in effect at the time the window opened). This also solves the issue
|
||||
* of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ
|
||||
* of having to deal with a CPU having been in NO_HZ for multiple LOAD_FREQ
|
||||
* intervals.
|
||||
*
|
||||
* When making the ILB scale, we should try to pull this in as well.
|
||||
@ -299,7 +295,7 @@ calc_load_n(unsigned long load, unsigned long exp,
|
||||
}
|
||||
|
||||
/*
|
||||
* NO_HZ can leave us missing all per-cpu ticks calling
|
||||
* NO_HZ can leave us missing all per-CPU ticks calling
|
||||
* calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
|
||||
* calc_load_nohz per calc_load_nohz_start(), all we need to do is fold
|
||||
* in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary.
|
||||
@ -363,7 +359,7 @@ void calc_global_load(unsigned long ticks)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Fold the 'old' NO_HZ-delta to include all NO_HZ cpus.
|
||||
* Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
|
||||
*/
|
||||
delta = calc_load_nohz_fold();
|
||||
if (delta)
|
||||
|
@ -13,32 +13,25 @@
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/membarrier.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#include "sched.h" /* for cpu_rq(). */
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* Bitmask made from a "or" of all commands within enum membarrier_cmd,
|
||||
* except MEMBARRIER_CMD_QUERY.
|
||||
*/
|
||||
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
|
||||
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
|
||||
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \
|
||||
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
|
||||
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \
|
||||
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
|
||||
#else
|
||||
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0
|
||||
#endif
|
||||
|
||||
#define MEMBARRIER_CMD_BITMASK \
|
||||
(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
|
||||
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
|
||||
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
|
||||
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
|
||||
#define MEMBARRIER_CMD_BITMASK \
|
||||
(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
|
||||
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
|
||||
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
|
||||
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
|
||||
| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
|
||||
|
||||
static void ipi_mb(void *info)
|
||||
@ -85,6 +78,7 @@ static int membarrier_global_expedited(void)
|
||||
*/
|
||||
if (cpu == raw_smp_processor_id())
|
||||
continue;
|
||||
|
||||
rcu_read_lock();
|
||||
p = task_rcu_dereference(&cpu_rq(cpu)->curr);
|
||||
if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
|
||||
@ -188,6 +182,7 @@ static int membarrier_private_expedited(int flags)
|
||||
* rq->curr modification in scheduler.
|
||||
*/
|
||||
smp_mb(); /* exit from system call is not a mb */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -219,6 +214,7 @@ static int membarrier_register_global_expedited(void)
|
||||
}
|
||||
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
|
||||
&mm->membarrier_state);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -253,6 +249,7 @@ static int membarrier_register_private_expedited(int flags)
|
||||
synchronize_sched();
|
||||
}
|
||||
atomic_or(state, &mm->membarrier_state);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3,12 +3,8 @@
|
||||
* Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
|
||||
* policies)
|
||||
*/
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/irq_work.h>
|
||||
|
||||
int sched_rr_timeslice = RR_TIMESLICE;
|
||||
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
|
||||
|
||||
@ -359,7 +355,7 @@ static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
|
||||
static void push_rt_tasks(struct rq *);
|
||||
static void pull_rt_task(struct rq *);
|
||||
|
||||
static inline void queue_push_tasks(struct rq *rq)
|
||||
static inline void rt_queue_push_tasks(struct rq *rq)
|
||||
{
|
||||
if (!has_pushable_tasks(rq))
|
||||
return;
|
||||
@ -367,7 +363,7 @@ static inline void queue_push_tasks(struct rq *rq)
|
||||
queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
|
||||
}
|
||||
|
||||
static inline void queue_pull_task(struct rq *rq)
|
||||
static inline void rt_queue_pull_task(struct rq *rq)
|
||||
{
|
||||
queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
|
||||
}
|
||||
@ -425,7 +421,7 @@ static inline void pull_rt_task(struct rq *this_rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void queue_push_tasks(struct rq *rq)
|
||||
static inline void rt_queue_push_tasks(struct rq *rq)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
@ -961,9 +957,6 @@ static void update_curr_rt(struct rq *rq)
|
||||
if (unlikely((s64)delta_exec <= 0))
|
||||
return;
|
||||
|
||||
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
|
||||
cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
|
||||
|
||||
schedstat_set(curr->se.statistics.exec_max,
|
||||
max(curr->se.statistics.exec_max, delta_exec));
|
||||
|
||||
@ -1005,6 +998,9 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
|
||||
|
||||
sub_nr_running(rq, rt_rq->rt_nr_running);
|
||||
rt_rq->rt_queued = 0;
|
||||
|
||||
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
|
||||
cpufreq_update_util(rq, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1021,6 +1017,9 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
|
||||
|
||||
add_nr_running(rq, rt_rq->rt_nr_running);
|
||||
rt_rq->rt_queued = 1;
|
||||
|
||||
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
|
||||
cpufreq_update_util(rq, 0);
|
||||
}
|
||||
|
||||
#if defined CONFIG_SMP
|
||||
@ -1453,9 +1452,9 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
|
||||
return;
|
||||
|
||||
/*
|
||||
* There appears to be other cpus that can accept
|
||||
* current and none to run 'p', so lets reschedule
|
||||
* to try and push current away:
|
||||
* There appear to be other CPUs that can accept
|
||||
* the current task but none can run 'p', so lets reschedule
|
||||
* to try and push the current task away:
|
||||
*/
|
||||
requeue_task_rt(rq, p, 1);
|
||||
resched_curr(rq);
|
||||
@ -1569,7 +1568,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
/* The running task is never eligible for pushing */
|
||||
dequeue_pushable_task(rq, p);
|
||||
|
||||
queue_push_tasks(rq);
|
||||
rt_queue_push_tasks(rq);
|
||||
|
||||
return p;
|
||||
}
|
||||
@ -1596,12 +1595,13 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
if (!task_running(rq, p) &&
|
||||
cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the highest pushable rq's task, which is suitable to be executed
|
||||
* on the cpu, NULL otherwise
|
||||
* on the CPU, NULL otherwise
|
||||
*/
|
||||
static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
|
||||
{
|
||||
@ -1639,11 +1639,11 @@ static int find_lowest_rq(struct task_struct *task)
|
||||
return -1; /* No targets found */
|
||||
|
||||
/*
|
||||
* At this point we have built a mask of cpus representing the
|
||||
* At this point we have built a mask of CPUs representing the
|
||||
* lowest priority tasks in the system. Now we want to elect
|
||||
* the best one based on our affinity and topology.
|
||||
*
|
||||
* We prioritize the last cpu that the task executed on since
|
||||
* We prioritize the last CPU that the task executed on since
|
||||
* it is most likely cache-hot in that location.
|
||||
*/
|
||||
if (cpumask_test_cpu(cpu, lowest_mask))
|
||||
@ -1651,7 +1651,7 @@ static int find_lowest_rq(struct task_struct *task)
|
||||
|
||||
/*
|
||||
* Otherwise, we consult the sched_domains span maps to figure
|
||||
* out which cpu is logically closest to our hot cache data.
|
||||
* out which CPU is logically closest to our hot cache data.
|
||||
*/
|
||||
if (!cpumask_test_cpu(this_cpu, lowest_mask))
|
||||
this_cpu = -1; /* Skip this_cpu opt if not among lowest */
|
||||
@ -1692,6 +1692,7 @@ static int find_lowest_rq(struct task_struct *task)
|
||||
cpu = cpumask_any(lowest_mask);
|
||||
if (cpu < nr_cpu_ids)
|
||||
return cpu;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -1827,7 +1828,7 @@ retry:
|
||||
* The task hasn't migrated, and is still the next
|
||||
* eligible task, but we failed to find a run-queue
|
||||
* to push it to. Do not retry in this case, since
|
||||
* other cpus will pull from us when ready.
|
||||
* other CPUs will pull from us when ready.
|
||||
*/
|
||||
goto out;
|
||||
}
|
||||
@ -1919,7 +1920,7 @@ static int rto_next_cpu(struct root_domain *rd)
|
||||
* rt_next_cpu() will simply return the first CPU found in
|
||||
* the rto_mask.
|
||||
*
|
||||
* If rto_next_cpu() is called with rto_cpu is a valid cpu, it
|
||||
* If rto_next_cpu() is called with rto_cpu is a valid CPU, it
|
||||
* will return the next CPU found in the rto_mask.
|
||||
*
|
||||
* If there are no more CPUs left in the rto_mask, then a check is made
|
||||
@ -1980,7 +1981,7 @@ static void tell_cpu_to_push(struct rq *rq)
|
||||
raw_spin_lock(&rq->rd->rto_lock);
|
||||
|
||||
/*
|
||||
* The rto_cpu is updated under the lock, if it has a valid cpu
|
||||
* The rto_cpu is updated under the lock, if it has a valid CPU
|
||||
* then the IPI is still running and will continue due to the
|
||||
* update to loop_next, and nothing needs to be done here.
|
||||
* Otherwise it is finishing up and an ipi needs to be sent.
|
||||
@ -2105,7 +2106,7 @@ static void pull_rt_task(struct rq *this_rq)
|
||||
|
||||
/*
|
||||
* There's a chance that p is higher in priority
|
||||
* than what's currently running on its cpu.
|
||||
* than what's currently running on its CPU.
|
||||
* This is just that p is wakeing up and hasn't
|
||||
* had a chance to schedule. We only pull
|
||||
* p if it is lower in priority than the
|
||||
@ -2187,7 +2188,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
|
||||
if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
|
||||
return;
|
||||
|
||||
queue_pull_task(rq);
|
||||
rt_queue_pull_task(rq);
|
||||
}
|
||||
|
||||
void __init init_sched_rt_class(void)
|
||||
@ -2218,7 +2219,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
|
||||
if (task_on_rq_queued(p) && rq->curr != p) {
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
|
||||
queue_push_tasks(rq);
|
||||
rt_queue_push_tasks(rq);
|
||||
#endif /* CONFIG_SMP */
|
||||
if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
|
||||
resched_curr(rq);
|
||||
@ -2242,7 +2243,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
|
||||
* may need to pull tasks to this runqueue.
|
||||
*/
|
||||
if (oldprio < p->prio)
|
||||
queue_pull_task(rq);
|
||||
rt_queue_pull_task(rq);
|
||||
|
||||
/*
|
||||
* If there's a higher priority task waiting to run
|
||||
@ -2292,6 +2293,14 @@ static void watchdog(struct rq *rq, struct task_struct *p)
|
||||
static inline void watchdog(struct rq *rq, struct task_struct *p) { }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* scheduler tick hitting a task of our scheduling class.
|
||||
*
|
||||
* NOTE: This function can be called remotely by the tick offload that
|
||||
* goes along full dynticks. Therefore no local assumption can be made
|
||||
* and everything must be accessed through the @rq and @curr passed in
|
||||
* parameters.
|
||||
*/
|
||||
static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
|
||||
{
|
||||
struct sched_rt_entity *rt_se = &p->rt;
|
||||
@ -2685,6 +2694,7 @@ int sched_rr_handler(struct ctl_table *table, int write,
|
||||
msecs_to_jiffies(sysctl_sched_rr_timeslice);
|
||||
}
|
||||
mutex_unlock(&mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,14 +1,13 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/proc_fs.h>
|
||||
|
||||
/*
|
||||
* /proc/schedstat implementation
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* bump this up when changing the output format or the meaning of an existing
|
||||
* Current schedstat API version.
|
||||
*
|
||||
* Bump this up when changing the output format or the meaning of an existing
|
||||
* format, so that tools can adapt (or abort)
|
||||
*/
|
||||
#define SCHEDSTAT_VERSION 15
|
||||
@ -78,8 +77,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
|
||||
* This itererator needs some explanation.
|
||||
* It returns 1 for the header position.
|
||||
* This means 2 is cpu 0.
|
||||
* In a hotplugged system some cpus, including cpu 0, may be missing so we have
|
||||
* to use cpumask_* to iterate over the cpus.
|
||||
* In a hotplugged system some CPUs, including cpu 0, may be missing so we have
|
||||
* to use cpumask_* to iterate over the CPUs.
|
||||
*/
|
||||
static void *schedstat_start(struct seq_file *file, loff_t *offset)
|
||||
{
|
||||
@ -99,12 +98,14 @@ static void *schedstat_start(struct seq_file *file, loff_t *offset)
|
||||
|
||||
if (n < nr_cpu_ids)
|
||||
return (void *)(unsigned long)(n + 2);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset)
|
||||
{
|
||||
(*offset)++;
|
||||
|
||||
return schedstat_start(file, offset);
|
||||
}
|
||||
|
||||
@ -134,6 +135,7 @@ static const struct file_operations proc_schedstat_operations = {
|
||||
static int __init proc_schedstat_init(void)
|
||||
{
|
||||
proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
|
||||
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(proc_schedstat_init);
|
||||
|
@ -30,35 +30,29 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
|
||||
if (rq)
|
||||
rq->rq_sched_info.run_delay += delta;
|
||||
}
|
||||
#define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
|
||||
#define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
|
||||
#define __schedstat_inc(var) do { var++; } while (0)
|
||||
#define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0)
|
||||
#define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0)
|
||||
#define __schedstat_add(var, amt) do { var += (amt); } while (0)
|
||||
#define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0)
|
||||
#define __schedstat_set(var, val) do { var = (val); } while (0)
|
||||
#define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
|
||||
#define schedstat_val(var) (var)
|
||||
#define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0)
|
||||
#define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0)
|
||||
#define __schedstat_set(var, val) do { var = (val); } while (0)
|
||||
#define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
|
||||
#define schedstat_val(var) (var)
|
||||
#define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0)
|
||||
|
||||
#else /* !CONFIG_SCHEDSTATS */
|
||||
static inline void
|
||||
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
static inline void
|
||||
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
static inline void
|
||||
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
#define schedstat_enabled() 0
|
||||
#define __schedstat_inc(var) do { } while (0)
|
||||
#define schedstat_inc(var) do { } while (0)
|
||||
#define __schedstat_add(var, amt) do { } while (0)
|
||||
#define schedstat_add(var, amt) do { } while (0)
|
||||
#define __schedstat_set(var, val) do { } while (0)
|
||||
#define schedstat_set(var, val) do { } while (0)
|
||||
#define schedstat_val(var) 0
|
||||
#define schedstat_val_or_zero(var) 0
|
||||
#else /* !CONFIG_SCHEDSTATS: */
|
||||
static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { }
|
||||
static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) { }
|
||||
static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { }
|
||||
# define schedstat_enabled() 0
|
||||
# define __schedstat_inc(var) do { } while (0)
|
||||
# define schedstat_inc(var) do { } while (0)
|
||||
# define __schedstat_add(var, amt) do { } while (0)
|
||||
# define schedstat_add(var, amt) do { } while (0)
|
||||
# define __schedstat_set(var, val) do { } while (0)
|
||||
# define schedstat_set(var, val) do { } while (0)
|
||||
# define schedstat_val(var) 0
|
||||
# define schedstat_val_or_zero(var) 0
|
||||
#endif /* CONFIG_SCHEDSTATS */
|
||||
|
||||
#ifdef CONFIG_SCHED_INFO
|
||||
@ -69,9 +63,9 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
|
||||
|
||||
/*
|
||||
* We are interested in knowing how long it was from the *first* time a
|
||||
* task was queued to the time that it finally hit a cpu, we call this routine
|
||||
* from dequeue_task() to account for possible rq->clock skew across cpus. The
|
||||
* delta taken on each cpu would annul the skew.
|
||||
* task was queued to the time that it finally hit a CPU, we call this routine
|
||||
* from dequeue_task() to account for possible rq->clock skew across CPUs. The
|
||||
* delta taken on each CPU would annul the skew.
|
||||
*/
|
||||
static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
|
||||
{
|
||||
@ -87,7 +81,7 @@ static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when a task finally hits the cpu. We can now calculate how
|
||||
* Called when a task finally hits the CPU. We can now calculate how
|
||||
* long it was waiting to run. We also note when it began so that we
|
||||
* can keep stats on how long its timeslice is.
|
||||
*/
|
||||
@ -112,9 +106,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
|
||||
*/
|
||||
static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
|
||||
{
|
||||
if (unlikely(sched_info_on()))
|
||||
if (unlikely(sched_info_on())) {
|
||||
if (!t->sched_info.last_queued)
|
||||
t->sched_info.last_queued = rq_clock(rq);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -127,8 +122,7 @@ static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
|
||||
*/
|
||||
static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
|
||||
{
|
||||
unsigned long long delta = rq_clock(rq) -
|
||||
t->sched_info.last_arrival;
|
||||
unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;
|
||||
|
||||
rq_sched_info_depart(rq, delta);
|
||||
|
||||
@ -142,11 +136,10 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
|
||||
* the idle task.) We are only called when prev != next.
|
||||
*/
|
||||
static inline void
|
||||
__sched_info_switch(struct rq *rq,
|
||||
struct task_struct *prev, struct task_struct *next)
|
||||
__sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
|
||||
{
|
||||
/*
|
||||
* prev now departs the cpu. It's not interesting to record
|
||||
* prev now departs the CPU. It's not interesting to record
|
||||
* stats about how efficient we were at scheduling the idle
|
||||
* process, however.
|
||||
*/
|
||||
@ -156,18 +149,19 @@ __sched_info_switch(struct rq *rq,
|
||||
if (next != rq->idle)
|
||||
sched_info_arrive(rq, next);
|
||||
}
|
||||
|
||||
static inline void
|
||||
sched_info_switch(struct rq *rq,
|
||||
struct task_struct *prev, struct task_struct *next)
|
||||
sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
|
||||
{
|
||||
if (unlikely(sched_info_on()))
|
||||
__sched_info_switch(rq, prev, next);
|
||||
}
|
||||
#else
|
||||
#define sched_info_queued(rq, t) do { } while (0)
|
||||
#define sched_info_reset_dequeued(t) do { } while (0)
|
||||
#define sched_info_dequeued(rq, t) do { } while (0)
|
||||
#define sched_info_depart(rq, t) do { } while (0)
|
||||
#define sched_info_arrive(rq, next) do { } while (0)
|
||||
#define sched_info_switch(rq, t, next) do { } while (0)
|
||||
|
||||
#else /* !CONFIG_SCHED_INFO: */
|
||||
# define sched_info_queued(rq, t) do { } while (0)
|
||||
# define sched_info_reset_dequeued(t) do { } while (0)
|
||||
# define sched_info_dequeued(rq, t) do { } while (0)
|
||||
# define sched_info_depart(rq, t) do { } while (0)
|
||||
# define sched_info_arrive(rq, next) do { } while (0)
|
||||
# define sched_info_switch(rq, t, next) do { } while (0)
|
||||
#endif /* CONFIG_SCHED_INFO */
|
||||
|
@ -1,6 +1,4 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* stop-task scheduling class.
|
||||
*
|
||||
@ -9,6 +7,7 @@
|
||||
*
|
||||
* See kernel/stop_machine.c
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
@ -75,6 +74,14 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
|
||||
cgroup_account_cputime(curr, delta_exec);
|
||||
}
|
||||
|
||||
/*
|
||||
* scheduler tick hitting a task of our scheduling class.
|
||||
*
|
||||
* NOTE: This function can be called remotely by the tick offload that
|
||||
* goes along full dynticks. Therefore no local assumption can be made
|
||||
* and everything must be accessed through the @rq and @curr passed in
|
||||
* parameters.
|
||||
*/
|
||||
static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/swait.h>
|
||||
/*
|
||||
* <linux/swait.h> (simple wait queues ) implementation:
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
|
||||
struct lock_class_key *key)
|
||||
|
@ -2,10 +2,6 @@
|
||||
/*
|
||||
* Scheduler topology setup/handling methods
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
DEFINE_MUTEX(sched_domains_mutex);
|
||||
@ -41,8 +37,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
||||
if (!(sd->flags & SD_LOAD_BALANCE)) {
|
||||
printk("does not load-balance\n");
|
||||
if (sd->parent)
|
||||
printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
|
||||
" has parent");
|
||||
printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -50,12 +45,10 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
||||
cpumask_pr_args(sched_domain_span(sd)), sd->name);
|
||||
|
||||
if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
|
||||
printk(KERN_ERR "ERROR: domain->span does not contain "
|
||||
"CPU%d\n", cpu);
|
||||
printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
|
||||
}
|
||||
if (!cpumask_test_cpu(cpu, sched_group_span(group))) {
|
||||
printk(KERN_ERR "ERROR: domain->groups does not contain"
|
||||
" CPU%d\n", cpu);
|
||||
printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
|
||||
}
|
||||
|
||||
printk(KERN_DEBUG "%*s groups:", level + 1, "");
|
||||
@ -115,8 +108,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
||||
|
||||
if (sd->parent &&
|
||||
!cpumask_subset(groupmask, sched_domain_span(sd->parent)))
|
||||
printk(KERN_ERR "ERROR: parent span is not a superset "
|
||||
"of domain->span\n");
|
||||
printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -595,7 +587,7 @@ int group_balance_cpu(struct sched_group *sg)
|
||||
* are not.
|
||||
*
|
||||
* This leads to a few particularly weird cases where the sched_domain's are
|
||||
* not of the same number for each cpu. Consider:
|
||||
* not of the same number for each CPU. Consider:
|
||||
*
|
||||
* NUMA-2 0-3 0-3
|
||||
* groups: {0-2},{1-3} {1-3},{0-2}
|
||||
@ -780,7 +772,7 @@ fail:
|
||||
* ^ ^ ^ ^
|
||||
* `-' `-'
|
||||
*
|
||||
* The sched_domains are per-cpu and have a two way link (parent & child) and
|
||||
* The sched_domains are per-CPU and have a two way link (parent & child) and
|
||||
* denote the ever growing mask of CPUs belonging to that level of topology.
|
||||
*
|
||||
* Each sched_domain has a circular (double) linked list of sched_group's, each
|
||||
@ -1021,6 +1013,7 @@ __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map)
|
||||
d->rd = alloc_rootdomain();
|
||||
if (!d->rd)
|
||||
return sa_sd;
|
||||
|
||||
return sa_rootdomain;
|
||||
}
|
||||
|
||||
@ -1047,12 +1040,14 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
static int sched_domains_numa_levels;
|
||||
enum numa_topology_type sched_numa_topology_type;
|
||||
static int *sched_domains_numa_distance;
|
||||
int sched_max_numa_distance;
|
||||
static struct cpumask ***sched_domains_numa_masks;
|
||||
static int sched_domains_curr_level;
|
||||
|
||||
static int sched_domains_numa_levels;
|
||||
static int sched_domains_curr_level;
|
||||
|
||||
int sched_max_numa_distance;
|
||||
static int *sched_domains_numa_distance;
|
||||
static struct cpumask ***sched_domains_numa_masks;
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -1074,11 +1069,11 @@ static int sched_domains_curr_level;
|
||||
* SD_ASYM_PACKING - describes SMT quirks
|
||||
*/
|
||||
#define TOPOLOGY_SD_FLAGS \
|
||||
(SD_SHARE_CPUCAPACITY | \
|
||||
(SD_SHARE_CPUCAPACITY | \
|
||||
SD_SHARE_PKG_RESOURCES | \
|
||||
SD_NUMA | \
|
||||
SD_ASYM_PACKING | \
|
||||
SD_ASYM_CPUCAPACITY | \
|
||||
SD_NUMA | \
|
||||
SD_ASYM_PACKING | \
|
||||
SD_ASYM_CPUCAPACITY | \
|
||||
SD_SHARE_POWERDOMAIN)
|
||||
|
||||
static struct sched_domain *
|
||||
@ -1628,7 +1623,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
|
||||
pr_err(" the %s domain not a subset of the %s domain\n",
|
||||
child->name, sd->name);
|
||||
#endif
|
||||
/* Fixup, ensure @sd has at least @child cpus. */
|
||||
/* Fixup, ensure @sd has at least @child CPUs. */
|
||||
cpumask_or(sched_domain_span(sd),
|
||||
sched_domain_span(sd),
|
||||
sched_domain_span(child));
|
||||
@ -1720,6 +1715,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
|
||||
ret = 0;
|
||||
error:
|
||||
__free_domain_allocs(&d, alloc_state, cpu_map);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1824,6 +1820,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
|
||||
return 1;
|
||||
|
||||
tmp = SD_ATTR_INIT;
|
||||
|
||||
return !memcmp(cur ? (cur + idx_cur) : &tmp,
|
||||
new ? (new + idx_new) : &tmp,
|
||||
sizeof(struct sched_domain_attr));
|
||||
@ -1929,4 +1926,3 @@ match2:
|
||||
|
||||
mutex_unlock(&sched_domains_mutex);
|
||||
}
|
||||
|
||||
|
@ -3,14 +3,7 @@
|
||||
*
|
||||
* (C) 2004 Nadia Yvette Chambers, Oracle
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/kthread.h>
|
||||
#include "sched.h"
|
||||
|
||||
void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
|
||||
{
|
||||
@ -107,6 +100,7 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return nr_exclusive;
|
||||
}
|
||||
|
||||
@ -317,6 +311,7 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
|
||||
spin_unlock(&wq->lock);
|
||||
schedule();
|
||||
spin_lock(&wq->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(do_wait_intr);
|
||||
@ -333,6 +328,7 @@ int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
|
||||
spin_unlock_irq(&wq->lock);
|
||||
schedule();
|
||||
spin_lock_irq(&wq->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(do_wait_intr_irq);
|
||||
@ -378,6 +374,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
|
||||
|
||||
if (ret)
|
||||
list_del_init(&wq_entry->entry);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(autoremove_wake_function);
|
||||
|
@ -1,10 +1,7 @@
|
||||
/*
|
||||
* The implementation of the wait_bit*() and related waiting APIs:
|
||||
*/
|
||||
#include <linux/wait_bit.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/hash.h>
|
||||
#include "sched.h"
|
||||
|
||||
#define WAIT_TABLE_BITS 8
|
||||
#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
|
||||
@ -29,8 +26,8 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
|
||||
wait_bit->key.bit_nr != key->bit_nr ||
|
||||
test_bit(key->bit_nr, key->flags))
|
||||
return 0;
|
||||
else
|
||||
return autoremove_wake_function(wq_entry, mode, sync, key);
|
||||
|
||||
return autoremove_wake_function(wq_entry, mode, sync, key);
|
||||
}
|
||||
EXPORT_SYMBOL(wake_bit_function);
|
||||
|
||||
@ -50,7 +47,9 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
|
||||
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
|
||||
ret = (*action)(&wbq_entry->key, mode);
|
||||
} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
|
||||
|
||||
finish_wait(wq_head, &wbq_entry->wq_entry);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(__wait_on_bit);
|
||||
@ -73,6 +72,7 @@ int __sched out_of_line_wait_on_bit_timeout(
|
||||
DEFINE_WAIT_BIT(wq_entry, word, bit);
|
||||
|
||||
wq_entry.key.timeout = jiffies + timeout;
|
||||
|
||||
return __wait_on_bit(wq_head, &wq_entry, action, mode);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
|
||||
@ -120,6 +120,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
|
||||
void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
|
||||
{
|
||||
struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
|
||||
|
||||
if (waitqueue_active(wq_head))
|
||||
__wake_up(wq_head, TASK_NORMAL, 1, &key);
|
||||
}
|
||||
@ -157,6 +158,7 @@ static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
|
||||
{
|
||||
if (BITS_PER_LONG == 64) {
|
||||
unsigned long q = (unsigned long)p;
|
||||
|
||||
return bit_waitqueue((void *)(q & ~1), q & 1);
|
||||
}
|
||||
return bit_waitqueue(p, 0);
|
||||
@ -173,6 +175,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
|
||||
wait_bit->key.bit_nr != key->bit_nr ||
|
||||
atomic_read(val) != 0)
|
||||
return 0;
|
||||
|
||||
return autoremove_wake_function(wq_entry, mode, sync, key);
|
||||
}
|
||||
|
||||
@ -196,6 +199,7 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en
|
||||
ret = (*action)(val, mode);
|
||||
} while (!ret && atomic_read(val) != 0);
|
||||
finish_wait(wq_head, &wbq_entry->wq_entry);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -226,6 +230,7 @@ __sched int atomic_t_wait(atomic_t *counter, unsigned int mode)
|
||||
schedule();
|
||||
if (signal_pending_state(mode, current))
|
||||
return -EINTR;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(atomic_t_wait);
|
||||
@ -250,6 +255,7 @@ __sched int bit_wait(struct wait_bit_key *word, int mode)
|
||||
schedule();
|
||||
if (signal_pending_state(mode, current))
|
||||
return -EINTR;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(bit_wait);
|
||||
@ -259,6 +265,7 @@ __sched int bit_wait_io(struct wait_bit_key *word, int mode)
|
||||
io_schedule();
|
||||
if (signal_pending_state(mode, current))
|
||||
return -EINTR;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(bit_wait_io);
|
||||
@ -266,11 +273,13 @@ EXPORT_SYMBOL(bit_wait_io);
|
||||
__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
|
||||
{
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
|
||||
if (time_after_eq(now, word->timeout))
|
||||
return -EAGAIN;
|
||||
schedule_timeout(word->timeout - now);
|
||||
if (signal_pending_state(mode, current))
|
||||
return -EINTR;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bit_wait_timeout);
|
||||
@ -278,11 +287,13 @@ EXPORT_SYMBOL_GPL(bit_wait_timeout);
|
||||
__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
|
||||
{
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
|
||||
if (time_after_eq(now, word->timeout))
|
||||
return -EAGAIN;
|
||||
io_schedule_timeout(word->timeout - now);
|
||||
if (signal_pending_state(mode, current))
|
||||
return -EINTR;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
|
||||
|
@ -463,11 +463,18 @@ static int __init setup_tick_nohz(char *str)
|
||||
|
||||
__setup("nohz=", setup_tick_nohz);
|
||||
|
||||
int tick_nohz_tick_stopped(void)
|
||||
bool tick_nohz_tick_stopped(void)
|
||||
{
|
||||
return __this_cpu_read(tick_cpu_sched.tick_stopped);
|
||||
}
|
||||
|
||||
bool tick_nohz_tick_stopped_cpu(int cpu)
|
||||
{
|
||||
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
|
||||
|
||||
return ts->tick_stopped;
|
||||
}
|
||||
|
||||
/**
|
||||
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
|
||||
*
|
||||
@ -723,12 +730,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
|
||||
delta = KTIME_MAX;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
/* Limit the tick delta to the maximum scheduler deferment */
|
||||
if (!ts->inidle)
|
||||
delta = min(delta, scheduler_tick_max_deferment());
|
||||
#endif
|
||||
|
||||
/* Calculate the next expiry time */
|
||||
if (delta < (KTIME_MAX - basemono))
|
||||
expires = basemono + delta;
|
||||
@ -935,13 +936,6 @@ void tick_nohz_idle_enter(void)
|
||||
struct tick_sched *ts;
|
||||
|
||||
lockdep_assert_irqs_enabled();
|
||||
/*
|
||||
* Update the idle state in the scheduler domain hierarchy
|
||||
* when tick_nohz_stop_sched_tick() is called from the idle loop.
|
||||
* State will be updated to busy during the first busy tick after
|
||||
* exiting idle.
|
||||
*/
|
||||
set_cpu_sd_state_idle();
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
|
@ -5573,12 +5573,13 @@ static void __init wq_numa_init(void)
|
||||
int __init workqueue_init_early(void)
|
||||
{
|
||||
int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
|
||||
int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
|
||||
int i, cpu;
|
||||
|
||||
WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
|
||||
|
||||
BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
|
||||
cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN));
|
||||
cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
|
||||
|
||||
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user