diff --git a/Documentation/admin-guide/cgroup-v1/cpusets.rst b/Documentation/admin-guide/cgroup-v1/cpusets.rst
index 7d3415eea05d..f401af5e2f09 100644
--- a/Documentation/admin-guide/cgroup-v1/cpusets.rst
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@@ -568,7 +568,7 @@ on the next tick. For some applications in special situation, waiting
 
 The 'cpuset.sched_relax_domain_level' file allows you to request changing
 this searching range as you like. This file takes int value which
-indicates size of searching range in levels ideally as follows,
+indicates size of searching range in levels approximately as follows,
 otherwise initial value -1 that indicates the cpuset has no request.
 
 ====== ===========================================================
@@ -581,6 +581,11 @@ otherwise initial value -1 that indicates the cpuset has no request.
   5  search system wide [on NUMA system]
 ====== ===========================================================
 
+Not all levels can be present and values can change depending on the
+system architecture and kernel configuration. Check
+/sys/kernel/debug/sched/domains/cpu*/domain*/ for system-specific
+details.
+
 The system default is architecture dependent. The system default
 can be changed using the relax_domain_level= boot parameter.
 
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 0248912ff687..c66d070207a0 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -179,7 +179,7 @@ DEFINE_PER_CPU(unsigned long, hw_pressure);
 void topology_update_hw_pressure(const struct cpumask *cpus,
 				 unsigned long capped_freq)
 {
-	unsigned long max_capacity, capacity, hw_pressure;
+	unsigned long max_capacity, capacity, pressure;
 	u32 max_freq;
 	int cpu;
 
@@ -196,12 +196,12 @@ void topology_update_hw_pressure(const struct cpumask *cpus,
 	else
 		capacity = mult_frac(max_capacity, capped_freq, max_freq);
 
-	hw_pressure = max_capacity - capacity;
+	pressure = max_capacity - capacity;
 
-	trace_hw_pressure_update(cpu, hw_pressure);
+	trace_hw_pressure_update(cpu, pressure);
 
 	for_each_cpu(cpu, cpus)
-		WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
+		WRITE_ONCE(per_cpu(hw_pressure, cpu), pressure);
 }
 EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
 
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index a10e4bd0c0c1..c12b9fdb22a4 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2941,7 +2941,7 @@ bool current_cpuset_is_being_rebound(void)
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
 {
 #ifdef CONFIG_SMP
-	if (val < -1 || val >= sched_domain_level_max)
+	if (val < -1 || val > sched_domain_level_max + 1)
 		return -EINVAL;
 #endif
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 373eaeaf63b8..bcf2c4cc0522 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11401,7 +11401,7 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
 {
 	struct task_group *tg = css_tg(of_css(of));
 	u64 period = tg_get_cfs_period(tg);
-	u64 burst = tg_get_cfs_burst(tg);
+	u64 burst = tg->cfs_bandwidth.burst;
 	u64 quota;
 	int ret;
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8d5d98a5834d..c1eb9a1afd13 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -425,6 +425,7 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
 
 	debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
 	debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
+	debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
 }
 
 void update_sched_domain_debugfs(void)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4214df32ba45..8a5b1ae0aa55 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1030,7 +1030,8 @@ void init_entity_runnable_average(struct sched_entity *se)
  * With new tasks being created, their initial util_avgs are extrapolated
  * based on the cfs_rq's current util_avg:
  *
- *   util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
+ *   util_avg = cfs_rq->avg.util_avg / (cfs_rq->avg.load_avg + 1)
+ *		* se_weight(se)
  *
  * However, in many cases, the above util_avg does not give a desired
  * value. Moreover, the sum of the util_avgs may be divergent, such
@@ -1077,7 +1078,7 @@ void post_init_entity_util_avg(struct task_struct *p)
 
 	if (cap > 0) {
 		if (cfs_rq->avg.util_avg != 0) {
-			sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
+			sa->util_avg  = cfs_rq->avg.util_avg * se_weight(se);
 			sa->util_avg /= (cfs_rq->avg.load_avg + 1);
 
 			if (sa->util_avg > cap)
@@ -7898,8 +7899,8 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
 		 * Performance domain frequency: utilization clamping
 		 * must be considered since it affects the selection
 		 * of the performance domain frequency.
-		 * NOTE: in case RT tasks are running, by default the
-		 * FREQUENCY_UTIL's utilization can be max OPP.
+		 * NOTE: in case RT tasks are running, by default the min
+		 * utilization can be max OPP.
 		 */
 		eff_util = effective_cpu_util(cpu, util, &min, &max);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 683559831656..329c82faca9b 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1474,7 +1474,7 @@ static void set_domain_attribute(struct sched_domain *sd,
 	} else
 		request = attr->relax_domain_level;
 
-	if (sd->level > request) {
+	if (sd->level >= request) {
 		/* Turn off idle balance on this domain: */
 		sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
 	}
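
Usage note (not part of the patch): the sketch below is a minimal userspace
program that walks the per-CPU sched domain directories named in the
documentation hunk above and prints the "level" file added by the
kernel/sched/debug.c hunk, which is one way to see which domain levels
actually exist before writing cpuset.sched_relax_domain_level. It assumes
debugfs is mounted at /sys/kernel/debug, that the kernel carries this change
and exposes the sched domain debug directories, and it only inspects cpu0
for brevity.

/*
 * Hypothetical helper, not part of this patch: print the "level" value of
 * every sched domain of cpu0 so an administrator can see which
 * relax_domain_level values are meaningful on this system.
 *
 * Assumes debugfs is mounted at /sys/kernel/debug and the kernel includes
 * the debugfs_create_u32("level", ...) change above.
 */
#include <stdio.h>

int main(void)
{
	int domain;

	for (domain = 0; ; domain++) {
		char path[128];
		unsigned int level;
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/kernel/debug/sched/domains/cpu0/domain%d/level",
			 domain);
		f = fopen(path, "r");
		if (!f)
			break;		/* no more domains for cpu0 */

		if (fscanf(f, "%u", &level) == 1)
			printf("domain%d: level %u\n", domain, level);
		fclose(f);
	}

	if (domain == 0)
		fprintf(stderr, "no sched domain debug directories found; is debugfs mounted?\n");

	return 0;
}

Per the updated check in update_relax_domain_level() above, the values
accepted by cpuset.sched_relax_domain_level then range from -1 up to the
largest level reported here plus one.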