d4dbc99171
Now that cpufreq provides a pressure value to the scheduler, rename arch_update_thermal_pressure into HW pressure to reflect that it returns a pressure applied by HW (i.e. with a high frequency change) and not always related to thermal mitigation but also generated by max current limitation as an example. Such high frequency signal needs filtering to be smoothed and provide a value that reflects the average available capacity into the scheduler time scale. Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Tested-by: Lukasz Luba <lukasz.luba@arm.com> Reviewed-by: Qais Yousef <qyousef@layalina.io> Reviewed-by: Lukasz Luba <lukasz.luba@arm.com> Link: https://lore.kernel.org/r/20240326091616.3696851-5-vincent.guittot@linaro.org
236 lines
6.0 KiB
C
236 lines
6.0 KiB
C
#ifdef CONFIG_SMP
|
|
#include "sched-pelt.h"
|
|
|
|
int __update_load_avg_blocked_se(u64 now, struct sched_entity *se);
|
|
int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se);
|
|
int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq);
|
|
int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);
|
|
int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);
|
|
|
|
#ifdef CONFIG_SCHED_HW_PRESSURE
|
|
int update_hw_load_avg(u64 now, struct rq *rq, u64 capacity);
|
|
|
|
static inline u64 hw_load_avg(struct rq *rq)
|
|
{
|
|
return READ_ONCE(rq->avg_hw.load_avg);
|
|
}
|
|
#else
|
|
static inline int
|
|
update_hw_load_avg(u64 now, struct rq *rq, u64 capacity)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
/*
 * CONFIG_SCHED_HW_PRESSURE is disabled: no HW pressure signal is tracked,
 * so the reported value is always 0.
 */
static inline u64
hw_load_avg(struct rq *rq)
{
	return 0;
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
|
int update_irq_load_avg(struct rq *rq, u64 running);
|
|
#else
|
|
static inline int
|
|
update_irq_load_avg(struct rq *rq, u64 running)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
/* Constant part of the PELT divider; get_pelt_divider() adds period_contrib to it. */
#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
|
|
|
|
static inline u32 get_pelt_divider(struct sched_avg *avg)
|
|
{
|
|
return PELT_MIN_DIVIDER + avg->period_contrib;
|
|
}
|
|
|
|
static inline void cfs_se_util_change(struct sched_avg *avg)
|
|
{
|
|
unsigned int enqueued;
|
|
|
|
if (!sched_feat(UTIL_EST))
|
|
return;
|
|
|
|
/* Avoid store if the flag has been already reset */
|
|
enqueued = avg->util_est;
|
|
if (!(enqueued & UTIL_AVG_UNCHANGED))
|
|
return;
|
|
|
|
/* Reset flag to report util_avg has been updated */
|
|
enqueued &= ~UTIL_AVG_UNCHANGED;
|
|
WRITE_ONCE(avg->util_est, enqueued);
|
|
}
|
|
|
|
static inline u64 rq_clock_pelt(struct rq *rq)
|
|
{
|
|
lockdep_assert_rq_held(rq);
|
|
assert_clock_updated(rq);
|
|
|
|
return rq->clock_pelt - rq->lost_idle_time;
|
|
}
|
|
|
|
/* The rq is idle, we can sync to clock_task */
|
|
static inline void _update_idle_rq_clock_pelt(struct rq *rq)
|
|
{
|
|
rq->clock_pelt = rq_clock_task(rq);
|
|
|
|
u64_u32_store(rq->clock_idle, rq_clock(rq));
|
|
/* Paired with smp_rmb in migrate_se_pelt_lag() */
|
|
smp_wmb();
|
|
u64_u32_store(rq->clock_pelt_idle, rq_clock_pelt(rq));
|
|
}
|
|
|
|
/*
|
|
* The clock_pelt scales the time to reflect the effective amount of
|
|
* computation done during the running delta time but then sync back to
|
|
* clock_task when rq is idle.
|
|
*
|
|
*
|
|
* absolute time | 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|13|14|15|16
|
|
* @ max capacity ------******---------------******---------------
|
|
* @ half capacity ------************---------************---------
|
|
* clock pelt | 1| 2| 3| 4| 7| 8| 9| 10| 11|14|15|16
|
|
*
|
|
*/
|
|
static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
|
|
{
|
|
if (unlikely(is_idle_task(rq->curr))) {
|
|
_update_idle_rq_clock_pelt(rq);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* When a rq runs at a lower compute capacity, it will need
|
|
* more time to do the same amount of work than at max
|
|
* capacity. In order to be invariant, we scale the delta to
|
|
* reflect how much work has been really done.
|
|
* Running longer results in stealing idle time that will
|
|
* disturb the load signal compared to max capacity. This
|
|
* stolen idle time will be automatically reflected when the
|
|
* rq will be idle and the clock will be synced with
|
|
* rq_clock_task.
|
|
*/
|
|
|
|
/*
|
|
* Scale the elapsed time to reflect the real amount of
|
|
* computation
|
|
*/
|
|
delta = cap_scale(delta, arch_scale_cpu_capacity(cpu_of(rq)));
|
|
delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq)));
|
|
|
|
rq->clock_pelt += delta;
|
|
}
|
|
|
|
/*
|
|
* When rq becomes idle, we have to check if it has lost idle time
|
|
* because it was fully busy. A rq is fully used when the /Sum util_sum
|
|
* is greater or equal to:
|
|
* (LOAD_AVG_MAX - 1024 + rq->cfs.avg.period_contrib) << SCHED_CAPACITY_SHIFT;
|
|
* For optimization and computing rounding purpose, we don't take into account
|
|
* the position in the current window (period_contrib) and we use the higher
|
|
* bound of util_sum to decide.
|
|
*/
|
|
static inline void update_idle_rq_clock_pelt(struct rq *rq)
|
|
{
|
|
u32 divider = ((LOAD_AVG_MAX - 1024) << SCHED_CAPACITY_SHIFT) - LOAD_AVG_MAX;
|
|
u32 util_sum = rq->cfs.avg.util_sum;
|
|
util_sum += rq->avg_rt.util_sum;
|
|
util_sum += rq->avg_dl.util_sum;
|
|
|
|
/*
|
|
* Reflecting stolen time makes sense only if the idle
|
|
* phase would be present at max capacity. As soon as the
|
|
* utilization of a rq has reached the maximum value, it is
|
|
* considered as an always running rq without idle time to
|
|
* steal. This potential idle time is considered as lost in
|
|
* this case. We keep track of this lost idle time compare to
|
|
* rq's clock_task.
|
|
*/
|
|
if (util_sum >= divider)
|
|
rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt;
|
|
|
|
_update_idle_rq_clock_pelt(rq);
|
|
}
|
|
|
|
#ifdef CONFIG_CFS_BANDWIDTH
|
|
static inline void update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
|
|
{
|
|
u64 throttled;
|
|
|
|
if (unlikely(cfs_rq->throttle_count))
|
|
throttled = U64_MAX;
|
|
else
|
|
throttled = cfs_rq->throttled_clock_pelt_time;
|
|
|
|
u64_u32_store(cfs_rq->throttled_pelt_idle, throttled);
|
|
}
|
|
|
|
/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
|
|
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
|
|
{
|
|
if (unlikely(cfs_rq->throttle_count))
|
|
return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time;
|
|
|
|
return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time;
|
|
}
|
|
#else
|
|
/* CONFIG_CFS_BANDWIDTH is disabled: no throttling state to snapshot. */
static inline void
update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
}
|
|
/*
 * CONFIG_CFS_BANDWIDTH is disabled: the cfs_rq PELT clock is simply the
 * PELT clock of the rq that owns @cfs_rq.
 */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);

	return rq_clock_pelt(rq);
}
|
|
#endif
|
|
|
|
#else
|
|
|
|
static inline int
|
|
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int
|
|
update_hw_load_avg(u64 now, struct rq *rq, u64 capacity)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
/* !CONFIG_SMP stub: no HW pressure signal is tracked, always returns 0. */
static inline u64
hw_load_avg(struct rq *rq)
{
	return 0;
}
|
|
|
|
static inline int
|
|
update_irq_load_avg(struct rq *rq, u64 running)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
/*
 * !CONFIG_SMP: there is no separate PELT clock, the value of
 * rq_clock_task() is returned directly.
 */
static inline u64 rq_clock_pelt(struct rq *rq)
{
	u64 task_now = rq_clock_task(rq);

	return task_now;
}
|
|
|
|
static inline void
|
|
update_rq_clock_pelt(struct rq *rq, s64 delta) { }
|
|
|
|
/* !CONFIG_SMP stub: no PELT clock to sync when the rq goes idle. */
static inline void update_idle_rq_clock_pelt(struct rq *rq)
{
}
|
|
|
|
/* !CONFIG_SMP stub: no per-cfs_rq idle snapshot to record. */
static inline void
update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
}
|
|
#endif
|
|
|
|
|