sched/psi: Remove NR_ONCPU task accounting
We put all fields updated by the scheduler in the first cacheline of struct psi_group_cpu for performance. Since we want to add another state, PSI_IRQ_FULL, to track IRQ/SOFTIRQ pressure, we need to reclaim space first. This patch removes the NR_ONCPU task accounting from struct psi_group_cpu and uses one bit in state_mask to track it instead. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Chengming Zhou <zhouchengming@bytedance.com> Tested-by: Chengming Zhou <zhouchengming@bytedance.com> Link: https://lore.kernel.org/r/20220825164111.29534-7-zhouchengming@bytedance.com
This commit is contained in:
parent
65176f59a1
commit
71dbdde791
@ -15,13 +15,6 @@ enum psi_task_count {
|
|||||||
NR_IOWAIT,
|
NR_IOWAIT,
|
||||||
NR_MEMSTALL,
|
NR_MEMSTALL,
|
||||||
NR_RUNNING,
|
NR_RUNNING,
|
||||||
/*
|
|
||||||
* This can't have values other than 0 or 1 and could be
|
|
||||||
* implemented as a bit flag. But for now we still have room
|
|
||||||
* in the first cacheline of psi_group_cpu, and this way we
|
|
||||||
* don't have to special case any state tracking for it.
|
|
||||||
*/
|
|
||||||
NR_ONCPU,
|
|
||||||
/*
|
/*
|
||||||
* For IO and CPU stalls the presence of running/oncpu tasks
|
* For IO and CPU stalls the presence of running/oncpu tasks
|
||||||
* in the domain means a partial rather than a full stall.
|
* in the domain means a partial rather than a full stall.
|
||||||
@ -32,16 +25,18 @@ enum psi_task_count {
|
|||||||
* threads and memstall ones.
|
* threads and memstall ones.
|
||||||
*/
|
*/
|
||||||
NR_MEMSTALL_RUNNING,
|
NR_MEMSTALL_RUNNING,
|
||||||
NR_PSI_TASK_COUNTS = 5,
|
NR_PSI_TASK_COUNTS = 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Task state bitmasks */
|
/* Task state bitmasks */
|
||||||
#define TSK_IOWAIT (1 << NR_IOWAIT)
|
#define TSK_IOWAIT (1 << NR_IOWAIT)
|
||||||
#define TSK_MEMSTALL (1 << NR_MEMSTALL)
|
#define TSK_MEMSTALL (1 << NR_MEMSTALL)
|
||||||
#define TSK_RUNNING (1 << NR_RUNNING)
|
#define TSK_RUNNING (1 << NR_RUNNING)
|
||||||
#define TSK_ONCPU (1 << NR_ONCPU)
|
|
||||||
#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
|
#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
|
||||||
|
|
||||||
|
/* Only one task can be scheduled, no corresponding task count */
|
||||||
|
#define TSK_ONCPU (1 << NR_PSI_TASK_COUNTS)
|
||||||
|
|
||||||
/* Resources that workloads could be stalled on */
|
/* Resources that workloads could be stalled on */
|
||||||
enum psi_res {
|
enum psi_res {
|
||||||
PSI_IO,
|
PSI_IO,
|
||||||
@ -68,6 +63,9 @@ enum psi_states {
|
|||||||
NR_PSI_STATES = 7,
|
NR_PSI_STATES = 7,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Use one bit in the state mask to track TSK_ONCPU */
|
||||||
|
#define PSI_ONCPU (1 << NR_PSI_STATES)
|
||||||
|
|
||||||
enum psi_aggregators {
|
enum psi_aggregators {
|
||||||
PSI_AVGS = 0,
|
PSI_AVGS = 0,
|
||||||
PSI_POLL,
|
PSI_POLL,
|
||||||
|
@ -212,7 +212,7 @@ void __init psi_init(void)
|
|||||||
group_init(&psi_system);
|
group_init(&psi_system);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool test_state(unsigned int *tasks, enum psi_states state)
|
static bool test_state(unsigned int *tasks, enum psi_states state, bool oncpu)
|
||||||
{
|
{
|
||||||
switch (state) {
|
switch (state) {
|
||||||
case PSI_IO_SOME:
|
case PSI_IO_SOME:
|
||||||
@ -225,9 +225,9 @@ static bool test_state(unsigned int *tasks, enum psi_states state)
|
|||||||
return unlikely(tasks[NR_MEMSTALL] &&
|
return unlikely(tasks[NR_MEMSTALL] &&
|
||||||
tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);
|
tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);
|
||||||
case PSI_CPU_SOME:
|
case PSI_CPU_SOME:
|
||||||
return unlikely(tasks[NR_RUNNING] > tasks[NR_ONCPU]);
|
return unlikely(tasks[NR_RUNNING] > oncpu);
|
||||||
case PSI_CPU_FULL:
|
case PSI_CPU_FULL:
|
||||||
return unlikely(tasks[NR_RUNNING] && !tasks[NR_ONCPU]);
|
return unlikely(tasks[NR_RUNNING] && !oncpu);
|
||||||
case PSI_NONIDLE:
|
case PSI_NONIDLE:
|
||||||
return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
|
return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
|
||||||
tasks[NR_RUNNING];
|
tasks[NR_RUNNING];
|
||||||
@ -689,9 +689,9 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
|||||||
bool wake_clock)
|
bool wake_clock)
|
||||||
{
|
{
|
||||||
struct psi_group_cpu *groupc;
|
struct psi_group_cpu *groupc;
|
||||||
u32 state_mask = 0;
|
|
||||||
unsigned int t, m;
|
unsigned int t, m;
|
||||||
enum psi_states s;
|
enum psi_states s;
|
||||||
|
u32 state_mask;
|
||||||
|
|
||||||
groupc = per_cpu_ptr(group->pcpu, cpu);
|
groupc = per_cpu_ptr(group->pcpu, cpu);
|
||||||
|
|
||||||
@ -707,17 +707,36 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
|||||||
|
|
||||||
record_times(groupc, now);
|
record_times(groupc, now);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start with TSK_ONCPU, which doesn't have a corresponding
|
||||||
|
* task count - it's just a boolean flag directly encoded in
|
||||||
|
* the state mask. Clear, set, or carry the current state if
|
||||||
|
* no changes are requested.
|
||||||
|
*/
|
||||||
|
if (unlikely(clear & TSK_ONCPU)) {
|
||||||
|
state_mask = 0;
|
||||||
|
clear &= ~TSK_ONCPU;
|
||||||
|
} else if (unlikely(set & TSK_ONCPU)) {
|
||||||
|
state_mask = PSI_ONCPU;
|
||||||
|
set &= ~TSK_ONCPU;
|
||||||
|
} else {
|
||||||
|
state_mask = groupc->state_mask & PSI_ONCPU;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The rest of the state mask is calculated based on the task
|
||||||
|
* counts. Update those first, then construct the mask.
|
||||||
|
*/
|
||||||
for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
|
for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
|
||||||
if (!(m & (1 << t)))
|
if (!(m & (1 << t)))
|
||||||
continue;
|
continue;
|
||||||
if (groupc->tasks[t]) {
|
if (groupc->tasks[t]) {
|
||||||
groupc->tasks[t]--;
|
groupc->tasks[t]--;
|
||||||
} else if (!psi_bug) {
|
} else if (!psi_bug) {
|
||||||
printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u %u] clear=%x set=%x\n",
|
printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n",
|
||||||
cpu, t, groupc->tasks[0],
|
cpu, t, groupc->tasks[0],
|
||||||
groupc->tasks[1], groupc->tasks[2],
|
groupc->tasks[1], groupc->tasks[2],
|
||||||
groupc->tasks[3], groupc->tasks[4],
|
groupc->tasks[3], clear, set);
|
||||||
clear, set);
|
|
||||||
psi_bug = 1;
|
psi_bug = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -726,9 +745,8 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
|||||||
if (set & (1 << t))
|
if (set & (1 << t))
|
||||||
groupc->tasks[t]++;
|
groupc->tasks[t]++;
|
||||||
|
|
||||||
/* Calculate state mask representing active states */
|
|
||||||
for (s = 0; s < NR_PSI_STATES; s++) {
|
for (s = 0; s < NR_PSI_STATES; s++) {
|
||||||
if (test_state(groupc->tasks, s))
|
if (test_state(groupc->tasks, s, state_mask & PSI_ONCPU))
|
||||||
state_mask |= (1 << s);
|
state_mask |= (1 << s);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -740,7 +758,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
|||||||
* task in a cgroup is in_memstall, the corresponding groupc
|
* task in a cgroup is in_memstall, the corresponding groupc
|
||||||
* on that cpu is in PSI_MEM_FULL state.
|
* on that cpu is in PSI_MEM_FULL state.
|
||||||
*/
|
*/
|
||||||
if (unlikely(groupc->tasks[NR_ONCPU] && cpu_curr(cpu)->in_memstall))
|
if (unlikely((state_mask & PSI_ONCPU) && cpu_curr(cpu)->in_memstall))
|
||||||
state_mask |= (1 << PSI_MEM_FULL);
|
state_mask |= (1 << PSI_MEM_FULL);
|
||||||
|
|
||||||
groupc->state_mask = state_mask;
|
groupc->state_mask = state_mask;
|
||||||
@ -828,7 +846,8 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
|
|||||||
*/
|
*/
|
||||||
iter = NULL;
|
iter = NULL;
|
||||||
while ((group = iterate_groups(next, &iter))) {
|
while ((group = iterate_groups(next, &iter))) {
|
||||||
if (per_cpu_ptr(group->pcpu, cpu)->tasks[NR_ONCPU]) {
|
if (per_cpu_ptr(group->pcpu, cpu)->state_mask &
|
||||||
|
PSI_ONCPU) {
|
||||||
common = group;
|
common = group;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user