sched/syscalls: Split out kernel/sched/syscalls.c from kernel/sched/core.c
core.c has become rather large; move most scheduler syscall related functionality into a separate file, syscalls.c. This is ~15% of core.c's raw linecount. Move the alloc_user_cpus_ptr(), __rt_effective_prio(), rt_effective_prio(), uclamp_none(), uclamp_se_set() and uclamp_bucket_id() inlines to kernel/sched/sched.h. Internally export the __sched_setscheduler(), __sched_setaffinity(), __setscheduler_prio(), set_load_weight(), enqueue_task(), dequeue_task(), check_class_changed(), splice_balance_callbacks() and balance_callbacks() methods to better facilitate this. Move the new file's build to build_policy.c, because it fits there semantically, but also because it's the smallest of the 4 build units under an allmodconfig build: -rw-rw-r-- 1 mingo mingo 7.3M May 27 12:35 kernel/sched/core.i -rw-rw-r-- 1 mingo mingo 6.4M May 27 12:36 kernel/sched/build_utility.i -rw-rw-r-- 1 mingo mingo 6.3M May 27 12:36 kernel/sched/fair.i -rw-rw-r-- 1 mingo mingo 5.8M May 27 12:36 kernel/sched/build_policy.i This better balances build time for scheduler subsystem rebuilds. I build-tested this new file as a standalone syscalls.o file for a bit, to make sure all the encapsulations & abstractions are robust. Also update/add my copyright notices to these files.
Build time measurements: # -Before/+After: kepler:~/tip> perf stat -e 'cycles,instructions,duration_time' --sync --repeat 5 --pre 'rm -f kernel/sched/*.o' m kernel/sched/built-in.a >/dev/null Performance counter stats for 'm kernel/sched/built-in.a' (5 runs): - 71,938,508,607 cycles ( +- 0.17% ) + 71,992,916,493 cycles ( +- 0.22% ) - 106,214,780,964 instructions # 1.48 insn per cycle ( +- 0.01% ) + 105,450,231,154 instructions # 1.46 insn per cycle ( +- 0.01% ) - 5,878,232,620 ns duration_time ( +- 0.38% ) + 5,290,085,069 ns duration_time ( +- 0.21% ) - 5.8782 +- 0.0221 seconds time elapsed ( +- 0.38% ) + 5.2901 +- 0.0111 seconds time elapsed ( +- 0.21% ) Build time improvement of -11.1% (duration_time) is expected: the parallel build time of the scheduler subsystem is determined by the largest, slowest to build object file, which is kernel/sched/core.o. By moving ~15% of its complexity into another build unit, we reduced build time by -11%. Measured cycles spent on building is within its ~0.2% stddev noise envelope. The -0.7% reduction in instructions spent on building the scheduler is statistically reliable and somewhat surprising - I can only speculate: maybe compilers aren't that efficient at building & optimizing 10+ KLOC files (core.c), and it's an overall win to balance the linecount a bit. Anyway, this might be a data point that suggests that reducing the linecount of our largest files will improve not just code readability and maintainability, but might also improve build times a bit. 
Code generation got a bit worse, by 0.5kb text on an x86 defconfig build: # -Before/+After: kepler:~/tip> size vmlinux text data bss dec hex filename -26475475 10439178 1740804 38655457 24dd5e1 vmlinux +26476003 10439178 1740804 38655985 24dd7f1 vmlinux kepler:~/tip> size kernel/sched/built-in.a text data bss dec hex filename - 76056 30025 489 106570 1a04a kernel/sched/core.o (ex kernel/sched/built-in.a) + 63452 29453 489 93394 16cd2 kernel/sched/core.o (ex kernel/sched/built-in.a) 44299 2181 104 46584 b5f8 kernel/sched/fair.o (ex kernel/sched/built-in.a) - 42764 3424 120 46308 b4e4 kernel/sched/build_policy.o (ex kernel/sched/built-in.a) + 55651 4044 120 59815 e9a7 kernel/sched/build_policy.o (ex kernel/sched/built-in.a) 44866 12655 2192 59713 e941 kernel/sched/build_utility.o (ex kernel/sched/built-in.a) 44866 12655 2192 59713 e941 kernel/sched/build_utility.o (ex kernel/sched/built-in.a) This is primarily due to the extra functions exported, and the size gets exaggerated somewhat by __pfx CFI function padding: ffffffff810cc710 <__pfx_enqueue_task>: ffffffff810cc710: 90 nop ffffffff810cc711: 90 nop ffffffff810cc712: 90 nop ffffffff810cc713: 90 nop ffffffff810cc714: 90 nop ffffffff810cc715: 90 nop ffffffff810cc716: 90 nop ffffffff810cc717: 90 nop ffffffff810cc718: 90 nop ffffffff810cc719: 90 nop ffffffff810cc71a: 90 nop ffffffff810cc71b: 90 nop ffffffff810cc71c: 90 nop ffffffff810cc71d: 90 nop ffffffff810cc71e: 90 nop ffffffff810cc71f: 90 nop AFAICS the cost is primarily not to core.o and fair.o though (which contain most performance sensitive scheduler functions), only to syscalls.o that get called with much lower frequency - so I think this is an acceptable trade-off for better code separation. Signed-off-by: Ingo Molnar <mingo@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mel Gorman <mgorman@suse.de> Link: https://lore.kernel.org/r/20240407084319.1462211-2-mingo@kernel.org
This commit is contained in:
parent
1613e604df
commit
04746ed80b
@ -52,3 +52,4 @@
|
||||
#include "cputime.c"
|
||||
#include "deadline.c"
|
||||
|
||||
#include "syscalls.c"
|
||||
|
1781
kernel/sched/core.c
1781
kernel/sched/core.c
File diff suppressed because it is too large
Load Diff
@ -2402,8 +2402,19 @@ extern void update_group_capacity(struct sched_domain *sd, int cpu);
|
||||
|
||||
extern void sched_balance_trigger(struct rq *rq);
|
||||
|
||||
extern int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx);
|
||||
extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
|
||||
|
||||
static inline cpumask_t *alloc_user_cpus_ptr(int node)
|
||||
{
|
||||
/*
|
||||
* See do_set_cpus_allowed() above for the rcu_head usage.
|
||||
*/
|
||||
int size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
|
||||
|
||||
return kmalloc_node(size, GFP_KERNEL, node);
|
||||
}
|
||||
|
||||
static inline struct task_struct *get_push_task(struct rq *rq)
|
||||
{
|
||||
struct task_struct *p = rq->curr;
|
||||
@ -2425,7 +2436,20 @@ static inline struct task_struct *get_push_task(struct rq *rq)
|
||||
|
||||
extern int push_cpu_stop(void *arg);
|
||||
|
||||
#endif
|
||||
#else /* !CONFIG_SMP: */
|
||||
|
||||
static inline int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||
struct affinity_context *ctx)
|
||||
{
|
||||
return set_cpus_allowed_ptr(p, ctx->new_mask);
|
||||
}
|
||||
|
||||
/*
 * !CONFIG_SMP variant: nothing is allocated, @node is ignored and the
 * result is always NULL.
 */
static inline cpumask_t *alloc_user_cpus_ptr(int node)
{
	return NULL;
}
|
||||
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_CPU_IDLE
|
||||
static inline void idle_set_state(struct rq *rq,
|
||||
@ -3097,6 +3121,36 @@ static inline bool uclamp_is_used(void)
|
||||
{
|
||||
return static_branch_likely(&sched_uclamp_used);
|
||||
}
|
||||
|
||||
/*
 * Loop header: assigns 0 to @clamp_id and steps it by one for as long as
 * it stays below UCLAMP_CNT. @clamp_id must be an assignable lvalue.
 */
#define for_each_clamp_id(clamp_id)					\
	for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++)
|
||||
|
||||
extern unsigned int sysctl_sched_uclamp_util_min_rt_default;
|
||||
|
||||
|
||||
static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
|
||||
{
|
||||
if (clamp_id == UCLAMP_MIN)
|
||||
return 0;
|
||||
return SCHED_CAPACITY_SCALE;
|
||||
}
|
||||
|
||||
/* Integer rounded range for each bucket */
|
||||
#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
|
||||
|
||||
static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
|
||||
{
|
||||
return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1);
|
||||
}
|
||||
|
||||
static inline void uclamp_se_set(struct uclamp_se *uc_se,
|
||||
unsigned int value, bool user_defined)
|
||||
{
|
||||
uc_se->value = value;
|
||||
uc_se->bucket_id = uclamp_bucket_id(value);
|
||||
uc_se->user_defined = user_defined;
|
||||
}
|
||||
|
||||
#else /* CONFIG_UCLAMP_TASK */
|
||||
static inline unsigned long uclamp_eff_value(struct task_struct *p,
|
||||
enum uclamp_id clamp_id)
|
||||
@ -3132,6 +3186,7 @@ static inline bool uclamp_rq_is_idle(struct rq *rq)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_UCLAMP_TASK */
|
||||
|
||||
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
||||
@ -3480,4 +3535,53 @@ static inline void init_sched_mm_cid(struct task_struct *t) { }
|
||||
extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
|
||||
extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
|
||||
|
||||
#ifdef CONFIG_RT_MUTEXES
|
||||
static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
|
||||
{
|
||||
if (pi_task)
|
||||
prio = min(prio, pi_task->prio);
|
||||
|
||||
return prio;
|
||||
}
|
||||
|
||||
/*
 * Same as __rt_effective_prio(), with the task to compare against
 * looked up through rt_mutex_get_top_task(@p).
 */
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
	return __rt_effective_prio(rt_mutex_get_top_task(p), prio);
}
|
||||
#else
|
||||
/*
 * Variant for builds without CONFIG_RT_MUTEXES: @p is not consulted and
 * @prio is returned as is.
 */
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
	return prio;
}
|
||||
#endif
|
||||
|
||||
extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
|
||||
extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
|
||||
extern void __setscheduler_prio(struct task_struct *p, int prio);
|
||||
extern void set_load_weight(struct task_struct *p, bool update_load);
|
||||
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
|
||||
extern void dequeue_task(struct rq *rq, struct task_struct *p, int flags);
|
||||
|
||||
extern void check_class_changed(struct rq *rq, struct task_struct *p,
|
||||
const struct sched_class *prev_class,
|
||||
int oldprio);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
|
||||
extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
|
||||
#else
|
||||
|
||||
/*
 * !CONFIG_SMP variant: @rq is not touched and there is never a callback
 * list to hand back, so the result is always NULL.
 */
static inline struct balance_callback *splice_balance_callbacks(struct rq *rq)
{
	return NULL;
}
|
||||
|
||||
/* !CONFIG_SMP variant: intentionally empty, both arguments are ignored. */
static inline void balance_callbacks(struct rq *rq, struct balance_callback *head)
{
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _KERNEL_SCHED_SCHED_H */
|
||||
|
1699
kernel/sched/syscalls.c
Normal file
1699
kernel/sched/syscalls.c
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user