/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

#ifdef CONFIG_SMP
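
/*
 * Return the number of runqueues in this root domain that are currently
 * overloaded with runnable RT tasks.
 */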
static inline int rt_overloaded(struct rq *rq)
{
        return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
        cpu_set(rq->cpu, rq->rd->rto_mask);
        /*
         * Make sure the mask is visible before we set
         * the overload count. That is checked to determine
         * if we should look at the mask. It would be a shame
         * if we looked at the mask, but the mask was not
         * updated yet.
         */
        wmb();
        atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
        /* the order here really doesn't matter */
        atomic_dec(&rq->rd->rto_count);
        cpu_clear(rq->cpu, rq->rd->rto_mask);
}
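
/*
 * An rq is "overloaded" when it has more than one runnable RT task and at
 * least one of them can migrate; track transitions of that state in the
 * root domain so other CPUs know where to pull from.
 */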
static void update_rt_migration(struct rq *rq)
{
        if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
                if (!rq->rt.overloaded) {
                        rt_set_overload(rq);
                        rq->rt.overloaded = 1;
                }
        } else if (rq->rt.overloaded) {
                rt_clear_overload(rq);
                rq->rt.overloaded = 0;
        }
}

#endif /* CONFIG_SMP */

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
        return container_of(rt_se, struct task_struct, rt);
}

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
        return !list_empty(&rt_se->run_list);
}

#ifdef CONFIG_RT_GROUP_SCHED
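
/*
 * With group scheduling, each rt_rq draws its runtime and period from its
 * task group's rt_bandwidth; an rt_rq without a tg is never throttled.
 */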
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
        if (!rt_rq->tg)
                return RUNTIME_INF;

        return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
        return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

#define for_each_leaf_rt_rq(rt_rq, rq) \
        list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
        return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
        return rt_se->rt_rq;
}

#define for_each_sched_rt_entity(rt_se) \
        for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
        return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
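
/*
 * (Re)queue or remove the group entity that represents this rt_rq in its
 * parent, e.g. when the queue is unthrottled or throttled; preempt the
 * running task if the unthrottled group carries a higher priority.
 */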
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
        struct sched_rt_entity *rt_se = rt_rq->rt_se;

        if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
                struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;

                enqueue_rt_entity(rt_se);
                if (rt_rq->highest_prio < curr->prio)
                        resched_task(curr);
        }
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
        struct sched_rt_entity *rt_se = rt_rq->rt_se;

        if (rt_se && on_rt_rq(rt_se))
                dequeue_rt_entity(rt_se);
}
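
/*
 * A group rt_rq only counts as throttled when none of its tasks are
 * priority boosted (prio != normal_prio, e.g. via rt-mutex PI); boosted
 * tasks must keep running.
 */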
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
        return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
        struct rt_rq *rt_rq = group_rt_rq(rt_se);
        struct task_struct *p;

        if (rt_rq)
                return !!rt_rq->rt_nr_boosted;

        p = rt_task_of(rt_se);
        return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline cpumask_t sched_rt_period_mask(void)
{
        return cpu_rq(smp_processor_id())->rd->span;
}
#else
static inline cpumask_t sched_rt_period_mask(void)
{
        return cpu_online_map;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
        return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
        return &rt_rq->tg->rt_bandwidth;
}

#else
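
/*
 * Without group scheduling, each CPU has a single rt_rq whose bandwidth
 * comes from def_rt_bandwidth.
 */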
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
        return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
        return ktime_to_ns(def_rt_bandwidth.rt_period);
}

#define for_each_leaf_rt_rq(rt_rq, rq) \
        for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
        return container_of(rt_rq, struct rq, rt);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
        struct task_struct *p = rt_task_of(rt_se);
        struct rq *rq = task_rq(p);

        return &rq->rt;
}

#define for_each_sched_rt_entity(rt_se) \
        for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
        return NULL;
}

static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
        return rt_rq->rt_throttled;
}

static inline cpumask_t sched_rt_period_mask(void)
{
        return cpu_online_map;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
        return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
        return &def_rt_bandwidth;
}

#endif
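
/*
 * Runs from the RT period timer: refresh each rt_rq in the period mask for
 * the elapsed period(s) and unthrottle queues that dropped back below their
 * runtime. Returns 1 when every rt_rq handled is idle.
 */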
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
        int i, idle = 1;
        cpumask_t span;

        if (rt_b->rt_runtime == RUNTIME_INF)
                return 1;

        span = sched_rt_period_mask();
        for_each_cpu_mask(i, span) {
                int enqueue = 0;
                struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
                struct rq *rq = rq_of_rt_rq(rt_rq);

                spin_lock(&rq->lock);
                if (rt_rq->rt_time) {
                        u64 runtime;

                        spin_lock(&rt_rq->rt_runtime_lock);
                        runtime = rt_rq->rt_runtime;
                        rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
                        if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
                                rt_rq->rt_throttled = 0;
                                enqueue = 1;
                        }
                        if (rt_rq->rt_time || rt_rq->rt_nr_running)
                                idle = 0;
                        spin_unlock(&rt_rq->rt_runtime_lock);
                }

                if (enqueue)
                        sched_rt_rq_enqueue(rt_rq);
                spin_unlock(&rq->lock);
        }

        return idle;
}

#ifdef CONFIG_SMP
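/*
 * Try to borrow spare runtime for this rt_rq from the other CPUs in the
 * root domain, an equal share from each donor, never exceeding the period.
 * Returns 1 if any runtime was moved.
 */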
static int balance_runtime(struct rt_rq *rt_rq)
{
        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
        struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
        int i, weight, more = 0;
        u64 rt_period;

        weight = cpus_weight(rd->span);

        spin_lock(&rt_b->rt_runtime_lock);
        rt_period = ktime_to_ns(rt_b->rt_period);
        for_each_cpu_mask(i, rd->span) {
                struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                s64 diff;

                if (iter == rt_rq)
                        continue;

                spin_lock(&iter->rt_runtime_lock);
                diff = iter->rt_runtime - iter->rt_time;
                if (diff > 0) {
                        do_div(diff, weight);
                        if (rt_rq->rt_runtime + diff > rt_period)
                                diff = rt_period - rt_rq->rt_runtime;
                        iter->rt_runtime -= diff;
                        rt_rq->rt_runtime += diff;
                        more = 1;
                        if (rt_rq->rt_runtime == rt_period) {
                                spin_unlock(&iter->rt_runtime_lock);
                                break;
                        }
                }
                spin_unlock(&iter->rt_runtime_lock);
        }
        spin_unlock(&rt_b->rt_runtime_lock);

        return more;
}
#endif

static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
        struct rt_rq *rt_rq = group_rt_rq(rt_se);

        if (rt_rq)
                return rt_rq->highest_prio;
#endif

        return rt_task_of(rt_se)->prio;
}
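
/*
 * Has this rt_rq used up its runtime for the current period?  Called with
 * rt_rq->rt_runtime_lock held; may drop it temporarily to borrow runtime
 * from other CPUs before deciding to throttle and dequeue the queue.
 */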
static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
        u64 runtime = sched_rt_runtime(rt_rq);

        if (runtime == RUNTIME_INF)
                return 0;

        if (rt_rq->rt_throttled)
                return rt_rq_throttled(rt_rq);

        if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
                return 0;

#ifdef CONFIG_SMP
        if (rt_rq->rt_time > runtime) {
                int more;

                spin_unlock(&rt_rq->rt_runtime_lock);
                more = balance_runtime(rt_rq);
                spin_lock(&rt_rq->rt_runtime_lock);

                if (more)
                        runtime = sched_rt_runtime(rt_rq);
        }
#endif

        if (rt_rq->rt_time > runtime) {
                rt_rq->rt_throttled = 1;
                if (rt_rq_throttled(rt_rq)) {
                        sched_rt_rq_dequeue(rt_rq);
                        return 1;
                }
        }

        return 0;
}

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
        struct task_struct *curr = rq->curr;
        struct sched_rt_entity *rt_se = &curr->rt;
        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
        u64 delta_exec;

        if (!task_has_rt_policy(curr))
                return;

        delta_exec = rq->clock - curr->se.exec_start;
        if (unlikely((s64)delta_exec < 0))
                delta_exec = 0;

        schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));

        curr->se.sum_exec_runtime += delta_exec;
        curr->se.exec_start = rq->clock;
        cpuacct_charge(curr, delta_exec);

        for_each_sched_rt_entity(rt_se) {
                rt_rq = rt_rq_of_se(rt_se);

                spin_lock(&rt_rq->rt_runtime_lock);
                rt_rq->rt_time += delta_exec;
                if (sched_rt_runtime_exceeded(rt_rq))
                        resched_task(curr);
                spin_unlock(&rt_rq->rt_runtime_lock);
        }
}
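
/*
 * Accounting for adding an RT entity to an rt_rq: running count, highest
 * queued priority, migratable-task count and (group) bandwidth timer.
 */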
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        WARN_ON(!rt_prio(rt_se_prio(rt_se)));
        rt_rq->rt_nr_running++;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
        if (rt_se_prio(rt_se) < rt_rq->highest_prio)
                rt_rq->highest_prio = rt_se_prio(rt_se);
#endif
#ifdef CONFIG_SMP
        if (rt_se->nr_cpus_allowed > 1) {
                struct rq *rq = rq_of_rt_rq(rt_rq);

                rq->rt.rt_nr_migratory++;
        }

        update_rt_migration(rq_of_rt_rq(rt_rq));
#endif
#ifdef CONFIG_RT_GROUP_SCHED
        if (rt_se_boosted(rt_se))
                rt_rq->rt_nr_boosted++;

        if (rt_rq->tg)
                start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
#else
        start_rt_bandwidth(&def_rt_bandwidth);
#endif
}

static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        WARN_ON(!rt_prio(rt_se_prio(rt_se)));
        WARN_ON(!rt_rq->rt_nr_running);
        rt_rq->rt_nr_running--;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
        if (rt_rq->rt_nr_running) {
                struct rt_prio_array *array;

                WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio);
                if (rt_se_prio(rt_se) == rt_rq->highest_prio) {
                        /* recalculate */
                        array = &rt_rq->active;
                        rt_rq->highest_prio =
                                sched_find_first_bit(array->bitmap);
                } /* otherwise leave rq->highest prio alone */
        } else
                rt_rq->highest_prio = MAX_RT_PRIO;
#endif
#ifdef CONFIG_SMP
        if (rt_se->nr_cpus_allowed > 1) {
                struct rq *rq = rq_of_rt_rq(rt_rq);

                rq->rt.rt_nr_migratory--;
        }

        update_rt_migration(rq_of_rt_rq(rt_rq));
#endif /* CONFIG_SMP */
#ifdef CONFIG_RT_GROUP_SCHED
        if (rt_se_boosted(rt_se))
                rt_rq->rt_nr_boosted--;

        WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
#endif
}
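
/*
 * Insert the entity at the tail of its priority queue and set the bitmap
 * bit, unless it belongs to a throttled group which must stay dequeued.
 */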
static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
{
        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
        struct rt_prio_array *array = &rt_rq->active;
        struct rt_rq *group_rq = group_rt_rq(rt_se);

        if (group_rq && rt_rq_throttled(group_rq))
                return;

        list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
        __set_bit(rt_se_prio(rt_se), array->bitmap);

        inc_rt_tasks(rt_se, rt_rq);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
        struct rt_prio_array *array = &rt_rq->active;

        list_del_init(&rt_se->run_list);
        if (list_empty(array->queue + rt_se_prio(rt_se)))
                __clear_bit(rt_se_prio(rt_se), array->bitmap);

        dec_rt_tasks(rt_se, rt_rq);
}

/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top-down.
 */
static void dequeue_rt_stack(struct task_struct *p)
{
        struct sched_rt_entity *rt_se, *back = NULL;

        rt_se = &p->rt;
        for_each_sched_rt_entity(rt_se) {
                rt_se->back = back;
                back = rt_se;
        }

        for (rt_se = back; rt_se; rt_se = rt_se->back) {
                if (on_rt_rq(rt_se))
                        dequeue_rt_entity(rt_se);
        }
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
{
        struct sched_rt_entity *rt_se = &p->rt;

        if (wakeup)
                rt_se->timeout = 0;

        dequeue_rt_stack(p);

        /*
         * enqueue everybody, bottom-up.
         */
        for_each_sched_rt_entity(rt_se)
                enqueue_rt_entity(rt_se);

        inc_cpu_load(rq, p->se.load.weight);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
{
        struct sched_rt_entity *rt_se = &p->rt;
        struct rt_rq *rt_rq;

        update_curr_rt(rq);

        dequeue_rt_stack(p);

        /*
         * re-enqueue all non-empty rt_rq entities.
         */
        for_each_sched_rt_entity(rt_se) {
                rt_rq = group_rt_rq(rt_se);
                if (rt_rq && rt_rq->rt_nr_running)
                        enqueue_rt_entity(rt_se);
        }

        dec_cpu_load(rq, p->se.load.weight);
}

/*
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
static
void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
{
        struct rt_prio_array *array = &rt_rq->active;

        list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
}

static void requeue_task_rt(struct rq *rq, struct task_struct *p)
{
        struct sched_rt_entity *rt_se = &p->rt;
        struct rt_rq *rt_rq;

        for_each_sched_rt_entity(rt_se) {
                rt_rq = rt_rq_of_se(rt_se);
                requeue_rt_entity(rt_rq, rt_se);
        }
}

static void yield_task_rt(struct rq *rq)
{
        requeue_task_rt(rq, rq->curr);
}

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int select_task_rq_rt(struct task_struct *p, int sync)
{
        struct rq *rq = task_rq(p);

        /*
         * If the current task is an RT task, then
         * try to see if we can wake this RT task up on another
         * runqueue. Otherwise simply start this RT task
         * on its current runqueue.
         *
         * We want to avoid overloading runqueues. Even if
         * the RT task is of higher priority than the current RT task.
         * RT tasks behave differently than other tasks. If
         * one gets preempted, we try to push it off to another queue.
         * So trying to keep a preempting RT task on the same
         * cache hot CPU will force the running RT task to
         * a cold CPU. So we waste all the cache for the lower
         * RT task in hopes of saving some of an RT task
         * that is just being woken and probably will have
         * cold cache anyway.
         */
        if (unlikely(rt_task(rq->curr)) &&
            (p->rt.nr_cpus_allowed > 1)) {
                int cpu = find_lowest_rq(p);

                return (cpu == -1) ? task_cpu(p) : cpu;
        }

        /*
         * Otherwise, just let it ride on the affined RQ and the
         * post-schedule router will push the preempted task away
         */
        return task_cpu(p);
}
#endif /* CONFIG_SMP */

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
{
        if (p->prio < rq->curr->prio)
                resched_task(rq->curr);
}
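
/*
 * Return the first entity on the highest-priority non-empty queue of
 * this rt_rq.
 */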
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
                                                   struct rt_rq *rt_rq)
{
        struct rt_prio_array *array = &rt_rq->active;
        struct sched_rt_entity *next = NULL;
        struct list_head *queue;
        int idx;

        idx = sched_find_first_bit(array->bitmap);
        BUG_ON(idx >= MAX_RT_PRIO);

        queue = array->queue + idx;
        next = list_entry(queue->next, struct sched_rt_entity, run_list);

        return next;
}
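
/*
 * Walk down the group hierarchy from the top-level rt_rq to a task;
 * nothing is returned while the top-level queue is empty or throttled.
 */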
static struct task_struct *pick_next_task_rt(struct rq *rq)
{
        struct sched_rt_entity *rt_se;
        struct task_struct *p;
        struct rt_rq *rt_rq;

        rt_rq = &rq->rt;

        if (unlikely(!rt_rq->rt_nr_running))
                return NULL;

        if (rt_rq_throttled(rt_rq))
                return NULL;

        do {
                rt_se = pick_next_rt_entity(rq, rt_rq);
                BUG_ON(!rt_se);
                rt_rq = group_rt_rq(rt_se);
        } while (rt_rq);

        p = rt_task_of(rt_se);
        p->se.exec_start = rq->clock;

        return p;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
        update_curr_rt(rq);
        p->se.exec_start = 0;
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);

static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
        if (!task_running(rq, p) &&
            (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
            (p->rt.nr_cpus_allowed > 1))
                return 1;
        return 0;
}

/* Return the second highest RT task, NULL otherwise */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
{
        struct task_struct *next = NULL;
        struct sched_rt_entity *rt_se;
        struct rt_prio_array *array;
        struct rt_rq *rt_rq;
        int idx;

        for_each_leaf_rt_rq(rt_rq, rq) {
                array = &rt_rq->active;
                idx = sched_find_first_bit(array->bitmap);
next_idx:
                if (idx >= MAX_RT_PRIO)
                        continue;
                if (next && next->prio < idx)
                        continue;
                list_for_each_entry(rt_se, array->queue + idx, run_list) {
                        struct task_struct *p = rt_task_of(rt_se);
                        if (pick_rt_task(rq, p, cpu)) {
                                next = p;
                                break;
                        }
                }
                if (!next) {
                        idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
                        goto next_idx;
                }
        }

        return next;
}

static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
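
/*
 * Build a mask of CPUs whose runqueues run at a lower RT priority than
 * 'task' (CPUs with no RT load at all are preferred) and return the
 * number of candidates found.
 */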
static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
{
        int lowest_prio = -1;
        int lowest_cpu = -1;
        int count = 0;
        int cpu;

        cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);

        /*
         * Scan each rq for the lowest prio.
         */
        for_each_cpu_mask(cpu, *lowest_mask) {
                struct rq *rq = cpu_rq(cpu);

                /* We look for lowest RT prio or non-rt CPU */
                if (rq->rt.highest_prio >= MAX_RT_PRIO) {
                        /*
                         * if we already found a low RT queue
                         * and now we found this non-rt queue
                         * clear the mask and set our bit.
                         * Otherwise just return the queue as is
                         * and the count==1 will cause the algorithm
                         * to use the first bit found.
                         */
                        if (lowest_cpu != -1) {
                                cpus_clear(*lowest_mask);
                                cpu_set(rq->cpu, *lowest_mask);
                        }
                        return 1;
                }

                /* no locking for now */
                if ((rq->rt.highest_prio > task->prio)
                    && (rq->rt.highest_prio >= lowest_prio)) {
                        if (rq->rt.highest_prio > lowest_prio) {
                                /* new low - clear old data */
                                lowest_prio = rq->rt.highest_prio;
                                lowest_cpu = cpu;
                                count = 0;
                        }
                        count++;
                } else
                        cpu_clear(cpu, *lowest_mask);
        }

        /*
         * Clear out all the set bits that represent
         * runqueues that were of higher prio than
         * the lowest_prio.
         */
        if (lowest_cpu > 0) {
                /*
                 * Perhaps we could add another cpumask op to
                 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
                 * Then that could be optimized to use memset and such.
                 */
                for_each_cpu_mask(cpu, *lowest_mask) {
                        if (cpu >= lowest_cpu)
                                break;
                        cpu_clear(cpu, *lowest_mask);
                }
        }

        return count;
}

static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
{
        int first;

        /* "this_cpu" is cheaper to preempt than a remote processor */
        if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
                return this_cpu;

        first = first_cpu(*mask);
        if (first != NR_CPUS)
                return first;

        return -1;
}

static int find_lowest_rq(struct task_struct *task)
{
        struct sched_domain *sd;
        cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
        int this_cpu = smp_processor_id();
        int cpu      = task_cpu(task);
        int count    = find_lowest_cpus(task, lowest_mask);

        if (!count)
                return -1; /* No targets found */

        /*
         * There is no sense in performing an optimal search if only one
         * target is found.
         */
        if (count == 1)
                return first_cpu(*lowest_mask);

        /*
         * At this point we have built a mask of cpus representing the
         * lowest priority tasks in the system.  Now we want to elect
         * the best one based on our affinity and topology.
         *
         * We prioritize the last cpu that the task executed on since
         * it is most likely cache-hot in that location.
         */
        if (cpu_isset(cpu, *lowest_mask))
                return cpu;

        /*
         * Otherwise, we consult the sched_domains span maps to figure
         * out which cpu is logically closest to our hot cache data.
         */
        if (this_cpu == cpu)
                this_cpu = -1; /* Skip this_cpu opt if the same */

        for_each_domain(cpu, sd) {
                if (sd->flags & SD_WAKE_AFFINE) {
                        cpumask_t domain_mask;
                        int       best_cpu;

                        cpus_and(domain_mask, sd->span, *lowest_mask);

                        best_cpu = pick_optimal_cpu(this_cpu,
                                                    &domain_mask);
                        if (best_cpu != -1)
                                return best_cpu;
                }
        }

        /*
         * And finally, if there were no matches within the domains
         * just give the caller *something* to work with from the compatible
         * locations.
         */
        return pick_optimal_cpu(this_cpu, lowest_mask);
}

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
        struct rq *lowest_rq = NULL;
        int tries;
        int cpu;

        for (tries = 0; tries < RT_MAX_TRIES; tries++) {
                cpu = find_lowest_rq(task);

                if ((cpu == -1) || (cpu == rq->cpu))
                        break;

                lowest_rq = cpu_rq(cpu);

                /* if the prio of this runqueue changed, try again */
                if (double_lock_balance(rq, lowest_rq)) {
                        /*
                         * We had to unlock the run queue. In
                         * the mean time, task could have
                         * migrated already or had its affinity changed.
                         * Also make sure that it wasn't scheduled on its rq.
                         */
                        if (unlikely(task_rq(task) != rq ||
                                     !cpu_isset(lowest_rq->cpu,
                                                task->cpus_allowed) ||
                                     task_running(rq, task) ||
                                     !task->se.on_rq)) {

                                spin_unlock(&lowest_rq->lock);
                                lowest_rq = NULL;
                                break;
                        }
                }

                /* If this rq is still suitable use it. */
                if (lowest_rq->rt.highest_prio > task->prio)
                        break;

                /* try again */
                spin_unlock(&lowest_rq->lock);
                lowest_rq = NULL;
        }

        return lowest_rq;
}

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
static int push_rt_task(struct rq *rq)
{
        struct task_struct *next_task;
        struct rq *lowest_rq;
        int ret = 0;
        int paranoid = RT_MAX_TRIES;

        if (!rq->rt.overloaded)
                return 0;

        next_task = pick_next_highest_task_rt(rq, -1);
        if (!next_task)
                return 0;

retry:
        if (unlikely(next_task == rq->curr)) {
                WARN_ON(1);
                return 0;
        }

        /*
         * It's possible that the next_task slipped in of
         * higher priority than current. If that's the case
         * just reschedule current.
         */
        if (unlikely(next_task->prio < rq->curr->prio)) {
                resched_task(rq->curr);
                return 0;
        }

        /* We might release rq lock */
        get_task_struct(next_task);

        /* find_lock_lowest_rq locks the rq if found */
        lowest_rq = find_lock_lowest_rq(next_task, rq);
        if (!lowest_rq) {
                struct task_struct *task;
                /*
                 * find_lock_lowest_rq releases rq->lock
                 * so it is possible that next_task has changed.
                 * If it has, then try again.
                 */
                task = pick_next_highest_task_rt(rq, -1);
                if (unlikely(task != next_task) && task && paranoid--) {
                        put_task_struct(next_task);
                        next_task = task;
                        goto retry;
                }
                goto out;
        }

        deactivate_task(rq, next_task, 0);
        set_task_cpu(next_task, lowest_rq->cpu);
        activate_task(lowest_rq, next_task, 0);

        resched_task(lowest_rq->curr);

        spin_unlock(&lowest_rq->lock);

        ret = 1;
out:
        put_task_struct(next_task);

        return ret;
}

/*
 * TODO: Currently we just use the second highest prio task on
 *       the queue, and stop when it can't migrate (or there are
 *       no more RT tasks).  There may be a case where a lower
 *       priority RT task has a different affinity than the
 *       higher RT task. In this case the lower RT task could
 *       possibly be able to migrate whereas the higher priority
 *       RT task could not.  We currently ignore this issue.
 *       Enhancements are welcome!
 */
static void push_rt_tasks(struct rq *rq)
{
        /* push_rt_task will return true if it moved an RT */
        while (push_rt_task(rq))
                ;
}
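
/*
 * Pull RT tasks that would preempt our next task from the overloaded
 * runqueues in this root domain.  Returns nonzero when a task was pulled
 * or our next task may have changed under us.
 */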
static int pull_rt_task(struct rq *this_rq)
{
        int this_cpu = this_rq->cpu, ret = 0, cpu;
        struct task_struct *p, *next;
        struct rq *src_rq;

        if (likely(!rt_overloaded(this_rq)))
                return 0;

        next = pick_next_task_rt(this_rq);

        for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
                if (this_cpu == cpu)
                        continue;

                src_rq = cpu_rq(cpu);
                /*
                 * We can potentially drop this_rq's lock in
                 * double_lock_balance, and another CPU could
                 * steal our next task - hence we must cause
                 * the caller to recalculate the next task
                 * in that case:
                 */
                if (double_lock_balance(this_rq, src_rq)) {
                        struct task_struct *old_next = next;

                        next = pick_next_task_rt(this_rq);
                        if (next != old_next)
                                ret = 1;
                }

                /*
                 * Are there still pullable RT tasks?
                 */
                if (src_rq->rt.rt_nr_running <= 1)
                        goto skip;

                p = pick_next_highest_task_rt(src_rq, this_cpu);

                /*
                 * Do we have an RT task that preempts
                 * the to-be-scheduled task?
                 */
                if (p && (!next || (p->prio < next->prio))) {
                        WARN_ON(p == src_rq->curr);
                        WARN_ON(!p->se.on_rq);

                        /*
                         * There's a chance that p is higher in priority
                         * than what's currently running on its cpu.
                         * This is just that p is waking up and hasn't
                         * had a chance to schedule. We only pull
                         * p if it is lower in priority than the
                         * current task on the run queue or
                         * this_rq's next task is lower in prio than
                         * the current task on that rq.
                         */
                        if (p->prio < src_rq->curr->prio ||
                            (next && next->prio < src_rq->curr->prio))
                                goto skip;

                        ret = 1;

                        deactivate_task(src_rq, p, 0);
                        set_task_cpu(p, this_cpu);
                        activate_task(this_rq, p, 0);
                        /*
                         * We continue with the search, just in
                         * case there's an even higher prio task
                         * in another runqueue. (low likelihood
                         * but possible)
                         *
                         * Update next so that we won't pick a task
                         * on another cpu with a priority lower (or equal)
                         * than the one we just picked.
                         */
                        next = p;
                }
skip:
                spin_unlock(&src_rq->lock);
        }

        return ret;
}

static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
{
        /* Try to pull RT tasks here if we lower this rq's prio */
        if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
                pull_rt_task(rq);
}

static void post_schedule_rt(struct rq *rq)
{
        /*
         * If we have more than one rt_task queued, then
         * see if we can push the other rt_tasks off to other CPUS.
         * Note we may release the rq lock, and since
         * the lock was owned by prev, we need to release it
         * first via finish_lock_switch and then reacquire it here.
         */
        if (unlikely(rq->rt.overloaded)) {
                spin_lock_irq(&rq->lock);
                push_rt_tasks(rq);
                spin_unlock_irq(&rq->lock);
        }
}
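
/*
 * A task woken onto an overloaded runqueue that will not run here right
 * away is a candidate for being pushed to another CPU.
 */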
static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
{
        if (!task_running(rq, p) &&
            (p->prio >= rq->rt.highest_prio) &&
            rq->rt.overloaded)
                push_rt_tasks(rq);
}

static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
                unsigned long max_load_move,
                struct sched_domain *sd, enum cpu_idle_type idle,
                int *all_pinned, int *this_best_prio)
{
        /* don't touch RT tasks */
        return 0;
}

static int
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
                 struct sched_domain *sd, enum cpu_idle_type idle)
{
        /* don't touch RT tasks */
        return 0;
}

static void set_cpus_allowed_rt(struct task_struct *p,
                                const cpumask_t *new_mask)
{
        int weight = cpus_weight(*new_mask);

        BUG_ON(!rt_task(p));

        /*
         * Update the migration status of the RQ if we have an RT task
         * which is running AND changing its weight value.
         */
        if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
                struct rq *rq = task_rq(p);

                if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
                        rq->rt.rt_nr_migratory++;
                } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
                        BUG_ON(!rq->rt.rt_nr_migratory);
                        rq->rt.rt_nr_migratory--;
                }

                update_rt_migration(rq);
        }

        p->cpus_allowed       = *new_mask;
        p->rt.nr_cpus_allowed = weight;
}

/* Assumes rq->lock is held */
static void join_domain_rt(struct rq *rq)
{
        if (rq->rt.overloaded)
                rt_set_overload(rq);
}

/* Assumes rq->lock is held */
static void leave_domain_rt(struct rq *rq)
{
        if (rq->rt.overloaded)
                rt_clear_overload(rq);
}

/*
 * When switching away from the rt queue, we bring ourselves to a position
 * that we might want to pull RT tasks from other runqueues.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p,
                             int running)
{
        /*
         * If there are other RT tasks then we will reschedule
         * and the scheduling of the other RT tasks will handle
         * the balancing. But if we are the last RT task
         * we may need to handle the pulling of RT tasks
         * now.
         */
        if (!rq->rt.rt_nr_running)
                pull_rt_task(rq);
}
#endif /* CONFIG_SMP */

/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p,
                           int running)
{
        int check_resched = 1;

        /*
         * If we are already running, then there's nothing
         * that needs to be done. But if we are not running
         * we may need to preempt the current running task.
         * If that current running task is also an RT task
         * then see if we can move to another run queue.
         */
        if (!running) {
#ifdef CONFIG_SMP
                if (rq->rt.overloaded && push_rt_task(rq) &&
                    /* Don't resched if we changed runqueues */
                    rq != task_rq(p))
                        check_resched = 0;
#endif /* CONFIG_SMP */
                if (check_resched && p->prio < rq->curr->prio)
                        resched_task(rq->curr);
        }
}

/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void prio_changed_rt(struct rq *rq, struct task_struct *p,
                            int oldprio, int running)
{
        if (running) {
#ifdef CONFIG_SMP
                /*
                 * If our priority decreases while running, we
                 * may need to pull tasks to this runqueue.
                 */
                if (oldprio < p->prio)
                        pull_rt_task(rq);
                /*
                 * If there's a higher priority task waiting to run
                 * then reschedule. Note, the above pull_rt_task
                 * can release the rq lock and p could migrate.
                 * Only reschedule if p is still on the same runqueue.
                 */
                if (p->prio > rq->rt.highest_prio && rq->curr == p)
                        resched_task(p);
#else
                /* For UP simply resched on drop of prio */
                if (oldprio < p->prio)
                        resched_task(p);
#endif /* CONFIG_SMP */
        } else {
                /*
                 * This task is not running, but if it is
                 * greater than the current running task
                 * then reschedule.
                 */
                if (p->prio < rq->curr->prio)
                        resched_task(rq->curr);
        }
}
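
/*
 * RLIMIT_RTTIME watchdog: count the ticks this task has been running and,
 * once past the limit, set it_sched_expires so the limit can be enforced.
 */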
static void watchdog(struct rq *rq, struct task_struct *p)
{
        unsigned long soft, hard;

        if (!p->signal)
                return;

        soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
        hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max;

        if (soft != RLIM_INFINITY) {
                unsigned long next;

                p->rt.timeout++;
                next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
                if (p->rt.timeout > next)
                        p->it_sched_expires = p->se.sum_exec_runtime;
        }
}
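
/*
 * Scheduler tick for an RT task: update runtime accounting, run the
 * RLIMIT_RTTIME watchdog and handle SCHED_RR timeslice rotation.
 */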
static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
        update_curr_rt(rq);

        watchdog(rq, p);

        /*
         * RR tasks need a special form of timeslice management.
         * FIFO tasks have no timeslices.
         */
        if (p->policy != SCHED_RR)
                return;

        if (--p->rt.time_slice)
                return;

        p->rt.time_slice = DEF_TIMESLICE;

        /*
         * Requeue to the end of queue if we are not the only element
         * on the queue:
         */
        if (p->rt.run_list.prev != p->rt.run_list.next) {
                requeue_task_rt(rq, p);
                set_tsk_need_resched(p);
        }
}

static void set_curr_task_rt(struct rq *rq)
{
        struct task_struct *p = rq->curr;

        p->se.exec_start = rq->clock;
}

const struct sched_class rt_sched_class = {
        .next                   = &fair_sched_class,
        .enqueue_task           = enqueue_task_rt,
        .dequeue_task           = dequeue_task_rt,
        .yield_task             = yield_task_rt,
#ifdef CONFIG_SMP
        .select_task_rq         = select_task_rq_rt,
#endif /* CONFIG_SMP */

        .check_preempt_curr     = check_preempt_curr_rt,

        .pick_next_task         = pick_next_task_rt,
        .put_prev_task          = put_prev_task_rt,

#ifdef CONFIG_SMP
        .load_balance           = load_balance_rt,
        .move_one_task          = move_one_task_rt,
        .set_cpus_allowed       = set_cpus_allowed_rt,
        .join_domain            = join_domain_rt,
        .leave_domain           = leave_domain_rt,
        .pre_schedule           = pre_schedule_rt,
        .post_schedule          = post_schedule_rt,
        .task_wake_up           = task_wake_up_rt,
        .switched_from          = switched_from_rt,
#endif

        .set_curr_task          = set_curr_task_rt,
        .task_tick              = task_tick_rt,

        .prio_changed           = prio_changed_rt,
        .switched_to            = switched_to_rt,
};