#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in other CPU reading this CPU's irq time and can
 * race with irq/vtime_account on this CPU. We would either get old
 * or new value with a side effect of accounting a slice of irq time to wrong
 * task when irq is in progress while we read rq->clock. That is a worthy
 * compromise in place of having locks on each irq in account_system_time.
 */
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 0;
}

#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */
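
/*
 * The seqcount above pairs with the irq_time_write_begin()/irq_time_write_end()
 * calls used below. The write/read helpers themselves are not defined in this
 * file (they come in via sched.h); on 32-bit kernels the read side retries
 * until it sees a consistent pair of 64-bit values. A sketch of that read
 * side, for illustration only:
 *
 *	static inline u64 irq_time_read(int cpu)
 *	{
 *		u64 irq_time;
 *		unsigned seq;
 *
 *		do {
 *			seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
 *			irq_time = per_cpu(cpu_softirq_time, cpu) +
 *				   per_cpu(cpu_hardirq_time, cpu);
 *		} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
 *
 *		return irq_time;
 *	}
 */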
/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
	unsigned long flags;
	s64 delta;
	int cpu;

	if (!sched_clock_irqtime)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
	__this_cpu_add(irq_start_time, delta);

	irq_time_write_begin();
	/*
	 * We do not account for softirq time from ksoftirqd here.
	 * We want to continue accounting softirq time to ksoftirqd thread
	 * in that case, so as not to confuse the scheduler with a special
	 * task that does not consume any time, but still wants to run.
	 */
	if (hardirq_count())
		__this_cpu_add(cpu_hardirq_time, delta);
	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
		__this_cpu_add(cpu_softirq_time, delta);

	irq_time_write_end();
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

/*
 * Returns 1 if this CPU's accumulated hardirq time has moved past what has
 * already been folded into cpustat[CPUTIME_IRQ], i.e. the pending tick
 * should be charged as hardirq time.
 */
static int irqtime_account_hi_update(void)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	unsigned long flags;
	u64 latest_ns;
	int ret = 0;

	local_irq_save(flags);
	latest_ns = this_cpu_read(cpu_hardirq_time);
	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
		ret = 1;
	local_irq_restore(flags);
	return ret;
}

/* Same as above, for softirq time against cpustat[CPUTIME_SOFTIRQ]. */
static int irqtime_account_si_update(void)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	unsigned long flags;
	u64 latest_ns;
	int ret = 0;

	local_irq_save(flags);
	latest_ns = this_cpu_read(cpu_softirq_time);
	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
		ret = 1;
	local_irq_restore(flags);
	return ret;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
#ifdef CONFIG_CGROUP_CPUACCT
	struct kernel_cpustat *kcpustat;
	struct cpuacct *ca;
#endif
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * get ourselves ahead and touch it first. If the root cgroup
	 * is the only cgroup, then nothing else should be necessary.
	 */
	__get_cpu_var(kernel_cpustat).cpustat[index] += tmp;

#ifdef CONFIG_CGROUP_CPUACCT
	if (unlikely(!cpuacct_subsys.active))
		return;

	rcu_read_lock();
	ca = task_ca(p);
	while (ca && (ca != &root_cpuacct)) {
		kcpustat = this_cpu_ptr(ca->cpustat);
		kcpustat->cpustat[index] += tmp;
		ca = parent_ca(ca);
	}
	rcu_read_unlock();
#endif
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
		       cputime_t cputime_scaled)
{
	int index;

	/* Add user time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);

	index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for user time used */
	acct_update_integrals(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
			       cputime_t cputime_scaled)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (TASK_NICE(p) > 0) {
		cpustat[CPUTIME_NICE] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
	} else {
		cpustat[CPUTIME_USER] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST] += (__force u64) cputime;
	}
}

/*
 * Account system cpu time to a process and desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
			   cputime_t cputime_scaled, int index)
{
	/* Add system time to process. */
	p->stime += cputime;
	p->stimescaled += cputime_scaled;
	account_group_system_time(p, cputime);

	/* Add system time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for system time used */
	acct_update_integrals(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
			 cputime_t cputime, cputime_t cputime_scaled)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime, cputime_scaled);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	__account_system_time(p, cputime, cputime_scaled, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
	else
		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		u64 steal, st = 0;

		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;

		st = steal_ticks(steal);
		this_rq()->prev_steal_time += st * TICK_NSEC;

		account_steal_time(st);
		return st;
	}
#endif
	return false;
}

#ifndef CONFIG_VIRT_CPU_ACCOUNTING

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * Check for hardirq is done both for system and user time as there is
 * no timer going off while we are on hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq or
 * softirq time, as those do not count in task exec_runtime any more.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq)
{
	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	if (steal_account_process_tick())
		return;

	if (irqtime_account_hi_update()) {
		cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
	} else if (irqtime_account_si_update()) {
		cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
	} else if (this_cpu_ksoftirqd() == p) {
		/*
		 * ksoftirqd time does not get accounted in cpu_softirq_time.
		 * So, we have to handle it separately here.
		 * Also, p->stime needs to be updated for ksoftirqd.
		 */
		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
					CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
	} else if (p == rq->idle) {
		account_idle_time(cputime_one_jiffy);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
	} else {
		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
					CPUTIME_SYSTEM);
	}
}

static void irqtime_account_idle_ticks(int ticks)
{
	int i;
	struct rq *rq = this_rq();

	for (i = 0; i < ticks; i++)
		irqtime_account_process_tick(current, 0, rq);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static void irqtime_account_idle_ticks(int ticks) {}
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
	struct rq *rq = this_rq();

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq);
		return;
	}

	if (steal_account_process_tick())
		return;

	if (user_tick)
		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
				    one_jiffy_scaled);
	else
		account_idle_time(cputime_one_jiffy);
}

/*
 * Account multiple ticks of steal time.
 * @ticks: number of stolen ticks
 */
void account_steal_ticks(unsigned long ticks)
{
	account_steal_time(jiffies_to_cputime(ticks));
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of idle ticks
 */
void account_idle_ticks(unsigned long ticks)
{
	if (sched_clock_irqtime) {
		irqtime_account_idle_ticks(ticks);
		return;
	}

	account_idle_time(jiffies_to_cputime(ticks));
}

#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	*ut = p->utime;
	*st = p->stime;
}

void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}

void vtime_account_system(struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	__vtime_account_system(tsk);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account_system);

/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * __vtime_account_system() and __vtime_account_idle(). Archs that
 * have another meaning of idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);

	if (in_interrupt() || !is_idle_task(tsk))
		__vtime_account_system(tsk);
	else
		__vtime_account_idle(tsk);

	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
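
/*
 * For illustration only, not part of this file: an arch that relies on the
 * generic vtime_account() above just provides the two helpers it calls.
 * A minimal sketch, assuming a hypothetical arch_vtime_delta() helper that
 * returns the cputime elapsed since the last accounting point:
 *
 *	void __vtime_account_system(struct task_struct *tsk)
 *	{
 *		cputime_t delta = arch_vtime_delta();
 *
 *		account_system_time(tsk, 0, delta, cputime_to_scaled(delta));
 *	}
 *
 *	void __vtime_account_idle(struct task_struct *tsk)
 *	{
 *		account_idle_time(arch_vtime_delta());
 *	}
 *
 * An arch like s390, which defines __ARCH_HAS_VTIME_ACCOUNT, instead
 * overrides vtime_account() itself.
 */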
#else

#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
#endif

static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
{
	u64 temp = (__force u64) rtime;

	temp *= (__force u64) utime;

	if (sizeof(cputime_t) == 4)
		temp = div_u64(temp, (__force u32) total);
	else
		temp = div64_u64(temp, (__force u64) total);

	return (__force cputime_t) temp;
}
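
/*
 * Illustration with made-up numbers: for utime = 2, stime = 6 and a CFS
 * runtime of rtime = 4, total = 8 and scale_utime(2, 4, 8) = 4 * 2 / 8 = 1,
 * so the callers below report utime = 1 and stime = rtime - utime = 3,
 * preserving the 1:3 user/system ratio of the tick-based counters.
 */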
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	cputime_t rtime, utime = p->utime, total = utime + p->stime;

	/*
	 * Use CFS's precise accounting:
	 */
	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);

	if (total)
		utime = scale_utime(utime, rtime, total);
	else
		utime = rtime;

	/*
	 * Compare with previous values, to keep monotonicity:
	 * the scaled estimate may shrink between calls, but the values
	 * reported to userspace must never go backwards.
	 */
	p->prev_utime = max(p->prev_utime, utime);
	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);

	*ut = p->prev_utime;
	*st = p->prev_stime;
}

/*
 * Must be called with siglock held.
 */
void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct signal_struct *sig = p->signal;
	struct task_cputime cputime;
	cputime_t rtime, utime, total;

	thread_group_cputime(p, &cputime);

	total = cputime.utime + cputime.stime;
	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);

	if (total)
		utime = scale_utime(cputime.utime, rtime, total);
	else
		utime = rtime;

	sig->prev_utime = max(sig->prev_utime, utime);
	sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);

	*ut = sig->prev_utime;
	*st = sig->prev_stime;
}

#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */