2013-03-29 14:36:31 +08:00
# include <linux/cgroup.h>
# include <linux/slab.h>
# include <linux/percpu.h>
# include <linux/spinlock.h>
# include <linux/cpumask.h>
# include <linux/seq_file.h>
# include <linux/rcupdate.h>
# include <linux/kernel_stat.h>
2013-04-10 15:10:50 +02:00
# include <linux/err.h>
2013-03-29 14:36:31 +08:00
# include "sched.h"
/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */
2013-03-29 14:38:13 +08:00
/*
 * Indices of the per-group time statistics: time spent by the tasks of the
 * cpu accounting group executing in ...
 *
 * CPUACCT_STAT_USER and CPUACCT_STAT_SYSTEM are also used as indices into
 * cpuacct_stat_desc[], which holds the label reported for each statistic.
 */
enum cpuacct_stat_index {
CPUACCT_STAT_USER , /* ... user mode */
CPUACCT_STAT_SYSTEM , /* ... kernel mode */
CPUACCT_STAT_NSTATS , /* count of the statistics listed above; must stay last */
} ;
/*
 * track cpu usage of a group of tasks and its child groups
 *
 * Instances are allocated by cpuacct_css_alloc(), except for the statically
 * defined root_cpuacct.  css_ca() maps an embedded css back to its cpuacct
 * with container_of().
 */
struct cpuacct {
/* generic cgroup state embedded in this group; handed to css_ca() */
struct cgroup_subsys_state css ;
/* cpuusage holds pointer to a u64-type object on every cpu */
/* cpuacct_charge() adds the charged cputime to the object of the task's cpu */
u64 __percpu * cpuusage ;
/* per-cpu kernel_cpustat; cpuacct_account_field() adds to its cpustat[] entries */
struct kernel_cpustat __percpu * cpustat ;
} ;
2013-08-08 20:11:23 -04:00
/*
 * Map a generic cgroup_subsys_state to the cpuacct that embeds it.
 * A NULL @css is passed through as NULL.
 */
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	if (!css)
		return NULL;

	return container_of(css, struct cpuacct, css);
}
2013-03-29 14:38:13 +08:00
/* return cpu accounting group to which this task belongs */
static inline struct cpuacct * task_ca ( struct task_struct * tsk )
{
2013-08-08 20:11:23 -04:00
return css_ca ( task_css ( tsk , cpuacct_subsys_id ) ) ;
2013-03-29 14:38:13 +08:00
}
static inline struct cpuacct * parent_ca ( struct cpuacct * ca )
{
cgroup: add css_parent()
Currently, controllers have to explicitly follow the cgroup hierarchy
to find the parent of a given css. cgroup is moving towards using
cgroup_subsys_state as the main controller interface construct, so
let's provide a way to climb the hierarchy using just csses.
This patch implements css_parent() which, given a css, returns its
parent. The function is guarnateed to valid non-NULL parent css as
long as the target css is not at the top of the hierarchy.
freezer, cpuset, cpu, cpuacct, hugetlb, memory, net_cls and devices
are converted to use css_parent() instead of accessing cgroup->parent
directly.
* __parent_ca() is dropped from cpuacct and its usage is replaced with
parent_ca(). The only difference between the two was NULL test on
cgroup->parent which is now embedded in css_parent() making the
distinction moot. Note that eventually a css->parent field will be
added to css and the NULL check in css_parent() will go away.
This patch shouldn't cause any behavior differences.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
2013-08-08 20:11:23 -04:00
return css_ca ( css_parent ( & ca - > css ) ) ;
2013-03-29 14:38:13 +08:00
}
2013-03-29 14:43:46 +08:00
/* per-cpu u64 usage counters of the root group; root_cpuacct.cpuusage points here */
static DEFINE_PER_CPU ( u64 , root_cpuacct_cpuusage ) ;
2013-03-29 14:44:04 +08:00
/*
 * Statically defined accounting group, returned by cpuacct_css_alloc() when
 * it is called without a parent css.  Its cpustat points at kernel_cpustat
 * (declared outside this file) instead of a private per-cpu allocation, and
 * cpuacct_account_field() stops its walk before updating this group.
 */
static struct cpuacct root_cpuacct = {
. cpustat = & kernel_cpustat ,
. cpuusage = & root_cpuacct_cpuusage ,
} ;
2013-03-29 14:36:31 +08:00
/* create a new cpu accounting group */
2013-08-08 20:11:23 -04:00
static struct cgroup_subsys_state *
cpuacct_css_alloc ( struct cgroup_subsys_state * parent_css )
2013-03-29 14:36:31 +08:00
{
struct cpuacct * ca ;
2013-08-08 20:11:23 -04:00
if ( ! parent_css )
2013-03-29 14:36:31 +08:00
return & root_cpuacct . css ;
ca = kzalloc ( sizeof ( * ca ) , GFP_KERNEL ) ;
if ( ! ca )
goto out ;
ca - > cpuusage = alloc_percpu ( u64 ) ;
if ( ! ca - > cpuusage )
goto out_free_ca ;
ca - > cpustat = alloc_percpu ( struct kernel_cpustat ) ;
if ( ! ca - > cpustat )
goto out_free_cpuusage ;
return & ca - > css ;
out_free_cpuusage :
free_percpu ( ca - > cpuusage ) ;
out_free_ca :
kfree ( ca ) ;
out :
return ERR_PTR ( - ENOMEM ) ;
}
/* destroy an existing cpu accounting group */
2013-08-08 20:11:23 -04:00
static void cpuacct_css_free ( struct cgroup_subsys_state * css )
2013-03-29 14:36:31 +08:00
{
2013-08-08 20:11:23 -04:00
struct cpuacct * ca = css_ca ( css ) ;
2013-03-29 14:36:31 +08:00
free_percpu ( ca - > cpustat ) ;
free_percpu ( ca - > cpuusage ) ;
kfree ( ca ) ;
}
static u64 cpuacct_cpuusage_read ( struct cpuacct * ca , int cpu )
{
u64 * cpuusage = per_cpu_ptr ( ca - > cpuusage , cpu ) ;
u64 data ;
# ifndef CONFIG_64BIT
/*
* Take rq - > lock to make 64 - bit read safe on 32 - bit platforms .
*/
raw_spin_lock_irq ( & cpu_rq ( cpu ) - > lock ) ;
data = * cpuusage ;
raw_spin_unlock_irq ( & cpu_rq ( cpu ) - > lock ) ;
# else
data = * cpuusage ;
# endif
return data ;
}
/*
 * Store @val into the usage counter of group @ca for @cpu.
 *
 * With CONFIG_64BIT the counter is written directly.  Otherwise the store
 * is done under that cpu's rq->lock (interrupts disabled) to make the
 * 64-bit write safe on 32-bit platforms.
 */
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	u64 *counter = per_cpu_ptr(ca->cpuusage, cpu);

#ifdef CONFIG_64BIT
	*counter = val;
#else
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
	*counter = val;
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}
/* return total cpu usage (in nanoseconds) of a group */
2013-08-08 20:11:24 -04:00
static u64 cpuusage_read ( struct cgroup_subsys_state * css , struct cftype * cft )
2013-03-29 14:36:31 +08:00
{
2013-08-08 20:11:24 -04:00
struct cpuacct * ca = css_ca ( css ) ;
2013-03-29 14:36:31 +08:00
u64 totalcpuusage = 0 ;
int i ;
for_each_present_cpu ( i )
totalcpuusage + = cpuacct_cpuusage_read ( ca , i ) ;
return totalcpuusage ;
}
2013-08-08 20:11:24 -04:00
/*
 * Write handler for the usage file.
 *
 * The only accepted value is 0, which clears the group's usage counter on
 * every present cpu.  Any non-zero @reset is rejected with -EINVAL and
 * leaves the counters untouched.  @cft is unused.
 *
 * Returns 0 on success or -EINVAL.
 */
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 reset)
{
	struct cpuacct *ca = css_ca(css);
	int cpu;

	if (reset)
		return -EINVAL;

	for_each_present_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu, 0);

	return 0;
}
2013-08-08 20:11:24 -04:00
static int cpuacct_percpu_seq_read ( struct cgroup_subsys_state * css ,
struct cftype * cft , struct seq_file * m )
2013-03-29 14:36:31 +08:00
{
2013-08-08 20:11:24 -04:00
struct cpuacct * ca = css_ca ( css ) ;
2013-03-29 14:36:31 +08:00
u64 percpu ;
int i ;
for_each_present_cpu ( i ) {
percpu = cpuacct_cpuusage_read ( ca , i ) ;
seq_printf ( m , " %llu " , ( unsigned long long ) percpu ) ;
}
seq_printf ( m , " \n " ) ;
return 0 ;
}
/*
 * Labels handed to cb->fill() by cpuacct_stats_show(), indexed by
 * enum cpuacct_stat_index.
 */
static const char * const cpuacct_stat_desc [ ] = {
[ CPUACCT_STAT_USER ] = " user " ,
[ CPUACCT_STAT_SYSTEM ] = " system " ,
} ;
2013-08-08 20:11:24 -04:00
static int cpuacct_stats_show ( struct cgroup_subsys_state * css ,
struct cftype * cft , struct cgroup_map_cb * cb )
2013-03-29 14:36:31 +08:00
{
2013-08-08 20:11:24 -04:00
struct cpuacct * ca = css_ca ( css ) ;
2013-03-29 14:36:31 +08:00
int cpu ;
s64 val = 0 ;
for_each_online_cpu ( cpu ) {
struct kernel_cpustat * kcpustat = per_cpu_ptr ( ca - > cpustat , cpu ) ;
val + = kcpustat - > cpustat [ CPUTIME_USER ] ;
val + = kcpustat - > cpustat [ CPUTIME_NICE ] ;
}
val = cputime64_to_clock_t ( val ) ;
cb - > fill ( cb , cpuacct_stat_desc [ CPUACCT_STAT_USER ] , val ) ;
val = 0 ;
for_each_online_cpu ( cpu ) {
struct kernel_cpustat * kcpustat = per_cpu_ptr ( ca - > cpustat , cpu ) ;
val + = kcpustat - > cpustat [ CPUTIME_SYSTEM ] ;
val + = kcpustat - > cpustat [ CPUTIME_IRQ ] ;
val + = kcpustat - > cpustat [ CPUTIME_SOFTIRQ ] ;
}
val = cputime64_to_clock_t ( val ) ;
cb - > fill ( cb , cpuacct_stat_desc [ CPUACCT_STAT_SYSTEM ] , val ) ;
return 0 ;
}
/*
 * Control files of the cpuacct controller; referenced by
 * cpuacct_subsys.base_cftypes.  Each entry binds a file name to the
 * handler(s) defined above.
 */
static struct cftype files [ ] = {
{
/* read: usage summed over present cpus; write: only 0 is accepted and clears the counters */
. name = " usage " ,
. read_u64 = cpuusage_read ,
. write_u64 = cpuusage_write ,
} ,
{
/* read-only: one usage value per present cpu */
. name = " usage_percpu " ,
. read_seq_string = cpuacct_percpu_seq_read ,
} ,
{
/* read-only: user and system totals reported as a map */
. name = " stat " ,
. read_map = cpuacct_stats_show ,
} ,
{ } /* terminate */
} ;
/*
 * Charge @cputime of @tsk's execution time to its accounting group and to
 * every ancestor of that group, using the usage counter of the cpu that
 * task_cpu() reports for @tsk.
 *
 * Called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	int cpu = task_cpu(tsk);
	struct cpuacct *ca;

	rcu_read_lock();

	/* Walk upwards until parent_ca() reports that there is no parent left. */
	ca = task_ca(tsk);
	do {
		*per_cpu_ptr(ca->cpuusage, cpu) += cputime;
		ca = parent_ca(ca);
	} while (ca);

	rcu_read_unlock();
}
2013-03-29 14:37:06 +08:00
/*
 * Add user/system time to cpuacct: @val is added to cpustat[@index] of the
 * current cpu for @p's accounting group and each of its ancestors below the
 * root group.
 *
 * Note: the walk stops at root_cpuacct without touching it; it is the
 * caller that updates the account of the root cgroup.
 */
void cpuacct_account_field(struct task_struct *p, int index, u64 val)
{
	struct cpuacct *ca;

	rcu_read_lock();

	for (ca = task_ca(p); ca != &root_cpuacct; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;

	rcu_read_unlock();
}
2013-03-29 14:36:31 +08:00
struct cgroup_subsys cpuacct_subsys = {
2013-03-29 14:44:15 +08:00
. name = " cpuacct " ,
. css_alloc = cpuacct_css_alloc ,
. css_free = cpuacct_css_free ,
. subsys_id = cpuacct_subsys_id ,
. base_cftypes = files ,
. early_init = 1 ,
2013-03-29 14:36:31 +08:00
} ;