2009-12-03 12:59:42 -05:00
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
# include <linux/ioprio.h>
2009-12-03 12:59:49 -05:00
# include <linux/seq_file.h>
# include <linux/kdev_t.h>
2009-12-04 10:36:41 -05:00
# include <linux/module.h>
2009-12-07 19:29:39 +11:00
# include <linux/err.h>
2009-12-03 12:59:42 -05:00
# include "blk-cgroup.h"
2009-12-04 10:36:42 -05:00
/*
 * List of registered IO control policies (struct blkio_policy_type, linked
 * through its ->list member). blkio_list_lock is held around every insertion,
 * removal and traversal of the list in this file.
 */
static DEFINE_SPINLOCK ( blkio_list_lock ) ;
static LIST_HEAD ( blkio_list ) ;
2009-12-03 12:59:47 -05:00
2009-12-03 12:59:42 -05:00
/*
 * Statically allocated blkio state for the root cgroup; blkiocg_create()
 * hands this object out when the cgroup has no parent. Its weight starts at
 * twice BLKIO_WEIGHT_DEFAULT.
 */
struct blkio_cgroup blkio_root_cgroup = {
	.weight = 2 * BLKIO_WEIGHT_DEFAULT,
};
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
2009-12-03 12:59:42 -05:00
/*
 * Map a cgroup to its blkio controller state.
 *
 * Looks up the blkio subsystem state attached to @cgroup and returns the
 * struct blkio_cgroup that embeds it as its ->css member.
 */
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	struct cgroup_subsys_state *state;

	state = cgroup_subsys_state(cgroup, blkio_subsys_id);
	return container_of(state, struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
2009-12-03 12:59:42 -05:00
2009-12-03 12:59:49 -05:00
void blkiocg_update_blkio_group_stats ( struct blkio_group * blkg ,
unsigned long time , unsigned long sectors )
{
blkg - > time + = time ;
blkg - > sectors + = sectors ;
}
2009-12-04 10:36:41 -05:00
EXPORT_SYMBOL_GPL ( blkiocg_update_blkio_group_stats ) ;
2009-12-03 12:59:49 -05:00
2009-12-03 12:59:42 -05:00
/*
 * Link a blkio group into a blkio cgroup.
 *
 * @blkcg: cgroup state the group is attached to
 * @blkg:  group being added
 * @key:   opaque owner key stored in blkg->key
 * @dev:   device number recorded in blkg->dev
 *
 * Under blkcg->lock the key and the css id of @blkcg are stored in @blkg and
 * the group is added to the head of blkcg->blkg_list with the RCU list
 * primitive. The device number is recorded after the lock has been dropped.
 */
void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
			     struct blkio_group *blkg, void *key, dev_t dev)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&blkcg->lock, irq_flags);
	rcu_assign_pointer(blkg->key, key);
	blkg->blkcg_id = css_id(&blkcg->css);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	spin_unlock_irqrestore(&blkcg->lock, irq_flags);

#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Need to take css reference ? */
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
#endif

	blkg->dev = dev;
}
EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
2009-12-03 12:59:42 -05:00
2009-12-03 12:59:47 -05:00
static void __blkiocg_del_blkio_group ( struct blkio_group * blkg )
{
hlist_del_init_rcu ( & blkg - > blkcg_node ) ;
blkg - > blkcg_id = 0 ;
}
/*
* returns 0 if blkio_group was still on cgroup list . Otherwise returns 1
* indicating that blk_group was unhashed by the time we got to it .
*/
2009-12-03 12:59:42 -05:00
int blkiocg_del_blkio_group ( struct blkio_group * blkg )
{
2009-12-03 12:59:47 -05:00
struct blkio_cgroup * blkcg ;
unsigned long flags ;
struct cgroup_subsys_state * css ;
int ret = 1 ;
rcu_read_lock ( ) ;
css = css_lookup ( & blkio_subsys , blkg - > blkcg_id ) ;
if ( ! css )
goto out ;
blkcg = container_of ( css , struct blkio_cgroup , css ) ;
spin_lock_irqsave ( & blkcg - > lock , flags ) ;
if ( ! hlist_unhashed ( & blkg - > blkcg_node ) ) {
__blkiocg_del_blkio_group ( blkg ) ;
ret = 0 ;
}
spin_unlock_irqrestore ( & blkcg - > lock , flags ) ;
out :
rcu_read_unlock ( ) ;
return ret ;
2009-12-03 12:59:42 -05:00
}
2009-12-04 10:36:41 -05:00
EXPORT_SYMBOL_GPL ( blkiocg_del_blkio_group ) ;
2009-12-03 12:59:42 -05:00
/* called under rcu_read_lock(). */
struct blkio_group * blkiocg_lookup_group ( struct blkio_cgroup * blkcg , void * key )
{
struct blkio_group * blkg ;
struct hlist_node * n ;
void * __key ;
hlist_for_each_entry_rcu ( blkg , n , & blkcg - > blkg_list , blkcg_node ) {
__key = blkg - > key ;
if ( __key = = key )
return blkg ;
}
return NULL ;
}
2009-12-04 10:36:41 -05:00
EXPORT_SYMBOL_GPL ( blkiocg_lookup_group ) ;
2009-12-03 12:59:42 -05:00
# define SHOW_FUNCTION(__VAR) \
static u64 blkiocg_ # # __VAR # # _read ( struct cgroup * cgroup , \
struct cftype * cftype ) \
{ \
struct blkio_cgroup * blkcg ; \
\
blkcg = cgroup_to_blkio_cgroup ( cgroup ) ; \
return ( u64 ) blkcg - > __VAR ; \
}
SHOW_FUNCTION ( weight ) ;
# undef SHOW_FUNCTION
/*
 * Write handler for the cgroup weight.
 *
 * Rejects values outside [BLKIO_WEIGHT_MIN, BLKIO_WEIGHT_MAX] with -EINVAL.
 * Otherwise stores the new weight in the cgroup and, for every group on the
 * cgroup's list, calls each registered policy's blkio_update_group_weight_fn
 * with that weight. Returns 0 on success.
 *
 * blkio_list_lock is taken first and blkcg->lock (with interrupts disabled)
 * second; they are released in the reverse order.
 */
static int
blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_policy_type *policy;
	struct blkio_cgroup *blkcg;
	struct blkio_group *grp;
	struct hlist_node *pos;

	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	blkcg = cgroup_to_blkio_cgroup(cgroup);

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	blkcg->weight = (unsigned int)val;
	hlist_for_each_entry(grp, pos, &blkcg->blkg_list, blkcg_node) {
		list_for_each_entry(policy, &blkio_list, list)
			policy->ops.blkio_update_group_weight_fn(grp,
							blkcg->weight);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);

	return 0;
}
2009-12-03 12:59:49 -05:00
# define SHOW_FUNCTION_PER_GROUP(__VAR) \
static int blkiocg_ # # __VAR # # _read ( struct cgroup * cgroup , \
struct cftype * cftype , struct seq_file * m ) \
{ \
struct blkio_cgroup * blkcg ; \
struct blkio_group * blkg ; \
struct hlist_node * n ; \
\
if ( ! cgroup_lock_live_group ( cgroup ) ) \
return - ENODEV ; \
\
blkcg = cgroup_to_blkio_cgroup ( cgroup ) ; \
rcu_read_lock ( ) ; \
hlist_for_each_entry_rcu ( blkg , n , & blkcg - > blkg_list , blkcg_node ) { \
if ( blkg - > dev ) \
seq_printf ( m , " %u:%u %lu \n " , MAJOR ( blkg - > dev ) , \
MINOR ( blkg - > dev ) , blkg - > __VAR ) ; \
} \
rcu_read_unlock ( ) ; \
cgroup_unlock ( ) ; \
return 0 ; \
}
SHOW_FUNCTION_PER_GROUP ( time ) ;
SHOW_FUNCTION_PER_GROUP ( sectors ) ;
# ifdef CONFIG_DEBUG_BLK_CGROUP
SHOW_FUNCTION_PER_GROUP ( dequeue ) ;
# endif
# undef SHOW_FUNCTION_PER_GROUP
#ifdef CONFIG_DEBUG_BLK_CGROUP
/*
 * Add @dequeue to the dequeue counter of @blkg. Only built when
 * CONFIG_DEBUG_BLK_CGROUP is enabled.
 */
void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
					      unsigned long dequeue)
{
	blkg->dequeue = blkg->dequeue + dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
#endif
2009-12-03 12:59:42 -05:00
struct cftype blkio_files [ ] = {
{
. name = " weight " ,
. read_u64 = blkiocg_weight_read ,
. write_u64 = blkiocg_weight_write ,
} ,
2009-12-03 12:59:49 -05:00
{
. name = " time " ,
. read_seq_string = blkiocg_time_read ,
} ,
{
. name = " sectors " ,
. read_seq_string = blkiocg_sectors_read ,
} ,
# ifdef CONFIG_DEBUG_BLK_CGROUP
{
. name = " dequeue " ,
. read_seq_string = blkiocg_dequeue_read ,
} ,
# endif
2009-12-03 12:59:42 -05:00
} ;
/*
 * cgroup ->populate callback: add every entry of blkio_files to @cgroup and
 * return the result of cgroup_add_files().
 */
static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	int nr_files = ARRAY_SIZE(blkio_files);

	return cgroup_add_files(cgroup, subsys, blkio_files, nr_files);
}
static void blkiocg_destroy ( struct cgroup_subsys * subsys , struct cgroup * cgroup )
{
struct blkio_cgroup * blkcg = cgroup_to_blkio_cgroup ( cgroup ) ;
2009-12-03 12:59:47 -05:00
unsigned long flags ;
struct blkio_group * blkg ;
void * key ;
2009-12-04 10:36:42 -05:00
struct blkio_policy_type * blkiop ;
2009-12-03 12:59:47 -05:00
rcu_read_lock ( ) ;
remove_entry :
spin_lock_irqsave ( & blkcg - > lock , flags ) ;
if ( hlist_empty ( & blkcg - > blkg_list ) ) {
spin_unlock_irqrestore ( & blkcg - > lock , flags ) ;
goto done ;
}
blkg = hlist_entry ( blkcg - > blkg_list . first , struct blkio_group ,
blkcg_node ) ;
key = rcu_dereference ( blkg - > key ) ;
__blkiocg_del_blkio_group ( blkg ) ;
2009-12-03 12:59:42 -05:00
2009-12-03 12:59:47 -05:00
spin_unlock_irqrestore ( & blkcg - > lock , flags ) ;
/*
* This blkio_group is being unlinked as associated cgroup is going
* away . Let all the IO controlling policies know about this event .
*
* Currently this is static call to one io controlling policy . Once
* we have more policies in place , we need some dynamic registration
* of callback function .
*/
2009-12-04 10:36:42 -05:00
spin_lock ( & blkio_list_lock ) ;
list_for_each_entry ( blkiop , & blkio_list , list )
blkiop - > ops . blkio_unlink_group_fn ( key , blkg ) ;
spin_unlock ( & blkio_list_lock ) ;
2009-12-03 12:59:47 -05:00
goto remove_entry ;
done :
2009-12-03 12:59:42 -05:00
free_css_id ( & blkio_subsys , & blkcg - > css ) ;
2009-12-03 12:59:47 -05:00
rcu_read_unlock ( ) ;
2009-12-03 12:59:42 -05:00
kfree ( blkcg ) ;
}
static struct cgroup_subsys_state *
blkiocg_create ( struct cgroup_subsys * subsys , struct cgroup * cgroup )
{
struct blkio_cgroup * blkcg , * parent_blkcg ;
if ( ! cgroup - > parent ) {
blkcg = & blkio_root_cgroup ;
goto done ;
}
/* Currently we do not support hierarchy deeper than two level (0,1) */
parent_blkcg = cgroup_to_blkio_cgroup ( cgroup - > parent ) ;
if ( css_depth ( & parent_blkcg - > css ) > 0 )
return ERR_PTR ( - EINVAL ) ;
blkcg = kzalloc ( sizeof ( * blkcg ) , GFP_KERNEL ) ;
if ( ! blkcg )
return ERR_PTR ( - ENOMEM ) ;
blkcg - > weight = BLKIO_WEIGHT_DEFAULT ;
done :
spin_lock_init ( & blkcg - > lock ) ;
INIT_HLIST_HEAD ( & blkcg - > blkg_list ) ;
return & blkcg - > css ;
}
/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures. For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 *
 * Returns -EINVAL when @tsk has an io_context whose nr_tasks count is
 * greater than one, 0 otherwise.
 */
static int blkiocg_can_attach(struct cgroup_subsys *subsys,
			      struct cgroup *cgroup, struct task_struct *tsk,
			      bool threadgroup)
{
	struct io_context *ioc;
	int shared;

	/* task_lock() is needed to avoid races with exit_io_context() */
	task_lock(tsk);
	ioc = tsk->io_context;
	shared = ioc && atomic_read(&ioc->nr_tasks) > 1;
	task_unlock(tsk);

	return shared ? -EINVAL : 0;
}
/*
 * cgroup ->attach callback: if @tsk has an io_context, set its
 * cgroup_changed flag. The io_context pointer is read and the flag written
 * with the task lock held.
 */
static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
			   struct cgroup *prev, struct task_struct *tsk,
			   bool threadgroup)
{
	struct io_context *task_ioc;

	task_lock(tsk);
	task_ioc = tsk->io_context;
	if (task_ioc != NULL)
		task_ioc->cgroup_changed = 1;
	task_unlock(tsk);
}
/*
 * cgroup subsystem descriptor for the block IO controller. It wires the
 * create/can_attach/attach/destroy/populate callbacks defined in this file
 * into the cgroup core and enables css ids (use_id), which
 * blkiocg_add_blkio_group() and blkiocg_del_blkio_group() use via css_id()
 * and css_lookup().
 *
 * The subsystem name carries no surrounding whitespace.
 */
struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.attach = blkiocg_attach,
	.destroy = blkiocg_destroy,
	.populate = blkiocg_populate,
	.subsys_id = blkio_subsys_id,
	.use_id = 1,
};
2009-12-04 10:36:42 -05:00
void blkio_policy_register ( struct blkio_policy_type * blkiop )
{
spin_lock ( & blkio_list_lock ) ;
list_add_tail ( & blkiop - > list , & blkio_list ) ;
spin_unlock ( & blkio_list_lock ) ;
}
EXPORT_SYMBOL_GPL ( blkio_policy_register ) ;
void blkio_policy_unregister ( struct blkio_policy_type * blkiop )
{
spin_lock ( & blkio_list_lock ) ;
list_del_init ( & blkiop - > list ) ;
spin_unlock ( & blkio_list_lock ) ;
}
EXPORT_SYMBOL_GPL ( blkio_policy_unregister ) ;