2015-06-09 21:32:10 +10:00
/*
* Process number limiting controller for cgroups .
*
* Used to allow a cgroup hierarchy to stop any new processes from fork ( ) ing
* after a certain limit is reached .
*
* Since it is trivial to hit the task limit without hitting any kmemcg limits
* in place , PIDs are a fundamental resource . As such , PID exhaustion must be
* preventable in the scope of a cgroup hierarchy by allowing resource limiting
* of the number of tasks in a cgroup .
*
* In order to use the ` pids ` controller , set the maximum number of tasks in
* pids . max ( this is not available in the root cgroup for obvious reasons ) . The
* number of processes currently in the cgroup is given by pids . current .
* Organisational operations are not blocked by cgroup policies , so it is
* possible to have pids . current > pids . max . However , it is not possible to
* violate a cgroup policy through fork ( ) . fork ( ) will return - EAGAIN if forking
* would cause a cgroup policy to be violated .
*
* To set a cgroup to have no limit , set pids . max to " max " . This is the default
* for all new cgroups ( N . B . that PID limits are hierarchical , so the most
* stringent limit in the hierarchy is followed ) .
*
* pids . current tracks all child cgroup hierarchies , so parent / pids . current is
* a superset of parent / child / pids . current .
*
* Copyright ( C ) 2015 Aleksa Sarai < cyphar @ cyphar . com >
*
* This file is subject to the terms and conditions of version 2 of the GNU
* General Public License . See the file COPYING in the main directory of the
* Linux distribution for more details .
*/
# include <linux/kernel.h>
# include <linux/threads.h>
# include <linux/atomic.h>
# include <linux/cgroup.h>
# include <linux/slab.h>
# define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
# define PIDS_MAX_STR "max"
struct pids_cgroup {
struct cgroup_subsys_state css ;
/*
* Use 64 - bit types so that we can safely represent " max " as
* % PIDS_MAX = ( % PID_MAX_LIMIT + 1 ) .
*/
atomic64_t counter ;
int64_t limit ;
} ;
static struct pids_cgroup * css_pids ( struct cgroup_subsys_state * css )
{
return container_of ( css , struct pids_cgroup , css ) ;
}
static struct pids_cgroup * parent_pids ( struct pids_cgroup * pids )
{
return css_pids ( pids - > css . parent ) ;
}
static struct cgroup_subsys_state *
pids_css_alloc ( struct cgroup_subsys_state * parent )
{
struct pids_cgroup * pids ;
pids = kzalloc ( sizeof ( struct pids_cgroup ) , GFP_KERNEL ) ;
if ( ! pids )
return ERR_PTR ( - ENOMEM ) ;
pids - > limit = PIDS_MAX ;
atomic64_set ( & pids - > counter , 0 ) ;
return & pids - > css ;
}
static void pids_css_free ( struct cgroup_subsys_state * css )
{
kfree ( css_pids ( css ) ) ;
}
/**
* pids_cancel - uncharge the local pid count
* @ pids : the pid cgroup state
* @ num : the number of pids to cancel
*
* This function will WARN if the pid count goes under 0 , because such a case is
* a bug in the pids controller proper .
*/
static void pids_cancel ( struct pids_cgroup * pids , int num )
{
/*
* A negative count ( or overflow for that matter ) is invalid ,
* and indicates a bug in the ` pids ` controller proper .
*/
WARN_ON_ONCE ( atomic64_add_negative ( - num , & pids - > counter ) ) ;
}
/**
* pids_uncharge - hierarchically uncharge the pid count
* @ pids : the pid cgroup state
* @ num : the number of pids to uncharge
*/
static void pids_uncharge ( struct pids_cgroup * pids , int num )
{
struct pids_cgroup * p ;
for ( p = pids ; p ; p = parent_pids ( p ) )
pids_cancel ( p , num ) ;
}
/**
* pids_charge - hierarchically charge the pid count
* @ pids : the pid cgroup state
* @ num : the number of pids to charge
*
* This function does * not * follow the pid limit set . It cannot fail and the new
* pid count may exceed the limit . This is only used for reverting failed
* attaches , where there is no other way out than violating the limit .
*/
static void pids_charge ( struct pids_cgroup * pids , int num )
{
struct pids_cgroup * p ;
for ( p = pids ; p ; p = parent_pids ( p ) )
atomic64_add ( num , & p - > counter ) ;
}
/**
* pids_try_charge - hierarchically try to charge the pid count
* @ pids : the pid cgroup state
* @ num : the number of pids to charge
*
* This function follows the set limit . It will fail if the charge would cause
* the new value to exceed the hierarchical limit . Returns 0 if the charge
* succeded , otherwise - EAGAIN .
*/
static int pids_try_charge ( struct pids_cgroup * pids , int num )
{
struct pids_cgroup * p , * q ;
for ( p = pids ; p ; p = parent_pids ( p ) ) {
int64_t new = atomic64_add_return ( num , & p - > counter ) ;
/*
* Since new is capped to the maximum number of pid_t , if
* p - > limit is % PIDS_MAX then we know that this test will never
* fail .
*/
if ( new > p - > limit )
goto revert ;
}
return 0 ;
revert :
for ( q = pids ; q ! = p ; q = parent_pids ( q ) )
pids_cancel ( q , num ) ;
pids_cancel ( p , num ) ;
return - EAGAIN ;
}
static int pids_can_attach ( struct cgroup_subsys_state * css ,
struct cgroup_taskset * tset )
{
struct pids_cgroup * pids = css_pids ( css ) ;
struct task_struct * task ;
cgroup_taskset_for_each ( task , tset ) {
struct cgroup_subsys_state * old_css ;
struct pids_cgroup * old_pids ;
/*
2015-08-25 12:50:44 +10:00
* No need to pin @ old_css between here and cancel_attach ( )
* because cgroup core protects it from being freed before
* the migration completes or fails .
2015-06-09 21:32:10 +10:00
*/
2015-08-25 12:50:44 +10:00
old_css = task_css ( task , pids_cgrp_id ) ;
2015-06-09 21:32:10 +10:00
old_pids = css_pids ( old_css ) ;
pids_charge ( pids , 1 ) ;
pids_uncharge ( old_pids , 1 ) ;
}
return 0 ;
}
static void pids_cancel_attach ( struct cgroup_subsys_state * css ,
struct cgroup_taskset * tset )
{
struct pids_cgroup * pids = css_pids ( css ) ;
struct task_struct * task ;
cgroup_taskset_for_each ( task , tset ) {
struct cgroup_subsys_state * old_css ;
struct pids_cgroup * old_pids ;
old_css = task_css ( task , pids_cgrp_id ) ;
old_pids = css_pids ( old_css ) ;
pids_charge ( old_pids , 1 ) ;
pids_uncharge ( pids , 1 ) ;
}
}
static int pids_can_fork ( struct task_struct * task , void * * priv_p )
{
struct cgroup_subsys_state * css ;
struct pids_cgroup * pids ;
int err ;
/*
* Use the " current " task_css for the pids subsystem as the tentative
* css . It is possible we will charge the wrong hierarchy , in which
* case we will forcefully revert / reapply the charge on the right
* hierarchy after it is committed to the task proper .
*/
css = task_get_css ( current , pids_cgrp_id ) ;
pids = css_pids ( css ) ;
err = pids_try_charge ( pids , 1 ) ;
if ( err )
goto err_css_put ;
* priv_p = css ;
return 0 ;
err_css_put :
css_put ( css ) ;
return err ;
}
static void pids_cancel_fork ( struct task_struct * task , void * priv )
{
struct cgroup_subsys_state * css = priv ;
struct pids_cgroup * pids = css_pids ( css ) ;
pids_uncharge ( pids , 1 ) ;
css_put ( css ) ;
}
static void pids_fork ( struct task_struct * task , void * priv )
{
struct cgroup_subsys_state * css ;
struct cgroup_subsys_state * old_css = priv ;
struct pids_cgroup * pids ;
struct pids_cgroup * old_pids = css_pids ( old_css ) ;
css = task_get_css ( task , pids_cgrp_id ) ;
pids = css_pids ( css ) ;
/*
* If the association has changed , we have to revert and reapply the
* charge / uncharge on the wrong hierarchy to the current one . Since
* the association can only change due to an organisation event , its
* okay for us to ignore the limit in this case .
*/
if ( pids ! = old_pids ) {
pids_uncharge ( old_pids , 1 ) ;
pids_charge ( pids , 1 ) ;
}
css_put ( css ) ;
css_put ( old_css ) ;
}
static void pids_exit ( struct cgroup_subsys_state * css ,
struct cgroup_subsys_state * old_css ,
struct task_struct * task )
{
struct pids_cgroup * pids = css_pids ( old_css ) ;
pids_uncharge ( pids , 1 ) ;
}
static ssize_t pids_max_write ( struct kernfs_open_file * of , char * buf ,
size_t nbytes , loff_t off )
{
struct cgroup_subsys_state * css = of_css ( of ) ;
struct pids_cgroup * pids = css_pids ( css ) ;
int64_t limit ;
int err ;
buf = strstrip ( buf ) ;
if ( ! strcmp ( buf , PIDS_MAX_STR ) ) {
limit = PIDS_MAX ;
goto set_limit ;
}
err = kstrtoll ( buf , 0 , & limit ) ;
if ( err )
return err ;
if ( limit < 0 | | limit > = PIDS_MAX )
return - EINVAL ;
set_limit :
/*
* Limit updates don ' t need to be mutex ' d , since it isn ' t
* critical that any racing fork ( ) s follow the new limit .
*/
pids - > limit = limit ;
return nbytes ;
}
static int pids_max_show ( struct seq_file * sf , void * v )
{
struct cgroup_subsys_state * css = seq_css ( sf ) ;
struct pids_cgroup * pids = css_pids ( css ) ;
int64_t limit = pids - > limit ;
if ( limit > = PIDS_MAX )
seq_printf ( sf , " %s \n " , PIDS_MAX_STR ) ;
else
seq_printf ( sf , " %lld \n " , limit ) ;
return 0 ;
}
static s64 pids_current_read ( struct cgroup_subsys_state * css ,
struct cftype * cft )
{
struct pids_cgroup * pids = css_pids ( css ) ;
return atomic64_read ( & pids - > counter ) ;
}
static struct cftype pids_files [ ] = {
{
. name = " max " ,
. write = pids_max_write ,
. seq_show = pids_max_show ,
. flags = CFTYPE_NOT_ON_ROOT ,
} ,
{
. name = " current " ,
. read_s64 = pids_current_read ,
} ,
{ } /* terminate */
} ;
struct cgroup_subsys pids_cgrp_subsys = {
. css_alloc = pids_css_alloc ,
. css_free = pids_css_free ,
. can_attach = pids_can_attach ,
. cancel_attach = pids_cancel_attach ,
. can_fork = pids_can_fork ,
. cancel_fork = pids_cancel_fork ,
. fork = pids_fork ,
. exit = pids_exit ,
. legacy_cftypes = pids_files ,
. dfl_cftypes = pids_files ,
} ;