Merge branch 'for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup changes from Tejun Heo:
 "Not too much activity this time around.  css_id is finally killed and
  a minor update to device_cgroup"

* 'for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  device_cgroup: remove can_attach
  cgroup: kill css_id
  memcg: stop using css id
  memcg: fail to create cgroup if the cgroup id is too big
  memcg: convert to use cgroup id
  memcg: convert to use cgroup_is_descendant()
This commit is contained in:
Linus Torvalds 2013-11-13 15:21:53 +09:00
commit a998646456
4 changed files with 41 additions and 321 deletions

View File

@ -612,11 +612,6 @@ struct cgroup_subsys {
int subsys_id;
int disabled;
int early_init;
/*
* True if this subsys uses ID. ID is not available before cgroup_init()
* (not available in early_init time.)
*/
bool use_id;
/*
* If %false, this subsystem is properly hierarchical -
@ -642,9 +637,6 @@ struct cgroup_subsys {
*/
struct cgroupfs_root *root;
struct list_head sibling;
/* used when use_id == true */
struct idr idr;
spinlock_t id_lock;
/* list of cftype_sets */
struct list_head cftsets;
@ -875,35 +867,6 @@ int css_scan_tasks(struct cgroup_subsys_state *css,
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
/*
* CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
* if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
* CSS ID is assigned at cgroup allocation (create) automatically
* and removed when subsys calls free_css_id() function. This is because
* the lifetime of cgroup_subsys_state is subsys's matter.
*
* Looking up and scanning function should be called under rcu_read_lock().
* Taking cgroup_mutex is not necessary for following calls.
* But the css returned by this routine can be "not populated yet" or "being
* destroyed". The caller should check css and cgroup's status.
*/
/*
* Typically Called at ->destroy(), or somewhere the subsys frees
* cgroup_subsys_state.
*/
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
/* Find a cgroup_subsys_state which has given ID */
struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
/* Returns true if root is ancestor of cg */
bool css_is_ancestor(struct cgroup_subsys_state *cg,
const struct cgroup_subsys_state *root);
/* Get id and depth of css */
unsigned short css_id(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
struct cgroup_subsys *ss);

View File

@ -124,38 +124,6 @@ struct cfent {
struct simple_xattrs xattrs;
};
/*
* CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
* cgroup_subsys->use_id != 0.
*/
#define CSS_ID_MAX (65535)
struct css_id {
/*
* The css to which this ID points. This pointer is set to valid value
* after cgroup is populated. If cgroup is removed, this will be NULL.
* This pointer is expected to be RCU-safe because destroy()
* is called after synchronize_rcu(). But for safe use, css_tryget()
* should be used for avoiding race.
*/
struct cgroup_subsys_state __rcu *css;
/*
* ID of this css.
*/
unsigned short id;
/*
* Depth in hierarchy which this ID belongs to.
*/
unsigned short depth;
/*
* ID is freed by RCU. (and lookup routine is RCU safe.)
*/
struct rcu_head rcu_head;
/*
* Hierarchy of CSS ID belongs to.
*/
unsigned short stack[0]; /* Array of Length (depth+1) */
};
/*
* cgroup_event represents events which userspace want to receive.
*/
@ -387,9 +355,6 @@ struct cgrp_cset_link {
static struct css_set init_css_set;
static struct cgrp_cset_link init_cgrp_cset_link;
static int cgroup_init_idr(struct cgroup_subsys *ss,
struct cgroup_subsys_state *css);
/*
* css_set_lock protects the list of css_set objects, and the chain of
* tasks off each css_set. Nests outside task->alloc_lock due to
@ -841,8 +806,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
static int alloc_css_id(struct cgroup_subsys_state *child_css);
static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
{
struct inode *inode = new_inode(sb);
@ -4240,21 +4203,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
goto err;
}
}
/* This cgroup is ready now */
for_each_root_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
struct css_id *id = rcu_dereference_protected(css->id, true);
/*
* Update id->css pointer and make this css visible from
* CSS ID functions. This pointer will be dereferened
* from RCU-read-side without locks.
*/
if (id)
rcu_assign_pointer(id->css, css);
}
return 0;
err:
cgroup_clear_dir(cgrp, subsys_mask);
@ -4323,7 +4271,6 @@ static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
css->cgroup = cgrp;
css->ss = ss;
css->flags = 0;
css->id = NULL;
if (cgrp->parent)
css->parent = cgroup_css(cgrp->parent, ss);
@ -4455,12 +4402,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
goto err_free_all;
init_css(css, ss, cgrp);
if (ss->use_id) {
err = alloc_css_id(css);
if (err)
goto err_free_all;
}
}
/*
@ -4925,12 +4866,6 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
/* our new subsystem will be attached to the dummy hierarchy. */
init_css(css, ss, cgroup_dummy_top);
/* init_idr must be after init_css() because it sets css->id. */
if (ss->use_id) {
ret = cgroup_init_idr(ss, css);
if (ret)
goto err_unload;
}
/*
* Now we need to entangle the css into the existing css_sets. unlike
@ -4996,9 +4931,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
offline_css(cgroup_css(cgroup_dummy_top, ss));
if (ss->use_id)
idr_destroy(&ss->idr);
/* deassign the subsys_id */
cgroup_subsys[ss->subsys_id] = NULL;
@ -5025,8 +4957,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
/*
* remove subsystem's css from the cgroup_dummy_top and free it -
* need to free before marking as null because ss->css_free needs
* the cgrp->subsys pointer to find their state. note that this
* also takes care of freeing the css_id.
* the cgrp->subsys pointer to find their state.
*/
ss->css_free(cgroup_css(cgroup_dummy_top, ss));
RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
@ -5097,8 +5028,6 @@ int __init cgroup_init(void)
for_each_builtin_subsys(ss, i) {
if (!ss->early_init)
cgroup_init_subsys(ss);
if (ss->use_id)
cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
}
/* allocate id for the dummy hierarchy */
@ -5518,181 +5447,6 @@ static int __init cgroup_disable(char *str)
}
__setup("cgroup_disable=", cgroup_disable);
/*
* Functons for CSS ID.
*/
/* to get ID other than 0, this should be called when !cgroup_is_dead() */
unsigned short css_id(struct cgroup_subsys_state *css)
{
struct css_id *cssid;
/*
* This css_id() can return correct value when somone has refcnt
* on this or this is under rcu_read_lock(). Once css->id is allocated,
* it's unchanged until freed.
*/
cssid = rcu_dereference_raw(css->id);
if (cssid)
return cssid->id;
return 0;
}
EXPORT_SYMBOL_GPL(css_id);
/**
* css_is_ancestor - test "root" css is an ancestor of "child"
* @child: the css to be tested.
* @root: the css supporsed to be an ancestor of the child.
*
* Returns true if "root" is an ancestor of "child" in its hierarchy. Because
* this function reads css->id, the caller must hold rcu_read_lock().
* But, considering usual usage, the csses should be valid objects after test.
* Assuming that the caller will do some action to the child if this returns
* returns true, the caller must take "child";s reference count.
* If "child" is valid object and this returns true, "root" is valid, too.
*/
bool css_is_ancestor(struct cgroup_subsys_state *child,
const struct cgroup_subsys_state *root)
{
struct css_id *child_id;
struct css_id *root_id;
child_id = rcu_dereference(child->id);
if (!child_id)
return false;
root_id = rcu_dereference(root->id);
if (!root_id)
return false;
if (child_id->depth < root_id->depth)
return false;
if (child_id->stack[root_id->depth] != root_id->id)
return false;
return true;
}
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
struct css_id *id = rcu_dereference_protected(css->id, true);
/* When this is called before css_id initialization, id can be NULL */
if (!id)
return;
BUG_ON(!ss->use_id);
rcu_assign_pointer(id->css, NULL);
rcu_assign_pointer(css->id, NULL);
spin_lock(&ss->id_lock);
idr_remove(&ss->idr, id->id);
spin_unlock(&ss->id_lock);
kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);
/*
* This is called by init or create(). Then, calls to this function are
* always serialized (By cgroup_mutex() at create()).
*/
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
struct css_id *newid;
int ret, size;
BUG_ON(!ss->use_id);
size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
newid = kzalloc(size, GFP_KERNEL);
if (!newid)
return ERR_PTR(-ENOMEM);
idr_preload(GFP_KERNEL);
spin_lock(&ss->id_lock);
/* Don't use 0. allocates an ID of 1-65535 */
ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT);
spin_unlock(&ss->id_lock);
idr_preload_end();
/* Returns error when there are no free spaces for new ID.*/
if (ret < 0)
goto err_out;
newid->id = ret;
newid->depth = depth;
return newid;
err_out:
kfree(newid);
return ERR_PTR(ret);
}
static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
struct cgroup_subsys_state *rootcss)
{
struct css_id *newid;
spin_lock_init(&ss->id_lock);
idr_init(&ss->idr);
newid = get_new_cssid(ss, 0);
if (IS_ERR(newid))
return PTR_ERR(newid);
newid->stack[0] = newid->id;
RCU_INIT_POINTER(newid->css, rootcss);
RCU_INIT_POINTER(rootcss->id, newid);
return 0;
}
static int alloc_css_id(struct cgroup_subsys_state *child_css)
{
struct cgroup_subsys_state *parent_css = css_parent(child_css);
struct css_id *child_id, *parent_id;
int i, depth;
parent_id = rcu_dereference_protected(parent_css->id, true);
depth = parent_id->depth + 1;
child_id = get_new_cssid(child_css->ss, depth);
if (IS_ERR(child_id))
return PTR_ERR(child_id);
for (i = 0; i < depth; i++)
child_id->stack[i] = parent_id->stack[i];
child_id->stack[depth] = child_id->id;
/*
* child_id->css pointer will be set after this cgroup is available
* see cgroup_populate_dir()
*/
rcu_assign_pointer(child_css->id, child_id);
return 0;
}
/**
* css_lookup - lookup css by id
* @ss: cgroup subsys to be looked into.
* @id: the id
*
* Returns pointer to cgroup_subsys_state if there is valid one with id.
* NULL if not. Should be called under rcu_read_lock()
*/
struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
{
struct css_id *cssid = NULL;
BUG_ON(!ss->use_id);
cssid = idr_find(&ss->idr, id);
if (unlikely(!cssid))
return NULL;
return rcu_dereference(cssid->css);
}
EXPORT_SYMBOL_GPL(css_lookup);
/**
* css_from_dir - get corresponding css from the dentry of a cgroup dir
* @dentry: directory dentry of interest

View File

@ -499,6 +499,29 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
return (memcg == root_mem_cgroup);
}
/*
* We restrict the id in the range of [1, 65535], so it can fit into
* an unsigned short.
*/
#define MEM_CGROUP_ID_MAX USHRT_MAX
static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
/*
* The ID of the root cgroup is 0, but memcg treat 0 as an
* invalid ID, so we return (cgroup_id + 1).
*/
return memcg->css.cgroup->id + 1;
}
static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
{
struct cgroup_subsys_state *css;
css = css_from_id(id - 1, &mem_cgroup_subsys);
return mem_cgroup_from_css(css);
}
/* Writing them here to avoid exposing memcg's inner layout */
#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
@ -570,16 +593,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
#ifdef CONFIG_MEMCG_KMEM
/*
* This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
* There are two main reasons for not using the css_id for this:
* 1) this works better in sparse environments, where we have a lot of memcgs,
* but only a few kmem-limited. Or also, if we have, for instance, 200
* memcgs, and none but the 200th is kmem-limited, we'd have to have a
* 200 entry array for that.
*
* 2) In order not to violate the cgroup API, we would like to do all memory
* allocation in ->create(). At that point, we haven't yet allocated the
* css_id. Having a separate index prevents us from messing with the cgroup
* core for this
* The main reason for not using cgroup id for this:
* this works better in sparse environments, where we have a lot of memcgs,
* but only a few kmem-limited. Or also, if we have, for instance, 200
* memcgs, and none but the 200th is kmem-limited, we'd have to have a
* 200 entry array for that.
*
* The current size of the caches array is stored in
* memcg_limited_groups_array_size. It will double each time we have to
@ -594,14 +612,14 @@ int memcg_limited_groups_array_size;
* cgroups is a reasonable guess. In the future, it could be a parameter or
* tunable, but that is strictly not necessary.
*
* MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
* MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
* this constant directly from cgroup, but it is understandable that this is
* better kept as an internal representation in cgroup.c. In any case, the
* css_id space is not getting any smaller, and we don't have to necessarily
* cgrp_id space is not getting any smaller, and we don't have to necessarily
* increase ours as well if it increases.
*/
#define MEMCG_CACHES_MIN_SIZE 4
#define MEMCG_CACHES_MAX_SIZE 65535
#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
/*
* A lot of the calls to the cache allocation functions are expected to be
@ -1408,7 +1426,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
return true;
if (!root_memcg->use_hierarchy || !memcg)
return false;
return css_is_ancestor(&memcg->css, &root_memcg->css);
return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
}
static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
@ -2826,15 +2844,10 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
*/
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
struct cgroup_subsys_state *css;
/* ID 0 is unused ID */
if (!id)
return NULL;
css = css_lookup(&mem_cgroup_subsys, id);
if (!css)
return NULL;
return mem_cgroup_from_css(css);
return mem_cgroup_from_id(id);
}
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@ -4350,7 +4363,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
* css_get() was called in uncharge().
*/
if (do_swap_account && swapout && memcg)
swap_cgroup_record(ent, css_id(&memcg->css));
swap_cgroup_record(ent, mem_cgroup_id(memcg));
}
#endif
@ -4402,8 +4415,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
{
unsigned short old_id, new_id;
old_id = css_id(&from->css);
new_id = css_id(&to->css);
old_id = mem_cgroup_id(from);
new_id = mem_cgroup_id(to);
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
mem_cgroup_swap_statistics(from, false);
@ -6166,7 +6179,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
size_t size = memcg_size();
mem_cgroup_remove_from_trees(memcg);
free_css_id(&mem_cgroup_subsys, &memcg->css);
for_each_node(node)
free_mem_cgroup_per_zone_info(memcg, node);
@ -6269,6 +6281,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
int error = 0;
if (css->cgroup->id > MEM_CGROUP_ID_MAX)
return -ENOSPC;
if (!parent)
return 0;
@ -6540,7 +6555,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
}
/* There is a swap entry and a page doesn't exist or isn't charged */
if (ent.val && !ret &&
css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
ret = MC_TARGET_SWAP;
if (target)
target->ent = ent;
@ -6960,7 +6975,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
.bind = mem_cgroup_bind,
.base_cftypes = mem_cgroup_files,
.early_init = 0,
.use_id = 1,
};
#ifdef CONFIG_MEMCG_SWAP

View File

@ -63,16 +63,6 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
struct cgroup_subsys devices_subsys;
static int devcgroup_can_attach(struct cgroup_subsys_state *new_css,
struct cgroup_taskset *set)
{
struct task_struct *task = cgroup_taskset_first(set);
if (current != task && !capable(CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
/*
* called under devcgroup_mutex
*/
@ -697,7 +687,6 @@ static struct cftype dev_cgroup_files[] = {
struct cgroup_subsys devices_subsys = {
.name = "devices",
.can_attach = devcgroup_can_attach,
.css_alloc = devcgroup_css_alloc,
.css_free = devcgroup_css_free,
.css_online = devcgroup_online,