cgroup: make hierarchy iterators deal with cgroup_subsys_state instead of cgroup
cgroup is currently in the process of transitioning to using css (cgroup_subsys_state) as the primary handle instead of cgroup in the subsystem API. For hierarchy iterators, this is beneficial because * In most cases, css is the only thing subsystems care about anyway. * On the planned unified hierarchy, iterations for different subsystems will need to skip over different subtrees of the hierarchy depending on which subsystems are enabled on each cgroup. Passing around css makes it unnecessary to explicitly specify the subsystem in question, as a css is the intersection between a cgroup and a subsystem. * For the planned unified hierarchy, css's would need to be created and destroyed dynamically, independent of the cgroup hierarchy. Having cgroup core manage css iteration makes enforcing deref rules a lot easier. Most subsystem conversions are straightforward. Noteworthy changes are * blkio: cgroup_to_blkcg() is no longer used. Removed. * freezer: cgroup_freezer() is no longer used. Removed. * devices: cgroup_to_devcgroup() is no longer used. Removed. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Li Zefan <lizefan@huawei.com> Acked-by: Michal Hocko <mhocko@suse.cz> Acked-by: Vivek Goyal <vgoyal@redhat.com> Acked-by: Aristeu Rozanski <aris@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Matt Helsley <matthltc@us.ibm.com> Cc: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
f48e3924dc
commit
492eb21b98
@ -614,7 +614,7 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
struct blkcg_policy *pol = blkcg_policy[pd->plid];
|
||||
struct blkcg_gq *pos_blkg;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
u64 sum;
|
||||
|
||||
lockdep_assert_held(pd->blkg->q->queue_lock);
|
||||
@ -622,7 +622,7 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
|
||||
sum = blkg_stat_read((void *)pd + off);
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
|
||||
blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
|
||||
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
|
||||
struct blkg_stat *stat = (void *)pos_pd + off;
|
||||
|
||||
@ -649,7 +649,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
|
||||
{
|
||||
struct blkcg_policy *pol = blkcg_policy[pd->plid];
|
||||
struct blkcg_gq *pos_blkg;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
struct blkg_rwstat sum;
|
||||
int i;
|
||||
|
||||
@ -658,7 +658,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
|
||||
sum = blkg_rwstat_read((void *)pd + off);
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
|
||||
blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
|
||||
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
|
||||
struct blkg_rwstat *rwstat = (void *)pos_pd + off;
|
||||
struct blkg_rwstat tmp;
|
||||
|
@ -184,11 +184,6 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
|
||||
return css ? container_of(css, struct blkcg, css) : NULL;
|
||||
}
|
||||
|
||||
static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
|
||||
{
|
||||
return css_to_blkcg(cgroup_css(cgroup, blkio_subsys_id));
|
||||
}
|
||||
|
||||
static inline struct blkcg *task_blkcg(struct task_struct *tsk)
|
||||
{
|
||||
return css_to_blkcg(task_css(tsk, blkio_subsys_id));
|
||||
@ -289,32 +284,31 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
|
||||
/**
|
||||
* blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
|
||||
* @d_blkg: loop cursor pointing to the current descendant
|
||||
* @pos_cgrp: used for iteration
|
||||
* @pos_css: used for iteration
|
||||
* @p_blkg: target blkg to walk descendants of
|
||||
*
|
||||
* Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU
|
||||
* read locked. If called under either blkcg or queue lock, the iteration
|
||||
* is guaranteed to include all and only online blkgs. The caller may
|
||||
* update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
|
||||
* subtree.
|
||||
* update @pos_css by calling css_rightmost_descendant() to skip subtree.
|
||||
*/
|
||||
#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \
|
||||
cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
|
||||
if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
|
||||
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \
|
||||
css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \
|
||||
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
|
||||
(p_blkg)->q, false)))
|
||||
|
||||
/**
|
||||
* blkg_for_each_descendant_post - post-order walk of a blkg's descendants
|
||||
* @d_blkg: loop cursor pointing to the current descendant
|
||||
* @pos_cgrp: used for iteration
|
||||
* @pos_css: used for iteration
|
||||
* @p_blkg: target blkg to walk descendants of
|
||||
*
|
||||
* Similar to blkg_for_each_descendant_pre() but performs post-order
|
||||
* traversal instead. Synchronization rules are the same.
|
||||
*/
|
||||
#define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg) \
|
||||
cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
|
||||
if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
|
||||
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \
|
||||
css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \
|
||||
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
|
||||
(p_blkg)->q, false)))
|
||||
|
||||
/**
|
||||
@ -577,7 +571,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
|
||||
static inline void blkcg_deactivate_policy(struct request_queue *q,
|
||||
const struct blkcg_policy *pol) { }
|
||||
|
||||
static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
|
||||
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
|
||||
|
||||
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
|
||||
|
@ -1349,7 +1349,7 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
struct throtl_grp *tg;
|
||||
struct throtl_service_queue *sq;
|
||||
struct blkcg_gq *blkg;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
int ret;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
|
||||
@ -1380,7 +1380,7 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
* blk-throttle.
|
||||
*/
|
||||
tg_update_has_rules(tg);
|
||||
blkg_for_each_descendant_pre(blkg, pos_cgrp, ctx.blkg)
|
||||
blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
|
||||
tg_update_has_rules(blkg_to_tg(blkg));
|
||||
|
||||
/*
|
||||
@ -1623,7 +1623,7 @@ void blk_throtl_drain(struct request_queue *q)
|
||||
{
|
||||
struct throtl_data *td = q->td;
|
||||
struct blkcg_gq *blkg;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
struct bio *bio;
|
||||
int rw;
|
||||
|
||||
@ -1636,7 +1636,7 @@ void blk_throtl_drain(struct request_queue *q)
|
||||
* better to walk service_queue tree directly but blkg walk is
|
||||
* easier.
|
||||
*/
|
||||
blkg_for_each_descendant_post(blkg, pos_cgrp, td->queue->root_blkg)
|
||||
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
|
||||
tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
|
||||
|
||||
tg_drain_bios(&td_root_tg(td)->service_queue);
|
||||
|
@ -779,68 +779,72 @@ static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id)
|
||||
return idr_find(&ss->root->cgroup_idr, id);
|
||||
}
|
||||
|
||||
struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp);
|
||||
struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
|
||||
struct cgroup_subsys_state *parent);
|
||||
|
||||
/**
|
||||
* cgroup_for_each_child - iterate through children of a cgroup
|
||||
* @pos: the cgroup * to use as the loop cursor
|
||||
* @cgrp: cgroup whose children to walk
|
||||
* css_for_each_child - iterate through children of a css
|
||||
* @pos: the css * to use as the loop cursor
|
||||
* @parent: css whose children to walk
|
||||
*
|
||||
* Walk @cgrp's children. Must be called under rcu_read_lock(). A child
|
||||
* cgroup which hasn't finished ->css_online() or already has finished
|
||||
* Walk @parent's children. Must be called under rcu_read_lock(). A child
|
||||
* css which hasn't finished ->css_online() or already has finished
|
||||
* ->css_offline() may show up during traversal and it's each subsystem's
|
||||
* responsibility to verify that each @pos is alive.
|
||||
*
|
||||
* If a subsystem synchronizes against the parent in its ->css_online() and
|
||||
* before starting iterating, a cgroup which finished ->css_online() is
|
||||
* before starting iterating, a css which finished ->css_online() is
|
||||
* guaranteed to be visible in the future iterations.
|
||||
*
|
||||
* It is allowed to temporarily drop RCU read lock during iteration. The
|
||||
* caller is responsible for ensuring that @pos remains accessible until
|
||||
* the start of the next iteration by, for example, bumping the css refcnt.
|
||||
*/
|
||||
#define cgroup_for_each_child(pos, cgrp) \
|
||||
for ((pos) = cgroup_next_child(NULL, (cgrp)); (pos); \
|
||||
(pos) = cgroup_next_child((pos), (cgrp)))
|
||||
#define css_for_each_child(pos, parent) \
|
||||
for ((pos) = css_next_child(NULL, (parent)); (pos); \
|
||||
(pos) = css_next_child((pos), (parent)))
|
||||
|
||||
struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
|
||||
struct cgroup *cgroup);
|
||||
struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
|
||||
struct cgroup_subsys_state *
|
||||
css_next_descendant_pre(struct cgroup_subsys_state *pos,
|
||||
struct cgroup_subsys_state *css);
|
||||
|
||||
struct cgroup_subsys_state *
|
||||
css_rightmost_descendant(struct cgroup_subsys_state *pos);
|
||||
|
||||
/**
|
||||
* cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
|
||||
* @pos: the cgroup * to use as the loop cursor
|
||||
* @cgroup: cgroup whose descendants to walk
|
||||
* css_for_each_descendant_pre - pre-order walk of a css's descendants
|
||||
* @pos: the css * to use as the loop cursor
|
||||
* @root: css whose descendants to walk
|
||||
*
|
||||
* Walk @cgroup's descendants. Must be called under rcu_read_lock(). A
|
||||
* descendant cgroup which hasn't finished ->css_online() or already has
|
||||
* Walk @root's descendants. Must be called under rcu_read_lock(). A
|
||||
* descendant css which hasn't finished ->css_online() or already has
|
||||
* finished ->css_offline() may show up during traversal and it's each
|
||||
* subsystem's responsibility to verify that each @pos is alive.
|
||||
*
|
||||
* If a subsystem synchronizes against the parent in its ->css_online() and
|
||||
* before starting iterating, and synchronizes against @pos on each
|
||||
* iteration, any descendant cgroup which finished ->css_online() is
|
||||
* iteration, any descendant css which finished ->css_online() is
|
||||
* guaranteed to be visible in the future iterations.
|
||||
*
|
||||
* In other words, the following guarantees that a descendant can't escape
|
||||
* state updates of its ancestors.
|
||||
*
|
||||
* my_online(@cgrp)
|
||||
* my_online(@css)
|
||||
* {
|
||||
* Lock @cgrp->parent and @cgrp;
|
||||
* Inherit state from @cgrp->parent;
|
||||
* Lock @css's parent and @css;
|
||||
* Inherit state from the parent;
|
||||
* Unlock both.
|
||||
* }
|
||||
*
|
||||
* my_update_state(@cgrp)
|
||||
* my_update_state(@css)
|
||||
* {
|
||||
* Lock @cgrp;
|
||||
* Update @cgrp's state;
|
||||
* Unlock @cgrp;
|
||||
* Lock @css;
|
||||
* Update @css's state;
|
||||
* Unlock @css;
|
||||
*
|
||||
* cgroup_for_each_descendant_pre(@pos, @cgrp) {
|
||||
* css_for_each_descendant_pre(@pos, @css) {
|
||||
* Lock @pos;
|
||||
* Verify @pos is alive and inherit state from @pos->parent;
|
||||
* Verify @pos is alive and inherit state from @pos's parent;
|
||||
* Unlock @pos;
|
||||
* }
|
||||
* }
|
||||
@ -851,8 +855,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
|
||||
* visible by walking order and, as long as inheriting operations to the
|
||||
* same @pos are atomic to each other, multiple updates racing each other
|
||||
* still result in the correct state. It's guaranteed that at least one
|
||||
* inheritance happens for any cgroup after the latest update to its
|
||||
* parent.
|
||||
* inheritance happens for any css after the latest update to its parent.
|
||||
*
|
||||
* If checking parent's state requires locking the parent, each inheriting
|
||||
* iteration should lock and unlock both @pos->parent and @pos.
|
||||
@ -865,25 +868,26 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
|
||||
* caller is responsible for ensuring that @pos remains accessible until
|
||||
* the start of the next iteration by, for example, bumping the css refcnt.
|
||||
*/
|
||||
#define cgroup_for_each_descendant_pre(pos, cgroup) \
|
||||
for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \
|
||||
pos = cgroup_next_descendant_pre((pos), (cgroup)))
|
||||
#define css_for_each_descendant_pre(pos, css) \
|
||||
for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \
|
||||
(pos) = css_next_descendant_pre((pos), (css)))
|
||||
|
||||
struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
|
||||
struct cgroup *cgroup);
|
||||
struct cgroup_subsys_state *
|
||||
css_next_descendant_post(struct cgroup_subsys_state *pos,
|
||||
struct cgroup_subsys_state *css);
|
||||
|
||||
/**
|
||||
* cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
|
||||
* @pos: the cgroup * to use as the loop cursor
|
||||
* @cgroup: cgroup whose descendants to walk
|
||||
* css_for_each_descendant_post - post-order walk of a css's descendants
|
||||
* @pos: the css * to use as the loop cursor
|
||||
* @css: css whose descendants to walk
|
||||
*
|
||||
* Similar to cgroup_for_each_descendant_pre() but performs post-order
|
||||
* Similar to css_for_each_descendant_pre() but performs post-order
|
||||
* traversal instead. Note that the walk visibility guarantee described in
|
||||
* pre-order walk doesn't apply the same to post-order walks.
|
||||
*/
|
||||
#define cgroup_for_each_descendant_post(pos, cgroup) \
|
||||
for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \
|
||||
pos = cgroup_next_descendant_post((pos), (cgroup)))
|
||||
#define css_for_each_descendant_post(pos, css) \
|
||||
for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
|
||||
(pos) = css_next_descendant_post((pos), (css)))
|
||||
|
||||
/* A cgroup_iter should be treated as an opaque object */
|
||||
struct cgroup_iter {
|
||||
|
131
kernel/cgroup.c
131
kernel/cgroup.c
@ -2814,8 +2814,8 @@ static void cgroup_cfts_prepare(void)
|
||||
/*
|
||||
* Thanks to the entanglement with vfs inode locking, we can't walk
|
||||
* the existing cgroups under cgroup_mutex and create files.
|
||||
* Instead, we use cgroup_for_each_descendant_pre() and drop RCU
|
||||
* read lock before calling cgroup_addrm_files().
|
||||
* Instead, we use css_for_each_descendant_pre() and drop RCU read
|
||||
* lock before calling cgroup_addrm_files().
|
||||
*/
|
||||
mutex_lock(&cgroup_mutex);
|
||||
}
|
||||
@ -2825,10 +2825,11 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
|
||||
{
|
||||
LIST_HEAD(pending);
|
||||
struct cgroup_subsys *ss = cfts[0].ss;
|
||||
struct cgroup *cgrp, *root = &ss->root->top_cgroup;
|
||||
struct cgroup *root = &ss->root->top_cgroup;
|
||||
struct super_block *sb = ss->root->sb;
|
||||
struct dentry *prev = NULL;
|
||||
struct inode *inode;
|
||||
struct cgroup_subsys_state *css;
|
||||
u64 update_before;
|
||||
int ret = 0;
|
||||
|
||||
@ -2861,7 +2862,9 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
|
||||
|
||||
/* add/rm files for all cgroups created before */
|
||||
rcu_read_lock();
|
||||
cgroup_for_each_descendant_pre(cgrp, root) {
|
||||
css_for_each_descendant_pre(css, cgroup_css(root, ss->subsys_id)) {
|
||||
struct cgroup *cgrp = css->cgroup;
|
||||
|
||||
if (cgroup_is_dead(cgrp))
|
||||
continue;
|
||||
|
||||
@ -3037,17 +3040,21 @@ static void cgroup_enable_task_cg_lists(void)
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_next_child - find the next child of a given cgroup
|
||||
* @pos: the current position (%NULL to initiate traversal)
|
||||
* @cgrp: cgroup whose descendants to walk
|
||||
* css_next_child - find the next child of a given css
|
||||
* @pos_css: the current position (%NULL to initiate traversal)
|
||||
* @parent_css: css whose children to walk
|
||||
*
|
||||
* This function returns the next child of @cgrp and should be called under
|
||||
* RCU read lock. The only requirement is that @cgrp and @pos are
|
||||
* accessible. The next sibling is guaranteed to be returned regardless of
|
||||
* their states.
|
||||
* This function returns the next child of @parent_css and should be called
|
||||
* under RCU read lock. The only requirement is that @parent_css and
|
||||
* @pos_css are accessible. The next sibling is guaranteed to be returned
|
||||
* regardless of their states.
|
||||
*/
|
||||
struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp)
|
||||
struct cgroup_subsys_state *
|
||||
css_next_child(struct cgroup_subsys_state *pos_css,
|
||||
struct cgroup_subsys_state *parent_css)
|
||||
{
|
||||
struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
|
||||
struct cgroup *cgrp = parent_css->cgroup;
|
||||
struct cgroup *next;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
@ -3081,59 +3088,64 @@ struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp)
|
||||
break;
|
||||
}
|
||||
|
||||
if (&next->sibling != &cgrp->children)
|
||||
return next;
|
||||
return NULL;
|
||||
if (&next->sibling == &cgrp->children)
|
||||
return NULL;
|
||||
|
||||
if (parent_css->ss)
|
||||
return cgroup_css(next, parent_css->ss->subsys_id);
|
||||
else
|
||||
return &next->dummy_css;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cgroup_next_child);
|
||||
EXPORT_SYMBOL_GPL(css_next_child);
|
||||
|
||||
/**
|
||||
* cgroup_next_descendant_pre - find the next descendant for pre-order walk
|
||||
* css_next_descendant_pre - find the next descendant for pre-order walk
|
||||
* @pos: the current position (%NULL to initiate traversal)
|
||||
* @cgroup: cgroup whose descendants to walk
|
||||
* @root: css whose descendants to walk
|
||||
*
|
||||
* To be used by cgroup_for_each_descendant_pre(). Find the next
|
||||
* descendant to visit for pre-order traversal of @cgroup's descendants.
|
||||
* To be used by css_for_each_descendant_pre(). Find the next descendant
|
||||
* to visit for pre-order traversal of @root's descendants.
|
||||
*
|
||||
* While this function requires RCU read locking, it doesn't require the
|
||||
* whole traversal to be contained in a single RCU critical section. This
|
||||
* function will return the correct next descendant as long as both @pos
|
||||
* and @cgroup are accessible and @pos is a descendant of @cgroup.
|
||||
* and @root are accessible and @pos is a descendant of @root.
|
||||
*/
|
||||
struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
|
||||
struct cgroup *cgroup)
|
||||
struct cgroup_subsys_state *
|
||||
css_next_descendant_pre(struct cgroup_subsys_state *pos,
|
||||
struct cgroup_subsys_state *root)
|
||||
{
|
||||
struct cgroup *next;
|
||||
struct cgroup_subsys_state *next;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
|
||||
/* if first iteration, pretend we just visited @cgroup */
|
||||
/* if first iteration, pretend we just visited @root */
|
||||
if (!pos)
|
||||
pos = cgroup;
|
||||
pos = root;
|
||||
|
||||
/* visit the first child if exists */
|
||||
next = cgroup_next_child(NULL, pos);
|
||||
next = css_next_child(NULL, pos);
|
||||
if (next)
|
||||
return next;
|
||||
|
||||
/* no child, visit my or the closest ancestor's next sibling */
|
||||
while (pos != cgroup) {
|
||||
next = cgroup_next_child(pos, pos->parent);
|
||||
while (pos != root) {
|
||||
next = css_next_child(pos, css_parent(pos));
|
||||
if (next)
|
||||
return next;
|
||||
pos = pos->parent;
|
||||
pos = css_parent(pos);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
|
||||
EXPORT_SYMBOL_GPL(css_next_descendant_pre);
|
||||
|
||||
/**
|
||||
* cgroup_rightmost_descendant - return the rightmost descendant of a cgroup
|
||||
* @pos: cgroup of interest
|
||||
* css_rightmost_descendant - return the rightmost descendant of a css
|
||||
* @pos: css of interest
|
||||
*
|
||||
* Return the rightmost descendant of @pos. If there's no descendant,
|
||||
* @pos is returned. This can be used during pre-order traversal to skip
|
||||
* Return the rightmost descendant of @pos. If there's no descendant, @pos
|
||||
* is returned. This can be used during pre-order traversal to skip
|
||||
* subtree of @pos.
|
||||
*
|
||||
* While this function requires RCU read locking, it doesn't require the
|
||||
@ -3141,9 +3153,10 @@ EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
|
||||
* function will return the correct rightmost descendant as long as @pos is
|
||||
* accessible.
|
||||
*/
|
||||
struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
|
||||
struct cgroup_subsys_state *
|
||||
css_rightmost_descendant(struct cgroup_subsys_state *pos)
|
||||
{
|
||||
struct cgroup *last, *tmp;
|
||||
struct cgroup_subsys_state *last, *tmp;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
|
||||
@ -3151,62 +3164,64 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
|
||||
last = pos;
|
||||
/* ->prev isn't RCU safe, walk ->next till the end */
|
||||
pos = NULL;
|
||||
cgroup_for_each_child(tmp, last)
|
||||
css_for_each_child(tmp, last)
|
||||
pos = tmp;
|
||||
} while (pos);
|
||||
|
||||
return last;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
|
||||
EXPORT_SYMBOL_GPL(css_rightmost_descendant);
|
||||
|
||||
static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
|
||||
static struct cgroup_subsys_state *
|
||||
css_leftmost_descendant(struct cgroup_subsys_state *pos)
|
||||
{
|
||||
struct cgroup *last;
|
||||
struct cgroup_subsys_state *last;
|
||||
|
||||
do {
|
||||
last = pos;
|
||||
pos = cgroup_next_child(NULL, pos);
|
||||
pos = css_next_child(NULL, pos);
|
||||
} while (pos);
|
||||
|
||||
return last;
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_next_descendant_post - find the next descendant for post-order walk
|
||||
* css_next_descendant_post - find the next descendant for post-order walk
|
||||
* @pos: the current position (%NULL to initiate traversal)
|
||||
* @cgroup: cgroup whose descendants to walk
|
||||
* @root: css whose descendants to walk
|
||||
*
|
||||
* To be used by cgroup_for_each_descendant_post(). Find the next
|
||||
* descendant to visit for post-order traversal of @cgroup's descendants.
|
||||
* To be used by css_for_each_descendant_post(). Find the next descendant
|
||||
* to visit for post-order traversal of @root's descendants.
|
||||
*
|
||||
* While this function requires RCU read locking, it doesn't require the
|
||||
* whole traversal to be contained in a single RCU critical section. This
|
||||
* function will return the correct next descendant as long as both @pos
|
||||
* and @root are accessible and @pos is a descendant of @root.
|
||||
*/
|
||||
struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
|
||||
struct cgroup *cgroup)
|
||||
struct cgroup_subsys_state *
|
||||
css_next_descendant_post(struct cgroup_subsys_state *pos,
|
||||
struct cgroup_subsys_state *root)
|
||||
{
|
||||
struct cgroup *next;
|
||||
struct cgroup_subsys_state *next;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
|
||||
/* if first iteration, visit the leftmost descendant */
|
||||
if (!pos) {
|
||||
next = cgroup_leftmost_descendant(cgroup);
|
||||
return next != cgroup ? next : NULL;
|
||||
next = css_leftmost_descendant(root);
|
||||
return next != root ? next : NULL;
|
||||
}
|
||||
|
||||
/* if there's an unvisited sibling, visit its leftmost descendant */
|
||||
next = cgroup_next_child(pos, pos->parent);
|
||||
next = css_next_child(pos, css_parent(pos));
|
||||
if (next)
|
||||
return cgroup_leftmost_descendant(next);
|
||||
return css_leftmost_descendant(next);
|
||||
|
||||
/* no sibling left, visit parent */
|
||||
next = pos->parent;
|
||||
return next != cgroup ? next : NULL;
|
||||
next = css_parent(pos);
|
||||
return next != root ? next : NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
|
||||
EXPORT_SYMBOL_GPL(css_next_descendant_post);
|
||||
|
||||
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
|
||||
__acquires(css_set_lock)
|
||||
@ -4549,9 +4564,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
|
||||
/*
|
||||
* Mark @cgrp dead. This prevents further task migration and child
|
||||
* creation by disabling cgroup_lock_live_group(). Note that
|
||||
* CGRP_DEAD assertion is depended upon by cgroup_next_child() to
|
||||
* CGRP_DEAD assertion is depended upon by css_next_child() to
|
||||
* resume iteration after dropping RCU read lock. See
|
||||
* cgroup_next_child() for details.
|
||||
* css_next_child() for details.
|
||||
*/
|
||||
set_bit(CGRP_DEAD, &cgrp->flags);
|
||||
|
||||
|
@ -50,11 +50,6 @@ static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
|
||||
return css ? container_of(css, struct freezer, css) : NULL;
|
||||
}
|
||||
|
||||
static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
|
||||
{
|
||||
return css_freezer(cgroup_css(cgroup, freezer_subsys_id));
|
||||
}
|
||||
|
||||
static inline struct freezer *task_freezer(struct task_struct *task)
|
||||
{
|
||||
return css_freezer(task_css(task, freezer_subsys_id));
|
||||
@ -120,7 +115,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
|
||||
/*
|
||||
* The following double locking and freezing state inheritance
|
||||
* guarantee that @cgroup can never escape ancestors' freezing
|
||||
* states. See cgroup_for_each_descendant_pre() for details.
|
||||
* states. See css_for_each_descendant_pre() for details.
|
||||
*/
|
||||
if (parent)
|
||||
spin_lock_irq(&parent->lock);
|
||||
@ -262,7 +257,7 @@ out:
|
||||
static void update_if_frozen(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct freezer *freezer = css_freezer(css);
|
||||
struct cgroup *pos;
|
||||
struct cgroup_subsys_state *pos;
|
||||
struct cgroup_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
@ -275,8 +270,8 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
|
||||
goto out_unlock;
|
||||
|
||||
/* are all (live) children frozen? */
|
||||
cgroup_for_each_child(pos, css->cgroup) {
|
||||
struct freezer *child = cgroup_freezer(pos);
|
||||
css_for_each_child(pos, css) {
|
||||
struct freezer *child = css_freezer(pos);
|
||||
|
||||
if ((child->state & CGROUP_FREEZER_ONLINE) &&
|
||||
!(child->state & CGROUP_FROZEN))
|
||||
@ -309,13 +304,13 @@ out_unlock:
|
||||
static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
struct seq_file *m)
|
||||
{
|
||||
struct cgroup *pos;
|
||||
struct cgroup_subsys_state *pos;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/* update states bottom-up */
|
||||
cgroup_for_each_descendant_post(pos, css->cgroup)
|
||||
update_if_frozen(cgroup_css(pos, freezer_subsys_id));
|
||||
css_for_each_descendant_post(pos, css)
|
||||
update_if_frozen(pos);
|
||||
update_if_frozen(css);
|
||||
|
||||
rcu_read_unlock();
|
||||
@ -396,7 +391,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
|
||||
*/
|
||||
static void freezer_change_state(struct freezer *freezer, bool freeze)
|
||||
{
|
||||
struct cgroup *pos;
|
||||
struct cgroup_subsys_state *pos;
|
||||
|
||||
/* update @freezer */
|
||||
spin_lock_irq(&freezer->lock);
|
||||
@ -409,8 +404,8 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
|
||||
* CGROUP_FREEZING_PARENT.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) {
|
||||
struct freezer *pos_f = cgroup_freezer(pos);
|
||||
css_for_each_descendant_pre(pos, &freezer->css) {
|
||||
struct freezer *pos_f = css_freezer(pos);
|
||||
struct freezer *parent = parent_freezer(pos_f);
|
||||
|
||||
/*
|
||||
|
@ -210,29 +210,29 @@ static struct cpuset top_cpuset = {
|
||||
/**
|
||||
* cpuset_for_each_child - traverse online children of a cpuset
|
||||
* @child_cs: loop cursor pointing to the current child
|
||||
* @pos_cgrp: used for iteration
|
||||
* @pos_css: used for iteration
|
||||
* @parent_cs: target cpuset to walk children of
|
||||
*
|
||||
* Walk @child_cs through the online children of @parent_cs. Must be used
|
||||
* with RCU read locked.
|
||||
*/
|
||||
#define cpuset_for_each_child(child_cs, pos_cgrp, parent_cs) \
|
||||
cgroup_for_each_child((pos_cgrp), (parent_cs)->css.cgroup) \
|
||||
if (is_cpuset_online(((child_cs) = cgroup_cs((pos_cgrp)))))
|
||||
#define cpuset_for_each_child(child_cs, pos_css, parent_cs) \
|
||||
css_for_each_child((pos_css), &(parent_cs)->css) \
|
||||
if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
|
||||
|
||||
/**
|
||||
* cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
|
||||
* @des_cs: loop cursor pointing to the current descendant
|
||||
* @pos_cgrp: used for iteration
|
||||
* @pos_css: used for iteration
|
||||
* @root_cs: target cpuset to walk descendants of
|
||||
*
|
||||
* Walk @des_cs through the online descendants of @root_cs. Must be used
|
||||
* with RCU read locked. The caller may modify @pos_cgrp by calling
|
||||
* cgroup_rightmost_descendant() to skip subtree.
|
||||
* with RCU read locked. The caller may modify @pos_css by calling
|
||||
* css_rightmost_descendant() to skip subtree.
|
||||
*/
|
||||
#define cpuset_for_each_descendant_pre(des_cs, pos_cgrp, root_cs) \
|
||||
cgroup_for_each_descendant_pre((pos_cgrp), (root_cs)->css.cgroup) \
|
||||
if (is_cpuset_online(((des_cs) = cgroup_cs((pos_cgrp)))))
|
||||
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \
|
||||
css_for_each_descendant_pre((pos_css), &(root_cs)->css) \
|
||||
if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
|
||||
|
||||
/*
|
||||
* There are two global mutexes guarding cpuset structures - cpuset_mutex
|
||||
@ -430,7 +430,7 @@ static void free_trial_cpuset(struct cpuset *trial)
|
||||
|
||||
static int validate_change(struct cpuset *cur, struct cpuset *trial)
|
||||
{
|
||||
struct cgroup *cgrp;
|
||||
struct cgroup_subsys_state *css;
|
||||
struct cpuset *c, *par;
|
||||
int ret;
|
||||
|
||||
@ -438,7 +438,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
|
||||
|
||||
/* Each of our child cpusets must be a subset of us */
|
||||
ret = -EBUSY;
|
||||
cpuset_for_each_child(c, cgrp, cur)
|
||||
cpuset_for_each_child(c, css, cur)
|
||||
if (!is_cpuset_subset(c, trial))
|
||||
goto out;
|
||||
|
||||
@ -459,7 +459,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
|
||||
* overlap
|
||||
*/
|
||||
ret = -EINVAL;
|
||||
cpuset_for_each_child(c, cgrp, par) {
|
||||
cpuset_for_each_child(c, css, par) {
|
||||
if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
|
||||
c != cur &&
|
||||
cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
|
||||
@ -508,13 +508,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
|
||||
struct cpuset *root_cs)
|
||||
{
|
||||
struct cpuset *cp;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
|
||||
rcu_read_lock();
|
||||
cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
|
||||
cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
|
||||
/* skip the whole subtree if @cp doesn't have any CPU */
|
||||
if (cpumask_empty(cp->cpus_allowed)) {
|
||||
pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
|
||||
pos_css = css_rightmost_descendant(pos_css);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -589,7 +589,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
||||
struct sched_domain_attr *dattr; /* attributes for custom domains */
|
||||
int ndoms = 0; /* number of sched domains in result */
|
||||
int nslot; /* next empty doms[] struct cpumask slot */
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
|
||||
doms = NULL;
|
||||
dattr = NULL;
|
||||
@ -618,7 +618,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
||||
csn = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
cpuset_for_each_descendant_pre(cp, pos_cgrp, &top_cpuset) {
|
||||
cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
|
||||
/*
|
||||
* Continue traversing beyond @cp iff @cp has some CPUs and
|
||||
* isn't load balancing. The former is obvious. The
|
||||
@ -635,7 +635,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
||||
csa[csn++] = cp;
|
||||
|
||||
/* skip @cp's subtree */
|
||||
pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
|
||||
pos_css = css_rightmost_descendant(pos_css);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@ -886,16 +886,16 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs,
|
||||
bool update_root, struct ptr_heap *heap)
|
||||
{
|
||||
struct cpuset *cp;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
|
||||
if (update_root)
|
||||
update_tasks_cpumask(root_cs, heap);
|
||||
|
||||
rcu_read_lock();
|
||||
cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
|
||||
cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
|
||||
/* skip the whole subtree if @cp has some CPUs */
|
||||
if (!cpumask_empty(cp->cpus_allowed)) {
|
||||
pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
|
||||
pos_css = css_rightmost_descendant(pos_css);
|
||||
continue;
|
||||
}
|
||||
if (!css_tryget(&cp->css))
|
||||
@ -1143,16 +1143,16 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs,
|
||||
bool update_root, struct ptr_heap *heap)
|
||||
{
|
||||
struct cpuset *cp;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
|
||||
if (update_root)
|
||||
update_tasks_nodemask(root_cs, heap);
|
||||
|
||||
rcu_read_lock();
|
||||
cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
|
||||
cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
|
||||
/* skip the whole subtree if @cp has some memory nodes */
|
||||
if (!nodes_empty(cp->mems_allowed)) {
|
||||
pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
|
||||
pos_css = css_rightmost_descendant(pos_css);
|
||||
continue;
|
||||
}
|
||||
if (!css_tryget(&cp->css))
|
||||
@ -1973,7 +1973,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
|
||||
struct cpuset *cs = css_cs(css);
|
||||
struct cpuset *parent = parent_cs(cs);
|
||||
struct cpuset *tmp_cs;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
|
||||
if (!parent)
|
||||
return 0;
|
||||
@ -2005,7 +2005,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
|
||||
* (and likewise for mems) to the new cgroup.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
cpuset_for_each_child(tmp_cs, pos_cgrp, parent) {
|
||||
cpuset_for_each_child(tmp_cs, pos_css, parent) {
|
||||
if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
|
||||
rcu_read_unlock();
|
||||
goto out_unlock;
|
||||
@ -2252,10 +2252,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
|
||||
/* if cpus or mems changed, we need to propagate to descendants */
|
||||
if (cpus_updated || mems_updated) {
|
||||
struct cpuset *cs;
|
||||
struct cgroup *pos_cgrp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
|
||||
rcu_read_lock();
|
||||
cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset) {
|
||||
cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
|
||||
if (!css_tryget(&cs->css))
|
||||
continue;
|
||||
rcu_read_unlock();
|
||||
|
@ -1082,7 +1082,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
|
||||
static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
|
||||
struct mem_cgroup *last_visited)
|
||||
{
|
||||
struct cgroup *prev_cgroup, *next_cgroup;
|
||||
struct cgroup_subsys_state *prev_css, *next_css;
|
||||
|
||||
/*
|
||||
* Root is not visited by cgroup iterators so it needs an
|
||||
@ -1091,11 +1091,9 @@ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
|
||||
if (!last_visited)
|
||||
return root;
|
||||
|
||||
prev_cgroup = (last_visited == root) ? NULL
|
||||
: last_visited->css.cgroup;
|
||||
prev_css = (last_visited == root) ? NULL : &last_visited->css;
|
||||
skip_node:
|
||||
next_cgroup = cgroup_next_descendant_pre(
|
||||
prev_cgroup, root->css.cgroup);
|
||||
next_css = css_next_descendant_pre(prev_css, &root->css);
|
||||
|
||||
/*
|
||||
* Even if we found a group we have to make sure it is
|
||||
@ -1104,13 +1102,13 @@ skip_node:
|
||||
* last_visited css is safe to use because it is
|
||||
* protected by css_get and the tree walk is rcu safe.
|
||||
*/
|
||||
if (next_cgroup) {
|
||||
struct mem_cgroup *mem = mem_cgroup_from_cont(
|
||||
next_cgroup);
|
||||
if (next_css) {
|
||||
struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
|
||||
|
||||
if (css_tryget(&mem->css))
|
||||
return mem;
|
||||
else {
|
||||
prev_cgroup = next_cgroup;
|
||||
prev_css = next_css;
|
||||
goto skip_node;
|
||||
}
|
||||
}
|
||||
@ -4939,10 +4937,10 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
|
||||
*/
|
||||
static inline bool __memcg_has_children(struct mem_cgroup *memcg)
|
||||
{
|
||||
struct cgroup *pos;
|
||||
struct cgroup_subsys_state *pos;
|
||||
|
||||
/* bounce at first found */
|
||||
cgroup_for_each_child(pos, memcg->css.cgroup)
|
||||
css_for_each_child(pos, &memcg->css)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
@ -56,11 +56,6 @@ static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
|
||||
return s ? container_of(s, struct dev_cgroup, css) : NULL;
|
||||
}
|
||||
|
||||
static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
|
||||
{
|
||||
return css_to_devcgroup(cgroup_css(cgroup, devices_subsys_id));
|
||||
}
|
||||
|
||||
static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
|
||||
{
|
||||
return css_to_devcgroup(task_css(task, devices_subsys_id));
|
||||
@ -447,13 +442,13 @@ static void revalidate_active_exceptions(struct dev_cgroup *devcg)
|
||||
static int propagate_exception(struct dev_cgroup *devcg_root,
|
||||
struct dev_exception_item *ex)
|
||||
{
|
||||
struct cgroup *root = devcg_root->css.cgroup, *pos;
|
||||
struct cgroup_subsys_state *pos;
|
||||
int rc = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
cgroup_for_each_descendant_pre(pos, root) {
|
||||
struct dev_cgroup *devcg = cgroup_to_devcgroup(pos);
|
||||
css_for_each_descendant_pre(pos, &devcg_root->css) {
|
||||
struct dev_cgroup *devcg = css_to_devcgroup(pos);
|
||||
|
||||
/*
|
||||
* Because devcgroup_mutex is held, no devcg will become
|
||||
|
Loading…
Reference in New Issue
Block a user