memcg: use for_each_mem_cgroup
In memory cgroup management, we sometimes have to walk through subhierarchy of cgroup to gather informaiton, or lock something, etc. Now, to do that, mem_cgroup_walk_tree() function is provided. It calls given callback function per cgroup found. But the bad thing is that it has to pass a fixed style function and argument, "void*" and it adds much type casting to memcontrol.c. To make the code clean, this patch replaces walk_tree() with for_each_mem_cgroup_tree(iter, root) An iterator style call. The good point is that iterator call doesn't have to assume what kind of function is called under it. A bad point is that it may cause reference-count leak if a caller use "break" from the loop by mistake. I think the benefit is larger. The modified code seems straigtforward and easy to read because we don't have misterious callbacks and pointer cast. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
32047e2a85
commit
7d74b06f24
188
mm/memcontrol.c
188
mm/memcontrol.c
@ -660,41 +660,58 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
|
|||||||
return mem;
|
return mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* The caller has to guarantee "mem" exists before calling this */
|
||||||
* Call callback function against all cgroup under hierarchy tree.
|
static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
|
||||||
*/
|
|
||||||
static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
|
|
||||||
int (*func)(struct mem_cgroup *, void *))
|
|
||||||
{
|
{
|
||||||
int found, ret, nextid;
|
if (mem && css_tryget(&mem->css))
|
||||||
struct cgroup_subsys_state *css;
|
return mem;
|
||||||
struct mem_cgroup *mem;
|
return NULL;
|
||||||
|
|
||||||
if (!root->use_hierarchy)
|
|
||||||
return (*func)(root, data);
|
|
||||||
|
|
||||||
nextid = 1;
|
|
||||||
do {
|
|
||||||
ret = 0;
|
|
||||||
mem = NULL;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
css = css_get_next(&mem_cgroup_subsys, nextid, &root->css,
|
|
||||||
&found);
|
|
||||||
if (css && css_tryget(css))
|
|
||||||
mem = container_of(css, struct mem_cgroup, css);
|
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
if (mem) {
|
|
||||||
ret = (*func)(mem, data);
|
|
||||||
css_put(&mem->css);
|
|
||||||
}
|
|
||||||
nextid = found + 1;
|
|
||||||
} while (!ret && css);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
|
||||||
|
struct mem_cgroup *root,
|
||||||
|
bool cond)
|
||||||
|
{
|
||||||
|
int nextid = css_id(&iter->css) + 1;
|
||||||
|
int found;
|
||||||
|
int hierarchy_used;
|
||||||
|
struct cgroup_subsys_state *css;
|
||||||
|
|
||||||
|
hierarchy_used = iter->use_hierarchy;
|
||||||
|
|
||||||
|
css_put(&iter->css);
|
||||||
|
if (!cond || !hierarchy_used)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
do {
|
||||||
|
iter = NULL;
|
||||||
|
rcu_read_lock();
|
||||||
|
|
||||||
|
css = css_get_next(&mem_cgroup_subsys, nextid,
|
||||||
|
&root->css, &found);
|
||||||
|
if (css && css_tryget(css))
|
||||||
|
iter = container_of(css, struct mem_cgroup, css);
|
||||||
|
rcu_read_unlock();
|
||||||
|
/* If css is NULL, no more cgroups will be found */
|
||||||
|
nextid = found + 1;
|
||||||
|
} while (css && !iter);
|
||||||
|
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* for_eacn_mem_cgroup_tree() for visiting all cgroup under tree. Please
|
||||||
|
* be careful that "break" loop is not allowed. We have reference count.
|
||||||
|
* Instead of that modify "cond" to be false and "continue" to exit the loop.
|
||||||
|
*/
|
||||||
|
#define for_each_mem_cgroup_tree_cond(iter, root, cond) \
|
||||||
|
for (iter = mem_cgroup_start_loop(root);\
|
||||||
|
iter != NULL;\
|
||||||
|
iter = mem_cgroup_get_next(iter, root, cond))
|
||||||
|
|
||||||
|
#define for_each_mem_cgroup_tree(iter, root) \
|
||||||
|
for_each_mem_cgroup_tree_cond(iter, root, true)
|
||||||
|
|
||||||
|
|
||||||
static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
|
static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
|
||||||
{
|
{
|
||||||
return (mem == root_mem_cgroup);
|
return (mem == root_mem_cgroup);
|
||||||
@ -1132,13 +1149,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
|
|
||||||
{
|
|
||||||
int *val = data;
|
|
||||||
(*val)++;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
|
* mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
|
||||||
* @memcg: The memory cgroup that went over limit
|
* @memcg: The memory cgroup that went over limit
|
||||||
@ -1213,7 +1223,10 @@ done:
|
|||||||
static int mem_cgroup_count_children(struct mem_cgroup *mem)
|
static int mem_cgroup_count_children(struct mem_cgroup *mem)
|
||||||
{
|
{
|
||||||
int num = 0;
|
int num = 0;
|
||||||
mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);
|
struct mem_cgroup *iter;
|
||||||
|
|
||||||
|
for_each_mem_cgroup_tree(iter, mem)
|
||||||
|
num++;
|
||||||
return num;
|
return num;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1362,49 +1375,39 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
|
|||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data)
|
|
||||||
{
|
|
||||||
int *val = (int *)data;
|
|
||||||
int x;
|
|
||||||
/*
|
|
||||||
* Logically, we can stop scanning immediately when we find
|
|
||||||
* a memcg is already locked. But condidering unlock ops and
|
|
||||||
* creation/removal of memcg, scan-all is simple operation.
|
|
||||||
*/
|
|
||||||
x = atomic_inc_return(&mem->oom_lock);
|
|
||||||
*val = max(x, *val);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Check OOM-Killer is already running under our hierarchy.
|
* Check OOM-Killer is already running under our hierarchy.
|
||||||
* If someone is running, return false.
|
* If someone is running, return false.
|
||||||
*/
|
*/
|
||||||
static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
|
static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
|
||||||
{
|
{
|
||||||
int lock_count = 0;
|
int x, lock_count = 0;
|
||||||
|
struct mem_cgroup *iter;
|
||||||
|
|
||||||
mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb);
|
for_each_mem_cgroup_tree(iter, mem) {
|
||||||
|
x = atomic_inc_return(&iter->oom_lock);
|
||||||
|
lock_count = max(x, lock_count);
|
||||||
|
}
|
||||||
|
|
||||||
if (lock_count == 1)
|
if (lock_count == 1)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data)
|
static int mem_cgroup_oom_unlock(struct mem_cgroup *mem)
|
||||||
{
|
{
|
||||||
|
struct mem_cgroup *iter;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When a new child is created while the hierarchy is under oom,
|
* When a new child is created while the hierarchy is under oom,
|
||||||
* mem_cgroup_oom_lock() may not be called. We have to use
|
* mem_cgroup_oom_lock() may not be called. We have to use
|
||||||
* atomic_add_unless() here.
|
* atomic_add_unless() here.
|
||||||
*/
|
*/
|
||||||
atomic_add_unless(&mem->oom_lock, -1, 0);
|
for_each_mem_cgroup_tree(iter, mem)
|
||||||
|
atomic_add_unless(&iter->oom_lock, -1, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mem_cgroup_oom_unlock(struct mem_cgroup *mem)
|
|
||||||
{
|
|
||||||
mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb);
|
|
||||||
}
|
|
||||||
|
|
||||||
static DEFINE_MUTEX(memcg_oom_mutex);
|
static DEFINE_MUTEX(memcg_oom_mutex);
|
||||||
static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
|
static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
|
||||||
@ -3207,33 +3210,25 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
|
|||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct mem_cgroup_idx_data {
|
|
||||||
s64 val;
|
|
||||||
enum mem_cgroup_stat_index idx;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int
|
static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
|
||||||
mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
|
enum mem_cgroup_stat_index idx)
|
||||||
{
|
{
|
||||||
struct mem_cgroup_idx_data *d = data;
|
struct mem_cgroup *iter;
|
||||||
d->val += mem_cgroup_read_stat(mem, d->idx);
|
s64 val = 0;
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
/* each per cpu's value can be minus.Then, use s64 */
|
||||||
mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
|
for_each_mem_cgroup_tree(iter, mem)
|
||||||
enum mem_cgroup_stat_index idx, s64 *val)
|
val += mem_cgroup_read_stat(iter, idx);
|
||||||
{
|
|
||||||
struct mem_cgroup_idx_data d;
|
if (val < 0) /* race ? */
|
||||||
d.idx = idx;
|
val = 0;
|
||||||
d.val = 0;
|
return val;
|
||||||
mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
|
|
||||||
*val = d.val;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
|
static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
|
||||||
{
|
{
|
||||||
u64 idx_val, val;
|
u64 val;
|
||||||
|
|
||||||
if (!mem_cgroup_is_root(mem)) {
|
if (!mem_cgroup_is_root(mem)) {
|
||||||
if (!swap)
|
if (!swap)
|
||||||
@ -3242,16 +3237,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
|
|||||||
return res_counter_read_u64(&mem->memsw, RES_USAGE);
|
return res_counter_read_u64(&mem->memsw, RES_USAGE);
|
||||||
}
|
}
|
||||||
|
|
||||||
mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE, &idx_val);
|
val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE);
|
||||||
val = idx_val;
|
val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS);
|
||||||
mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS, &idx_val);
|
|
||||||
val += idx_val;
|
|
||||||
|
|
||||||
if (swap) {
|
if (swap)
|
||||||
mem_cgroup_get_recursive_idx_stat(mem,
|
val += mem_cgroup_get_recursive_idx_stat(mem,
|
||||||
MEM_CGROUP_STAT_SWAPOUT, &idx_val);
|
MEM_CGROUP_STAT_SWAPOUT);
|
||||||
val += idx_val;
|
|
||||||
}
|
|
||||||
|
|
||||||
return val << PAGE_SHIFT;
|
return val << PAGE_SHIFT;
|
||||||
}
|
}
|
||||||
@ -3459,9 +3450,9 @@ struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
|
static void
|
||||||
|
mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
|
||||||
{
|
{
|
||||||
struct mcs_total_stat *s = data;
|
|
||||||
s64 val;
|
s64 val;
|
||||||
|
|
||||||
/* per cpu stat */
|
/* per cpu stat */
|
||||||
@ -3491,13 +3482,15 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
|
|||||||
s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
|
s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
|
||||||
val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);
|
val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);
|
||||||
s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
|
s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
|
mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
|
||||||
{
|
{
|
||||||
mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat);
|
struct mem_cgroup *iter;
|
||||||
|
|
||||||
|
for_each_mem_cgroup_tree(iter, mem)
|
||||||
|
mem_cgroup_get_local_stat(iter, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
|
static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
|
||||||
@ -3674,7 +3667,7 @@ static int compare_thresholds(const void *a, const void *b)
|
|||||||
return _a->threshold - _b->threshold;
|
return _a->threshold - _b->threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
|
static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem)
|
||||||
{
|
{
|
||||||
struct mem_cgroup_eventfd_list *ev;
|
struct mem_cgroup_eventfd_list *ev;
|
||||||
|
|
||||||
@ -3685,7 +3678,10 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
|
|||||||
|
|
||||||
static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
|
static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
|
||||||
{
|
{
|
||||||
mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb);
|
struct mem_cgroup *iter;
|
||||||
|
|
||||||
|
for_each_mem_cgroup_tree(iter, mem)
|
||||||
|
mem_cgroup_oom_notify_cb(iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
|
static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
|
||||||
|
Loading…
Reference in New Issue
Block a user