vmscan: per memory cgroup slab shrinkers
This patch adds the SHRINKER_MEMCG_AWARE flag. If a shrinker has this flag set, it will be called per memory cgroup. The memory cgroup to scan objects from is passed in shrink_control->memcg. If the memory cgroup is NULL, a memcg-aware shrinker is supposed to scan objects from the global list. Unaware shrinkers are called only on global pressure, with memcg=NULL.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 4101b62435
commit cb731d6c62
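For orientation before the diff, here is a minimal sketch of how a cache would opt in to the new interface. Only SHRINKER_MEMCG_AWARE and shrink_control->memcg come from this patch; the my_cache_* helpers and my_cache_shrinker are hypothetical stand-ins invented for this example:

	#include <linux/shrinker.h>
	#include <linux/memcontrol.h>

	/* Hypothetical cache bookkeeping -- stand-ins for this example,
	 * not part of the patch or of any kernel API. */
	static unsigned long my_cache_nr_objects(int nid, struct mem_cgroup *memcg);
	static unsigned long my_cache_reclaim(int nid, struct mem_cgroup *memcg,
					      unsigned long nr_to_scan);

	static unsigned long my_cache_count(struct shrinker *shrink,
					    struct shrink_control *sc)
	{
		/* sc->memcg is NULL on global pressure; otherwise count only
		 * the objects accounted to that memory cgroup. */
		return my_cache_nr_objects(sc->nid, sc->memcg);
	}

	static unsigned long my_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc)
	{
		return my_cache_reclaim(sc->nid, sc->memcg, sc->nr_to_scan);
	}

	static struct shrinker my_cache_shrinker = {
		.count_objects	= my_cache_count,
		.scan_objects	= my_cache_scan,
		.seeks		= DEFAULT_SEEKS,
		.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
	};

	/* register_shrinker(&my_cache_shrinker) once at cache-init time. */

A shrinker that sets SHRINKER_MEMCG_AWARE but not SHRINKER_NUMA_AWARE still sees sc->nid forced to 0, as before.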
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -37,20 +37,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 	iput(toput_inode);
 }
 
-static void drop_slab(void)
-{
-	int nr_objects;
-
-	do {
-		int nid;
-
-		nr_objects = 0;
-		for_each_online_node(nid)
-			nr_objects += shrink_node_slabs(GFP_KERNEL, nid,
-							1000, 1000);
-	} while (nr_objects > 10);
-}
-
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -413,6 +413,8 @@ static inline bool memcg_kmem_enabled(void)
 	return static_key_false(&memcg_kmem_enabled_key);
 }
 
+bool memcg_kmem_is_active(struct mem_cgroup *memcg);
+
 /*
  * In general, we'll do everything in our power to not incur in any overhead
  * for non-memcg users for the kmem functions. Not even a function call, if we
@@ -542,6 +544,11 @@ static inline bool memcg_kmem_enabled(void)
 	return false;
 }
 
+static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+{
+	return false;
+}
+
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
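The second hunk above is the !CONFIG_MEMCG_KMEM stub: it unconditionally returns false, so the memcg_kmem_is_active() check that shrink_slab() gains below makes every per-memcg call a no-op on kernels built without kmem accounting.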
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2168,9 +2168,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -20,6 +20,9 @@ struct shrink_control {
 
 	/* current node being shrunk (for NUMA aware shrinkers) */
 	int nid;
+
+	/* current memcg being shrunk (for memcg aware shrinkers) */
+	struct mem_cgroup *memcg;
 };
 
 #define SHRINK_STOP (~0UL)
@@ -61,7 +64,8 @@ struct shrinker {
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
 
 /* Flags */
 #define SHRINKER_NUMA_AWARE	(1 << 0)
+#define SHRINKER_MEMCG_AWARE	(1 << 1)
 
 extern int register_shrinker(struct shrinker *);
 extern void unregister_shrinker(struct shrinker *);
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -352,7 +352,7 @@ struct mem_cgroup {
 };
 
 #ifdef CONFIG_MEMCG_KMEM
-static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 {
 	return memcg->kmemcg_id >= 0;
 }
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -242,15 +242,8 @@ void shake_page(struct page *p, int access)
 	 * Only call shrink_node_slabs here (which would also shrink
 	 * other caches) if access is not potentially fatal.
 	 */
-	if (access) {
-		int nr;
-		int nid = page_to_nid(p);
-		do {
-			nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000);
-			if (page_count(p) == 1)
-				break;
-		} while (nr > 10);
-	}
+	if (access)
+		drop_slab_node(page_to_nid(p));
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
mm/vmscan.c: 83 lines changed

--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -232,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
-				  struct shrinker *shrinker,
-				  unsigned long nr_scanned,
-				  unsigned long nr_eligible)
+static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
+				    struct shrinker *shrinker,
+				    unsigned long nr_scanned,
+				    unsigned long nr_eligible)
 {
 	unsigned long freed = 0;
 	unsigned long long delta;
@@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
 }
 
 /**
- * shrink_node_slabs - shrink slab caches of a given node
+ * shrink_slab - shrink slab caches
  * @gfp_mask: allocation context
  * @nid: node whose slab caches to target
+ * @memcg: memory cgroup whose slab caches to target
  * @nr_scanned: pressure numerator
  * @nr_eligible: pressure denominator
  *
@@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
  * unaware shrinkers will receive a node id of 0 instead.
  *
+ * @memcg specifies the memory cgroup to target. If it is not NULL,
+ * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
+ * objects from the memory cgroup specified. Otherwise all shrinkers
+ * are called, and memcg aware shrinkers are supposed to scan the
+ * global list then.
+ *
  * @nr_scanned and @nr_eligible form a ratio that indicate how much of
  * the available objects should be scanned. Page reclaim for example
  * passes the number of pages scanned and the number of pages on the
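To make the numerator/denominator comment concrete: do_shrink_slab() (whose body is unchanged by this patch and not shown in full) derives each shrinker's scan target from the ratio roughly as follows. This is a simplified sketch of the pre-existing logic at this point in the kernel, not code added here:

	/* Simplified from do_shrink_slab(): scan slab objects in
	 * proportion to LRU pressure.  With seeks == DEFAULT_SEEKS (2),
	 * scanning half of the eligible LRU pages asks a shrinker to
	 * scan roughly all of its freeable objects. */
	freeable = shrinker->count_objects(shrinker, shrinkctl);
	delta = (4 * nr_scanned) / shrinker->seeks;
	delta *= freeable;
	do_div(delta, nr_eligible + 1);
	total_scan += delta;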
@@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  *
  * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible)
+static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
+				 struct mem_cgroup *memcg,
+				 unsigned long nr_scanned,
+				 unsigned long nr_eligible)
 {
 	struct shrinker *shrinker;
 	unsigned long freed = 0;
 
+	if (memcg && !memcg_kmem_is_active(memcg))
+		return 0;
+
 	if (nr_scanned == 0)
 		nr_scanned = SWAP_CLUSTER_MAX;
 
@@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
 		struct shrink_control sc = {
 			.gfp_mask = gfp_mask,
 			.nid = nid,
+			.memcg = memcg,
 		};
 
+		if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
+			continue;
+
 		if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
 			sc.nid = 0;
 
-		freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
+		freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
 	}
 
 	up_read(&shrinker_rwsem);
@@ -404,6 +419,29 @@ out:
 	return freed;
 }
 
+void drop_slab_node(int nid)
+{
+	unsigned long freed;
+
+	do {
+		struct mem_cgroup *memcg = NULL;
+
+		freed = 0;
+		do {
+			freed += shrink_slab(GFP_KERNEL, nid, memcg,
+					     1000, 1000);
+		} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+	} while (freed > 10);
+}
+
+void drop_slab(void)
+{
+	int nid;
+
+	for_each_online_node(nid)
+		drop_slab_node(nid);
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
 	/*
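The new drop_slab_node() keeps the old heuristic from fs/drop_caches.c (retry until a pass frees no more than 10 objects) but adds an inner mem_cgroup_iter() walk so that every memcg's caches are shrunk on each pass; the initial NULL iteration covers the global lists. drop_slab() itself is still what `echo 2 > /proc/sys/vm/drop_caches` ends up calling; only its implementation moved into mm/vmscan.c.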
@@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			bool is_classzone)
 {
+	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
 
@@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 	memcg = mem_cgroup_iter(root, NULL, &reclaim);
 	do {
 		unsigned long lru_pages;
+		unsigned long scanned;
 		struct lruvec *lruvec;
 		int swappiness;
 
@@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 
 		lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 		swappiness = mem_cgroup_swappiness(memcg);
+		scanned = sc->nr_scanned;
 
 		shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
 		zone_lru_pages += lru_pages;
 
+		if (memcg && is_classzone)
+			shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+				    memcg, sc->nr_scanned - scanned,
+				    lru_pages);
+
 		/*
 		 * Direct reclaim and kswapd have to scan all memory
 		 * cgroups to fulfill the overall scan target for the
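Each memcg's slab caches are thus shrunk in proportion to the LRU pages just scanned in that memcg (sc->nr_scanned - scanned) against that memcg's LRU size (lru_pages), paralleling the global shrink_slab() call in the next hunk, which uses the zone-wide totals.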
@@ -2330,19 +2376,14 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		 * Shrink the slab caches in the same proportion that
 		 * the eligible LRU pages were scanned.
 		 */
-		if (global_reclaim(sc) && is_classzone) {
-			struct reclaim_state *reclaim_state;
-
-			shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
-					  sc->nr_scanned - nr_scanned,
-					  zone_lru_pages);
-
-			reclaim_state = current->reclaim_state;
-			if (reclaim_state) {
-				sc->nr_reclaimed +=
-					reclaim_state->reclaimed_slab;
-				reclaim_state->reclaimed_slab = 0;
-			}
+		if (global_reclaim(sc) && is_classzone)
+			shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
+				    sc->nr_scanned - nr_scanned,
+				    zone_lru_pages);
+
+		if (reclaim_state) {
+			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+			reclaim_state->reclaimed_slab = 0;
 		}
 
 		vmpressure(sc->gfp_mask, sc->target_mem_cgroup,