2012-07-07 00:25:11 +04:00
# ifndef MM_SLAB_H
# define MM_SLAB_H
/*
* Internal slab definitions
*/
/*
 * State of the slab allocator.
 *
 * This is used to describe the states of the allocator during bootup.
 * Allocators use this to gradually bootstrap themselves. Most allocators
 * have the problem that the structures used for managing slab caches are
 * allocated from slab caches themselves.
 */
enum slab_state {
	/* Bootstrap stages, listed in the order they are reached (DOWN == 0). */
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node available */
	PARTIAL_ARRAYCACHE,	/* SLAB: kmalloc size for arraycache available */
	PARTIAL_NODE,		/* SLAB: kmalloc size for node struct available */
	UP,			/* Slab caches usable but not all extras yet */
	FULL			/* Everything is working */
};
extern enum slab_state slab_state ;
2012-07-07 00:25:12 +04:00
/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex ;
2012-09-05 04:20:33 +04:00
/* The list of all slab caches on the system */
2012-07-07 00:25:12 +04:00
extern struct list_head slab_caches ;
2012-09-05 04:20:33 +04:00
/* The slab cache that manages slab cache information */
extern struct kmem_cache * kmem_cache ;
2012-11-28 20:23:16 +04:00
/*
 * Implemented outside this header.
 * NOTE(review): presumably derives the effective object alignment from the
 * cache @flags, the requested @align and the object @size -- confirm against
 * the definition.
 */
unsigned long calculate_alignment(unsigned long flags,
				  unsigned long align,
				  unsigned long size);
2013-01-10 23:12:17 +04:00
# ifndef CONFIG_SLOB
/* Kmalloc array related functions */
void create_kmalloc_caches ( unsigned long ) ;
2013-01-10 23:14:19 +04:00
/* Find the kmalloc slab corresponding for a certain size */
struct kmem_cache * kmalloc_slab ( size_t , gfp_t ) ;
2013-01-10 23:12:17 +04:00
# endif
2012-09-05 04:20:33 +04:00
/* Functions provided by the slab allocators */
2012-09-05 03:18:33 +04:00
extern int __kmem_cache_create ( struct kmem_cache * , unsigned long flags ) ;
2012-07-07 00:25:11 +04:00
2012-11-28 20:23:07 +04:00
/*
 * Cache construction helpers implemented outside this header.
 *
 * create_kmalloc_cache() hands back a cache pointer for the given name, size
 * and flags; create_boot_cache() instead works on a kmem_cache supplied by
 * the caller and returns nothing.
 * NOTE(review): presumably both are meant for early boot -- confirm against
 * the definitions.
 */
extern struct kmem_cache *create_kmalloc_cache(const char *name,
					       size_t size,
					       unsigned long flags);
extern void create_boot_cache(struct kmem_cache *, const char *name,
			      size_t size, unsigned long flags);
2012-12-19 02:22:34 +04:00
struct mem_cgroup ;
2012-09-05 04:18:32 +04:00
# ifdef CONFIG_SLUB
2012-12-19 02:22:34 +04:00
/*
 * SLUB only; implemented outside this header.
 *
 * NOTE(review): per the change history attached to this declaration, cache
 * creation first looks for an already existing compatible cache (same object
 * size, alignment, ctor, ...); when one is found its refcount is bumped and
 * it is returned instead of creating a new cache.  memcg caches are never
 * merged this way, which is why no memcg argument is taken.  Confirm against
 * the definition.
 */
struct kmem_cache *
__kmem_cache_alias(const char *name, size_t size, size_t align,
		   unsigned long flags, void (*ctor)(void *));
2012-09-05 04:18:32 +04:00
# else
2012-12-19 02:22:34 +04:00
/*
 * Fallback used when CONFIG_SLUB is not set: no existing cache is ever
 * offered for reuse, so the result is always NULL and all arguments are
 * ignored.
 */
static inline struct kmem_cache *
__kmem_cache_alias(const char *name, size_t size, size_t align,
		   unsigned long flags, void (*ctor)(void *))
{
	return NULL;
}
# endif
2012-10-17 15:36:51 +04:00
/*
 * Legal flag mask for kmem_cache_create(), for various configurations.
 *
 * The mask is the union of three groups: a core set that does not depend on
 * the configuration, a debug set and an allocator-specific set.  The latter
 * two collapse to 0 when no matching option is configured.
 */
#define SLAB_CORE_FLAGS \
	(SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
	 SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS)

/* Debug group: CONFIG_DEBUG_SLAB is checked first, then CONFIG_SLUB_DEBUG. */
#ifdef CONFIG_DEBUG_SLAB
#define SLAB_DEBUG_FLAGS \
	(SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
#elif defined(CONFIG_SLUB_DEBUG)
#define SLAB_DEBUG_FLAGS \
	(SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
	 SLAB_TRACE | SLAB_DEBUG_FREE)
#else
#define SLAB_DEBUG_FLAGS (0)
#endif

/* Allocator group: SLAB additionally accepts SLAB_MEM_SPREAD. */
#ifdef CONFIG_SLAB
#define SLAB_CACHE_FLAGS \
	(SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
	 SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | SLAB_NOTRACK)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS \
	(SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
	 SLAB_TEMPORARY | SLAB_NOTRACK)
#else
#define SLAB_CACHE_FLAGS (0)
#endif

#define CACHE_CREATE_MASK \
	(SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
2012-09-05 03:18:33 +04:00
int __kmem_cache_shutdown ( struct kmem_cache * ) ;
slab: get_online_mems for kmem_cache_{create,destroy,shrink}
When we create a sl[au]b cache, we allocate kmem_cache_node structures
for each online NUMA node. To handle nodes taken online/offline, we
register memory hotplug notifier and allocate/free kmem_cache_node
corresponding to the node that changes its state for each kmem cache.
To synchronize between the two paths we hold the slab_mutex during both
the cache creation/destruction path and while tuning per-node parts of
kmem caches in memory hotplug handler, but that's not quite right,
because it does not guarantee that a newly created cache will have all
kmem_cache_nodes initialized in case it races with memory hotplug. For
instance, in case of slub:
CPU0 CPU1
---- ----
kmem_cache_create: online_pages:
__kmem_cache_create: slab_memory_callback:
slab_mem_going_online_callback:
lock slab_mutex
for each slab_caches list entry
allocate kmem_cache node
unlock slab_mutex
lock slab_mutex
init_kmem_cache_nodes:
for_each_node_state(node, N_NORMAL_MEMORY)
allocate kmem_cache node
add kmem_cache to slab_caches list
unlock slab_mutex
online_pages (continued):
node_states_set_node
As a result we'll get a kmem cache with not all kmem_cache_nodes
allocated.
To avoid issues like that we should hold get/put_online_mems() during
the whole kmem cache creation/destruction/shrink paths, just like we
deal with cpu hotplug. This patch does the trick.
Note that after it's applied, there is no need to take the slab_mutex
for kmem_cache_shrink any more, so it is removed from there.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-06-05 03:07:20 +04:00
int __kmem_cache_shrink ( struct kmem_cache * ) ;
2014-05-06 23:50:08 +04:00
void slab_kmem_cache_release ( struct kmem_cache * ) ;
2012-09-05 03:18:33 +04:00
2012-10-19 18:20:25 +04:00
struct seq_file ;
struct file ;
2012-10-19 18:20:27 +04:00
/*
 * Per-cache statistics record; a pointer to one is handed to get_slabinfo().
 * NOTE(review): field meanings are inferred from their names only -- confirm
 * against the allocator that fills them in.
 */
struct slabinfo {
	/* object and slab counts */
	unsigned long	active_objs;
	unsigned long	num_objs;
	unsigned long	active_slabs;
	unsigned long	num_slabs;
	unsigned long	shared_avail;

	/* tunables and geometry */
	unsigned int	limit;
	unsigned int	batchcount;
	unsigned int	shared;
	unsigned int	objects_per_slab;
	unsigned int	cache_order;
};
void get_slabinfo ( struct kmem_cache * s , struct slabinfo * sinfo ) ;
void slabinfo_show_stats ( struct seq_file * m , struct kmem_cache * s ) ;
2012-10-19 18:20:25 +04:00
ssize_t slabinfo_write ( struct file * file , const char __user * buffer ,
size_t count , loff_t * ppos ) ;
2012-12-19 02:22:27 +04:00
# ifdef CONFIG_MEMCG_KMEM
static inline bool is_root_cache ( struct kmem_cache * s )
{
return ! s - > memcg_params | | s - > memcg_params - > is_root_cache ;
}
2012-12-19 02:22:34 +04:00
2012-12-19 02:22:46 +04:00
static inline bool slab_equal_or_root ( struct kmem_cache * s ,
struct kmem_cache * p )
{
return ( p = = s ) | |
( s - > memcg_params & & ( p = = s - > memcg_params - > root_cache ) ) ;
}
2012-12-19 02:23:01 +04:00
/*
* We use suffixes to the name in memcg because we can ' t have caches
* created in the system with the same name . But when we print them
* locally , better refer to them with the base name
*/
static inline const char * cache_name ( struct kmem_cache * s )
{
if ( ! is_root_cache ( s ) )
return s - > memcg_params - > root_cache - > name ;
return s - > name ;
}
2014-01-24 03:53:06 +04:00
/*
* Note , we protect with RCU only the memcg_caches array , not per - memcg caches .
* That said the caller must assure the memcg ' s cache won ' t go away . Since once
* created a memcg ' s cache is destroyed only along with the root cache , it is
* true if we are going to allocate from the cache or hold a reference to the
* root cache by other means . Otherwise , we should hold either the slab_mutex
* or the memcg ' s slab_caches_mutex while calling this function and accessing
* the returned value .
*/
2013-11-13 03:08:23 +04:00
static inline struct kmem_cache *
cache_from_memcg_idx ( struct kmem_cache * s , int idx )
2012-12-19 02:23:01 +04:00
{
2014-01-24 03:52:59 +04:00
struct kmem_cache * cachep ;
2014-01-24 03:53:06 +04:00
struct memcg_cache_params * params ;
2014-01-24 03:52:59 +04:00
2013-08-29 03:35:20 +04:00
if ( ! s - > memcg_params )
return NULL ;
2014-01-24 03:53:06 +04:00
rcu_read_lock ( ) ;
params = rcu_dereference ( s - > memcg_params ) ;
cachep = params - > memcg_caches [ idx ] ;
rcu_read_unlock ( ) ;
2014-01-24 03:52:59 +04:00
/*
* Make sure we will access the up - to - date value . The code updating
* memcg_caches issues a write barrier to match this ( see
* memcg_register_cache ( ) ) .
*/
smp_read_barrier_depends ( ) ;
return cachep ;
2012-12-19 02:23:01 +04:00
}
2012-12-19 02:23:03 +04:00
static inline struct kmem_cache * memcg_root_cache ( struct kmem_cache * s )
{
if ( is_root_cache ( s ) )
return s ;
return s - > memcg_params - > root_cache ;
}
2014-06-05 03:06:38 +04:00
/*
 * Charge a slab allocation of the given @order against @s.
 *
 * Nothing is charged, and 0 is returned, when memcg kmem accounting is not
 * enabled or when @s is a root cache.  Otherwise the result of
 * __memcg_charge_slab() is passed through unchanged.
 */
static __always_inline int memcg_charge_slab(struct kmem_cache *s,
					     gfp_t gfp, int order)
{
	if (!memcg_kmem_enabled())
		return 0;
	if (is_root_cache(s))
		return 0;
	return __memcg_charge_slab(s, gfp, order);
}
/*
 * Counterpart of memcg_charge_slab(): a no-op when memcg kmem accounting is
 * not enabled or when @s is a root cache, otherwise forwards to
 * __memcg_uncharge_slab().
 */
static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
{
	if (!memcg_kmem_enabled())
		return;
	if (is_root_cache(s))
		return;
	__memcg_uncharge_slab(s, order);
}
2012-12-19 02:22:27 +04:00
# else
/* Without CONFIG_MEMCG_KMEM every cache is reported as a root cache. */
static inline bool is_root_cache(struct kmem_cache *s)
{
	return true;
}
2012-12-19 02:22:46 +04:00
/*
 * Without CONFIG_MEMCG_KMEM the comparison is not performed at all: the
 * answer is unconditionally true, whatever @s and @p are.
 */
static inline bool slab_equal_or_root(struct kmem_cache *s,
				      struct kmem_cache *p)
{
	return true;
}
2012-12-19 02:23:01 +04:00
static inline const char * cache_name ( struct kmem_cache * s )
{
return s - > name ;
}
2013-11-13 03:08:23 +04:00
/*
 * Without CONFIG_MEMCG_KMEM there are no per-memcg caches to look up, so
 * the result is always NULL regardless of @s and @idx.
 */
static inline struct kmem_cache *
cache_from_memcg_idx(struct kmem_cache *s, int idx)
{
	return NULL;
}
2012-12-19 02:23:03 +04:00
/* Without CONFIG_MEMCG_KMEM a cache is its own root: hand @s straight back. */
static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
{
	return s;
}
2014-06-05 03:06:38 +04:00
/*
 * Without CONFIG_MEMCG_KMEM nothing is charged; the call always reports
 * success (0).
 */
static inline int memcg_charge_slab(struct kmem_cache *s,
				    gfp_t gfp, int order)
{
	return 0;
}
/* Without CONFIG_MEMCG_KMEM there is nothing to uncharge: intentionally empty. */
static inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
{
}
2012-12-19 02:22:27 +04:00
# endif
2012-12-19 02:22:46 +04:00
static inline struct kmem_cache * cache_from_obj ( struct kmem_cache * s , void * x )
{
struct kmem_cache * cachep ;
struct page * page ;
/*
* When kmemcg is not being used , both assignments should return the
* same value . but we don ' t want to pay the assignment price in that
* case . If it is not compiled in , the compiler should be smart enough
* to not do even the assignment . In that case , slab_equal_or_root
* will also be a constant .
*/
if ( ! memcg_kmem_enabled ( ) & & ! unlikely ( s - > flags & SLAB_DEBUG_FREE ) )
return s ;
page = virt_to_head_page ( x ) ;
cachep = page - > slab_cache ;
if ( slab_equal_or_root ( cachep , s ) )
return cachep ;
pr_err ( " %s: Wrong slab cache. %s but object is from %s \n " ,
__FUNCTION__ , cachep - > name , s - > name ) ;
WARN_ON_ONCE ( 1 ) ;
return s ;
}
2012-07-07 00:25:11 +04:00
# endif
2013-01-10 23:14:19 +04:00
/*
 * The slab lists for all objects.
 *
 * Only the lock is present in every configuration; the remaining members
 * are selected by the allocator chosen at build time.
 */
struct kmem_cache_node {
	spinlock_t list_lock;

#if defined(CONFIG_SLAB)
	/* partial list first, better asm code */
	struct list_head slabs_partial;
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long free_objects;
	unsigned int free_limit;
	/* Per-node cache coloring */
	unsigned int colour_next;
	/* shared per node */
	struct array_cache *shared;
	/* on other nodes */
	struct array_cache **alien;
	/* updated without locking */
	unsigned long next_reap;
	/* updated without locking */
	int free_touched;
#endif

#if defined(CONFIG_SLUB)
	unsigned long nr_partial;
	struct list_head partial;
#if defined(CONFIG_SLUB_DEBUG)
	atomic_long_t nr_slabs;
	atomic_long_t total_objects;
	struct list_head full;
#endif
#endif
};
2013-07-04 04:33:23 +04:00
2013-07-08 04:08:28 +04:00
/*
 * Implemented outside this header.
 * NOTE(review): the signatures look like seq_file iterator callbacks (next
 * and stop) -- confirm against the definitions and their users.
 */
void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);