2012-07-07 00:25:10 +04:00
/*
* Slab allocator functions that are independent of the allocator strategy
*
* ( C ) 2012 Christoph Lameter < cl @ linux . com >
*/
# include <linux/slab.h>
# include <linux/mm.h>
# include <linux/poison.h>
# include <linux/interrupt.h>
# include <linux/memory.h>
# include <linux/compiler.h>
# include <linux/module.h>
2012-07-07 00:25:13 +04:00
# include <linux/cpu.h>
# include <linux/uaccess.h>
2012-10-19 18:20:25 +04:00
# include <linux/seq_file.h>
# include <linux/proc_fs.h>
2012-07-07 00:25:10 +04:00
# include <asm/cacheflush.h>
# include <asm/tlbflush.h>
# include <asm/page.h>
2012-12-19 02:22:34 +04:00
# include <linux/memcontrol.h>
2012-07-07 00:25:10 +04:00
2012-07-07 00:25:11 +04:00
# include "slab.h"
/* Current state of the slab allocator; slab_is_available() compares it against UP. */
enum slab_state slab_state ;
2012-07-07 00:25:12 +04:00
/*
 * List of caches, chained through the 'list' member of struct kmem_cache.
 * Cache creation and destruction, memcg_update_all_caches() and the slabinfo
 * iterator in this file access it with slab_mutex held;
 * create_kmalloc_cache() adds to it without taking the mutex.
 */
LIST_HEAD ( slab_caches ) ;
/* Mutex taken around the slab_caches accesses described above. */
DEFINE_MUTEX ( slab_mutex ) ;
2012-09-05 04:20:33 +04:00
/*
 * Cache that struct kmem_cache descriptors are allocated from and freed to
 * (see the kmem_cache_zalloc()/kmem_cache_free() calls in this file).
 */
struct kmem_cache * kmem_cache ;
2012-07-07 00:25:11 +04:00
2012-08-16 11:09:46 +04:00
# ifdef CONFIG_DEBUG_VM
2012-12-19 02:22:34 +04:00
/*
 * kmem_cache_sanity_check - vet the arguments of a cache creation request
 * @memcg: memcg the cache is created for, or NULL; when it is non-NULL the
 *         duplicate-name test is skipped
 * @name:  requested cache name
 * @size:  requested object size
 *
 * Returns 0 when the request looks sane and -EINVAL otherwise. A request is
 * rejected for a NULL name, a call from interrupt context, a size below
 * sizeof(void *) or above KMALLOC_MAX_SIZE, or (memcg == NULL only) a name
 * already carried by a cache on slab_caches. A name containing a space only
 * triggers a WARN_ON.
 *
 * The function walks slab_caches without taking a lock itself.
 */
static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
				   size_t size)
{
	struct kmem_cache *cachep;

	if (!name || in_interrupt())
		goto bad_args;
	if (size < sizeof(void *) || size > KMALLOC_MAX_SIZE)
		goto bad_args;

	list_for_each_entry(cachep, &slab_caches, list) {
		char probe;

		/*
		 * The name can become unreadable when a module is unloaded
		 * without destroying its cache and nobody reuses the module's
		 * vmalloc area. Complain and move on to the next cache.
		 */
		if (probe_kernel_address(cachep->name, probe)) {
			pr_err(" Slab cache with size %d has lost its name \n ",
			       cachep->object_size);
			continue;
		}

		/*
		 * Names of memcg caches are not compared: memcg naming is
		 * under our control, so no duplicates in the global list
		 * means no duplicates in the memcg lists either.
		 */
		if (memcg)
			continue;

		if (strcmp(cachep->name, name) != 0)
			continue;

		pr_err(" %s (%s): Cache name already exists. \n ",
		       __func__, name);
		dump_stack();
		return -EINVAL;
	}

	/* A space in the name confuses parsers */
	WARN_ON(strchr(name, ' '));
	return 0;

bad_args:
	pr_err(" kmem_cache_create(%s) integrity check failed \n ", name);
	return -EINVAL;
}
# else
2012-12-19 02:22:34 +04:00
/*
 * Variant used when CONFIG_DEBUG_VM is off: nothing is checked and every
 * request is reported as sane (0).
 */
static inline int
kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, size_t size)
{
	return 0;
}
2012-07-07 00:25:13 +04:00
# endif
memcg: allocate memory for memcg caches whenever a new memcg appears
Every cache that is considered a root cache (basically the "original"
caches, tied to the root memcg/no-memcg) will have an array that should be
large enough to store a cache pointer per each memcg in the system.
Theoretically, this is as high as 1 << sizeof(css_id), which is currently
in the 64k pointers range. Most of the time, we won't be using that much.
What goes in this patch, is a simple scheme to dynamically allocate such
an array, in order to minimize memory usage for memcg caches. Because we
would also like to avoid allocations all the time, at least for now, the
array will only grow. It will tend to be big enough to hold the maximum
number of kmem-limited memcgs ever achieved.
We'll allocate it to be a minimum of 64 kmem-limited memcgs. When we have
more than that, we'll start doubling the size of this array every time the
limit is reached.
Because we are only considering kmem limited memcgs, a natural point for
this to happen is when we write to the limit. At that point, we already
have set_limit_mutex held, so that will become our natural synchronization
mechanism.
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-12-19 02:22:38 +04:00
# ifdef CONFIG_MEMCG_KMEM
/*
 * memcg_update_all_caches - propagate a new memcg count to all root caches
 * @num_memcgs: value handed to memcg_update_cache_size() for every root
 *              cache and, if all of them succeed, to
 *              memcg_update_array_size()
 *
 * Runs with slab_mutex held for the whole walk. The walk stops at the first
 * cache whose update fails and that error is returned; caches updated before
 * the failure are left in their updated state (see the comment at
 * memcg_update_cache_size in memcontrol.c) and memcg_update_array_size() is
 * not called. Returns 0 on success.
 */
int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *cachep;
	int err = 0;

	mutex_lock(&slab_mutex);

	list_for_each_entry(cachep, &slab_caches, list) {
		if (is_root_cache(cachep)) {
			err = memcg_update_cache_size(cachep, num_memcgs);
			if (err)
				break;
		}
	}

	/* err is non-zero only when the walk above was cut short */
	if (!err)
		memcg_update_array_size(num_memcgs);

	mutex_unlock(&slab_mutex);
	return err;
}
# endif
2012-11-28 20:23:16 +04:00
/*
 * calculate_alignment - work out the alignment of the objects of a cache
 * @flags: SLAB flags; only SLAB_HWCACHE_ALIGN is looked at here
 * @align: alignment requested by the user
 * @size:  size of the objects
 *
 * With SLAB_HWCACHE_ALIGN the cache line size is the starting point and is
 * halved for as long as an object still fits into half of it, so the
 * suggestion is only followed in full for sufficiently large objects. The
 * hardware alignment never overrides a larger user-specified alignment: the
 * greater of the two is used.
 *
 * The result is raised to ARCH_SLAB_MINALIGN if it is below that and is
 * finally rounded up to a multiple of sizeof(void *).
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();

		/*
		 * Stop halving at 1. Without the bound, size == 0 keeps
		 * "size <= ralign / 2" true even after ralign has dropped to
		 * 0 and the loop never ends. For size >= 1 the test already
		 * fails once ralign / 2 is 0, so the result is unchanged.
		 */
		while (ralign > 1 && size <= ralign / 2)
			ralign /= 2;

		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}
2012-08-16 11:09:46 +04:00
/*
* kmem_cache_create - Create a cache .
* @ name : A string which is used in / proc / slabinfo to identify this cache .
* @ size : The size of objects to be created in this cache .
* @ align : The required alignment for the objects .
* @ flags : SLAB flags
* @ ctor : A constructor for the objects .
*
* Returns a ptr to the cache on success , NULL on failure .
* Cannot be called within a interrupt , but can be interrupted .
* The @ ctor is run when new pages are allocated by the cache .
*
* The flags are
*
* % SLAB_POISON - Poison the slab with a known test pattern ( a5a5a5a5 )
* to catch references to uninitialised memory .
*
* % SLAB_RED_ZONE - Insert ` Red ' zones around the allocated memory to check
* for buffer overruns .
*
* % SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
* cacheline . This can be beneficial if you ' re counting cycles as closely
* as davem .
*/
2012-12-19 02:22:34 +04:00
struct kmem_cache *
kmem_cache_create_memcg ( struct mem_cgroup * memcg , const char * name , size_t size ,
2012-12-19 02:23:03 +04:00
size_t align , unsigned long flags , void ( * ctor ) ( void * ) ,
struct kmem_cache * parent_cache )
2012-08-16 11:09:46 +04:00
{
struct kmem_cache * s = NULL ;
2012-09-05 04:20:33 +04:00
int err = 0 ;
2012-07-07 00:25:10 +04:00
2012-08-16 11:09:46 +04:00
get_online_cpus ( ) ;
mutex_lock ( & slab_mutex ) ;
2012-09-05 04:20:33 +04:00
2012-12-19 02:22:34 +04:00
if ( ! kmem_cache_sanity_check ( memcg , name , size ) = = 0 )
2012-09-05 04:20:33 +04:00
goto out_locked ;
2012-10-17 15:36:51 +04:00
/*
* Some allocators will constraint the set of valid flags to a subset
* of all flags . We expect them to define CACHE_CREATE_MASK in this
* case , and we ' ll just provide them with a sanitized version of the
* passed flags .
*/
flags & = CACHE_CREATE_MASK ;
2012-09-05 04:20:33 +04:00
2012-12-19 02:22:34 +04:00
s = __kmem_cache_alias ( memcg , name , size , align , flags , ctor ) ;
2012-09-05 04:18:32 +04:00
if ( s )
goto out_locked ;
2012-09-05 04:20:34 +04:00
s = kmem_cache_zalloc ( kmem_cache , GFP_KERNEL ) ;
2012-09-05 03:18:33 +04:00
if ( s ) {
2012-09-05 03:18:33 +04:00
s - > object_size = s - > size = size ;
2012-11-28 20:23:16 +04:00
s - > align = calculate_alignment ( flags , align , size ) ;
2012-09-05 03:18:33 +04:00
s - > ctor = ctor ;
2012-12-19 02:22:34 +04:00
2012-12-19 02:23:03 +04:00
if ( memcg_register_cache ( memcg , s , parent_cache ) ) {
2012-12-19 02:22:34 +04:00
kmem_cache_free ( kmem_cache , s ) ;
err = - ENOMEM ;
goto out_locked ;
}
2012-09-05 03:18:33 +04:00
s - > name = kstrdup ( name , GFP_KERNEL ) ;
if ( ! s - > name ) {
kmem_cache_free ( kmem_cache , s ) ;
err = - ENOMEM ;
goto out_locked ;
}
err = __kmem_cache_create ( s , flags ) ;
2012-09-05 03:38:33 +04:00
if ( ! err ) {
s - > refcount = 1 ;
2012-09-05 03:18:33 +04:00
list_add ( & s - > list , & slab_caches ) ;
2012-12-19 02:22:34 +04:00
memcg_cache_list_add ( memcg , s ) ;
2012-09-05 03:38:33 +04:00
} else {
2012-09-05 03:18:33 +04:00
kfree ( s - > name ) ;
2012-09-05 04:20:34 +04:00
kmem_cache_free ( kmem_cache , s ) ;
}
2012-09-05 03:18:33 +04:00
} else
2012-09-05 04:20:34 +04:00
err = - ENOMEM ;
2012-09-05 03:38:33 +04:00
2012-09-05 04:20:33 +04:00
out_locked :
2012-07-07 00:25:13 +04:00
mutex_unlock ( & slab_mutex ) ;
put_online_cpus ( ) ;
2012-09-05 04:20:33 +04:00
if ( err ) {
if ( flags & SLAB_PANIC )
panic ( " kmem_cache_create: Failed to create slab '%s'. Error %d \n " ,
name , err ) ;
else {
printk ( KERN_WARNING " kmem_cache_create(%s) failed with error %d " ,
name , err ) ;
dump_stack ( ) ;
}
return NULL ;
}
2012-07-07 00:25:10 +04:00
return s ;
}
2012-12-19 02:22:34 +04:00
/*
 * kmem_cache_create - create a cache that is not tied to a memcg
 *
 * Forwards all arguments to kmem_cache_create_memcg() with a NULL memcg and
 * a NULL parent cache and returns whatever that call returns.
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
				     unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *cachep;

	cachep = kmem_cache_create_memcg(NULL, name, size, align, flags, ctor,
					 NULL);
	return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);
2012-07-07 00:25:11 +04:00
2012-09-05 03:18:33 +04:00
void kmem_cache_destroy ( struct kmem_cache * s )
{
2012-12-19 02:22:55 +04:00
/* Destroy all the children caches if we aren't a memcg cache */
kmem_cache_destroy_memcg_children ( s ) ;
2012-09-05 03:18:33 +04:00
get_online_cpus ( ) ;
mutex_lock ( & slab_mutex ) ;
s - > refcount - - ;
if ( ! s - > refcount ) {
list_del ( & s - > list ) ;
if ( ! __kmem_cache_shutdown ( s ) ) {
2012-10-08 11:26:01 +04:00
mutex_unlock ( & slab_mutex ) ;
2012-09-05 03:18:33 +04:00
if ( s - > flags & SLAB_DESTROY_BY_RCU )
rcu_barrier ( ) ;
2012-12-19 02:22:34 +04:00
memcg_release_cache ( s ) ;
2012-09-05 03:18:33 +04:00
kfree ( s - > name ) ;
2012-09-05 04:18:32 +04:00
kmem_cache_free ( kmem_cache , s ) ;
2012-09-05 03:18:33 +04:00
} else {
list_add ( & s - > list , & slab_caches ) ;
2012-10-08 11:26:01 +04:00
mutex_unlock ( & slab_mutex ) ;
2012-09-05 03:18:33 +04:00
printk ( KERN_ERR " kmem_cache_destroy %s: Slab cache still has objects \n " ,
s - > name ) ;
dump_stack ( ) ;
}
2012-10-08 11:26:01 +04:00
} else {
mutex_unlock ( & slab_mutex ) ;
2012-09-05 03:18:33 +04:00
}
put_online_cpus ( ) ;
}
EXPORT_SYMBOL ( kmem_cache_destroy ) ;
2012-07-07 00:25:11 +04:00
/* Returns 1 once slab_state has reached UP and 0 before that. */
int slab_is_available(void)
{
	if (slab_state < UP)
		return 0;

	return 1;
}
2012-10-19 18:20:25 +04:00
2012-11-28 20:23:07 +04:00
# ifndef CONFIG_SLOB
/*
 * create_boot_cache - create a cache during boot, when no slab services are
 * available yet
 * @s:     descriptor supplied by the caller; it is filled in here
 * @name:  cache name; the pointer is stored as is, not copied
 * @size:  object size
 * @flags: SLAB flags, passed to calculate_alignment() and
 *         __kmem_cache_create()
 *
 * The alignment is derived from ARCH_KMALLOC_MINALIGN. A failure of
 * __kmem_cache_create() is fatal (panic). On success the reference count is
 * set to -1.
 */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int rc;

	s->name = name;
	s->object_size = size;
	s->size = s->object_size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);

	rc = __kmem_cache_create(s, flags);
	if (rc != 0)
		panic(" Creation of kmalloc slab %s size=%zd failed. Reason %d \n ",
		      name, size, rc);

	/* Exempt from merging for now */
	s->refcount = -1;
}
/*
 * create_kmalloc_cache - allocate and set up a cache during boot
 * @name:  cache name, handed to create_boot_cache()
 * @size:  object size
 * @flags: SLAB flags
 *
 * The descriptor is allocated from kmem_cache with GFP_NOWAIT; running out
 * of memory is fatal (panic). The new cache is put on slab_caches and its
 * reference count is set to 1, replacing the -1 left by create_boot_cache().
 * Returns the new cache.
 */
struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
					       unsigned long flags)
{
	struct kmem_cache *cachep;

	cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
	if (cachep == NULL)
		panic(" Out of memory when creating slab %s \n ", name);

	create_boot_cache(cachep, name, size, flags);
	list_add(&cachep->list, &slab_caches);
	cachep->refcount = 1;

	return cachep;
}
# endif /* !CONFIG_SLOB */
2012-10-19 18:20:25 +04:00
# ifdef CONFIG_SLABINFO
2012-12-19 02:23:01 +04:00
/*
 * print_slabinfo_header - emit the heading of the slabinfo output
 * @m: seq_file to write to
 *
 * Writes a version string followed by the column legend. With
 * CONFIG_DEBUG_SLAB the version string carries a "(statistics)" marker and
 * the legend gains the globalstat and cpustat columns. The legend is closed
 * with a single newline character.
 */
void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " slabinfo - version: 2.1 (statistics) \n ");
#else
	seq_puts(m, " slabinfo - version: 2.1 \n ");
#endif
	seq_puts(m, " # name <active_objs> <num_objs> <objsize> "
		 " <objperslab> <pagesperslab> ");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor> ");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail> ");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 " <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow> ");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss> ");
#endif
	/* seq_putc() takes one character: pass a plain newline */
	seq_putc(m, '\n');
}
2012-10-19 18:20:25 +04:00
/*
 * s_start - seq_file start callback of the slabinfo iterator
 *
 * Takes slab_mutex (s_stop() drops it), prints the header when reading
 * begins at position 0 and returns the slab_caches element found at *pos
 * by seq_list_start().
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&slab_mutex);

	if (*pos == 0)
		print_slabinfo_header(m);

	return seq_list_start(&slab_caches, *pos);
}
/*
 * s_next - seq_file next callback: step from element @p to its successor on
 * slab_caches, as done by seq_list_next() which also receives @pos.
 */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	void *following = seq_list_next(p, &slab_caches, pos);

	return following;
}
/* s_stop - seq_file stop callback: release the slab_mutex taken in s_start(). */
static void s_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}
2012-12-19 02:23:01 +04:00
static void
memcg_accumulate_slabinfo ( struct kmem_cache * s , struct slabinfo * info )
{
struct kmem_cache * c ;
struct slabinfo sinfo ;
int i ;
if ( ! is_root_cache ( s ) )
return ;
for_each_memcg_cache_index ( i ) {
c = cache_from_memcg ( s , i ) ;
if ( ! c )
continue ;
memset ( & sinfo , 0 , sizeof ( sinfo ) ) ;
get_slabinfo ( c , & sinfo ) ;
info - > active_slabs + = sinfo . active_slabs ;
info - > num_slabs + = sinfo . num_slabs ;
info - > shared_avail + = sinfo . shared_avail ;
info - > active_objs + = sinfo . active_objs ;
info - > num_objs + = sinfo . num_objs ;
}
}
int cache_show ( struct kmem_cache * s , struct seq_file * m )
2012-10-19 18:20:25 +04:00
{
2012-10-19 18:20:27 +04:00
struct slabinfo sinfo ;
memset ( & sinfo , 0 , sizeof ( sinfo ) ) ;
get_slabinfo ( s , & sinfo ) ;
2012-12-19 02:23:01 +04:00
memcg_accumulate_slabinfo ( s , & sinfo ) ;
2012-10-19 18:20:27 +04:00
seq_printf ( m , " %-17s %6lu %6lu %6u %4u %4d " ,
2012-12-19 02:23:01 +04:00
cache_name ( s ) , sinfo . active_objs , sinfo . num_objs , s - > size ,
2012-10-19 18:20:27 +04:00
sinfo . objects_per_slab , ( 1 < < sinfo . cache_order ) ) ;
seq_printf ( m , " : tunables %4u %4u %4u " ,
sinfo . limit , sinfo . batchcount , sinfo . shared ) ;
seq_printf ( m , " : slabdata %6lu %6lu %6lu " ,
sinfo . active_slabs , sinfo . num_slabs , sinfo . shared_avail ) ;
slabinfo_show_stats ( m , s ) ;
seq_putc ( m , ' \n ' ) ;
return 0 ;
2012-10-19 18:20:25 +04:00
}
2012-12-19 02:23:01 +04:00
/*
 * s_show - seq_file show callback of the slabinfo iterator
 *
 * @p is the list element handed out by s_start()/s_next(). Only root caches
 * are printed (through cache_show()); for any other cache nothing is printed
 * and 0 is returned.
 */
static int s_show(struct seq_file *m, void *p)
{
	struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);

	return is_root_cache(cachep) ? cache_show(cachep, m) : 0;
}
2012-10-19 18:20:25 +04:00
/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Wires the seq_file callbacks defined above: s_start() takes slab_mutex and
 * positions the walk over slab_caches, s_next() advances it, s_show() prints
 * one cache and s_stop() releases the mutex.
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
. start = s_start ,
. next = s_next ,
. stop = s_stop ,
. show = s_show ,
} ;
/*
 * slabinfo_open - open handler of the slabinfo file: attach the slabinfo_op
 * iterator to @file and return the result of seq_open().
 */
static int slabinfo_open(struct inode *inode, struct file *file)
{
	int rc = seq_open(file, &slabinfo_op);

	return rc;
}
/*
 * File operations of the slabinfo proc entry registered in slab_proc_init().
 * Opening goes through slabinfo_open(); reading, seeking and release use the
 * generic seq_file helpers. Writes are handed to slabinfo_write(), which is
 * not defined in this file.
 */
static const struct file_operations proc_slabinfo_operations = {
. open = slabinfo_open ,
. read = seq_read ,
. write = slabinfo_write ,
. llseek = seq_lseek ,
. release = seq_release ,
} ;
/*
 * slab_proc_init - register the slabinfo proc entry
 *
 * Creates the entry with mode S_IRUSR, no parent directory (NULL) and
 * proc_slabinfo_operations as its file operations. The result of
 * proc_create() is not checked; 0 is always returned.
 */
static int __init slab_proc_init(void)
{
	const struct file_operations *fops = &proc_slabinfo_operations;

	proc_create(" slabinfo ", S_IRUSR, NULL, fops);

	return 0;
}
module_init(slab_proc_init);
# endif /* CONFIG_SLABINFO */