Merge branch 'slab/for-6.8/slub-hook-cleanups' into slab/for-next
Merge the SLAB allocator removal and a number of subsequent SLUB cleanups and optimizations.
This commit is contained in:
commit
61d7e367f8
12
CREDITS
12
CREDITS
@ -9,10 +9,6 @@
|
||||
Linus
|
||||
----------
|
||||
|
||||
N: Matt Mackal
|
||||
E: mpm@selenic.com
|
||||
D: SLOB slab allocator
|
||||
|
||||
N: Matti Aarnio
|
||||
E: mea@nic.funet.fi
|
||||
D: Alpha systems hacking, IPv6 and other network related stuff
|
||||
@ -1572,6 +1568,10 @@ S: Ampferstr. 50 / 4
|
||||
S: 6020 Innsbruck
|
||||
S: Austria
|
||||
|
||||
N: Mark Hemment
|
||||
E: markhe@nextd.demon.co.uk
|
||||
D: SLAB allocator implementation
|
||||
|
||||
N: Richard Henderson
|
||||
E: rth@twiddle.net
|
||||
E: rth@cygnus.com
|
||||
@ -2437,6 +2437,10 @@ D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
|
||||
D: Altera SoCFPGA and Nokia N900 support.
|
||||
S: Czech Republic
|
||||
|
||||
N: Olivia Mackall
|
||||
E: olivia@selenic.com
|
||||
D: SLOB slab allocator
|
||||
|
||||
N: Paul Mackerras
|
||||
E: paulus@samba.org
|
||||
D: PPP driver
|
||||
|
@ -37,7 +37,7 @@ The Slab Cache
|
||||
.. kernel-doc:: include/linux/slab.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: mm/slab.c
|
||||
.. kernel-doc:: mm/slub.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: mm/slab_common.c
|
||||
|
@ -154,7 +154,7 @@ config ARM64
|
||||
select HAVE_MOVE_PUD
|
||||
select HAVE_PCI
|
||||
select HAVE_ACPI_APEI if (ACPI && EFI)
|
||||
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
|
||||
select HAVE_ALIGNED_STRUCT_PAGE
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_BITREVERSE
|
||||
select HAVE_ARCH_COMPILER_H
|
||||
|
@ -146,7 +146,7 @@ config S390
|
||||
select GENERIC_TIME_VSYSCALL
|
||||
select GENERIC_VDSO_TIME_NS
|
||||
select GENERIC_IOREMAP if PCI
|
||||
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
|
||||
select HAVE_ALIGNED_STRUCT_PAGE
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
select HAVE_ARCH_JUMP_LABEL_RELATIVE
|
||||
|
@ -169,7 +169,7 @@ config X86
|
||||
select HAS_IOPORT
|
||||
select HAVE_ACPI_APEI if ACPI
|
||||
select HAVE_ACPI_APEI_NMI if ACPI
|
||||
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
|
||||
select HAVE_ALIGNED_STRUCT_PAGE
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
|
||||
select HAVE_ARCH_HUGE_VMALLOC if X86_64
|
||||
|
@ -108,7 +108,6 @@ enum cpuhp_state {
|
||||
CPUHP_X2APIC_PREPARE,
|
||||
CPUHP_SMPCFD_PREPARE,
|
||||
CPUHP_RELAY_PREPARE,
|
||||
CPUHP_SLAB_PREPARE,
|
||||
CPUHP_MD_RAID5_PREPARE,
|
||||
CPUHP_RCUTREE_PREP,
|
||||
CPUHP_CPUIDLE_COUPLED_PREPARE,
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
/*
|
||||
* Flags to pass to kmem_cache_create().
|
||||
* The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set.
|
||||
* The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op
|
||||
*/
|
||||
/* DEBUG: Perform (expensive) checks on alloc/free */
|
||||
#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U)
|
||||
@ -302,25 +302,15 @@ static inline unsigned int arch_slab_minalign(void)
|
||||
* Kmalloc array related definitions
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
/*
|
||||
* SLAB and SLUB directly allocates requests fitting in to an order-1 page
|
||||
* SLUB directly allocates requests fitting in to an order-1 page
|
||||
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
|
||||
*/
|
||||
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
|
||||
#ifndef KMALLOC_SHIFT_LOW
|
||||
#define KMALLOC_SHIFT_LOW 5
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
|
||||
#ifndef KMALLOC_SHIFT_LOW
|
||||
#define KMALLOC_SHIFT_LOW 3
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Maximum allocatable size */
|
||||
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
|
||||
@ -788,12 +778,4 @@ size_t kmalloc_size_roundup(size_t size);
|
||||
|
||||
void __init kmem_cache_init_late(void);
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
|
||||
int slab_prepare_cpu(unsigned int cpu);
|
||||
int slab_dead_cpu(unsigned int cpu);
|
||||
#else
|
||||
#define slab_prepare_cpu NULL
|
||||
#define slab_dead_cpu NULL
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_SLAB_H */
|
||||
|
@ -1,124 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_SLAB_DEF_H
|
||||
#define _LINUX_SLAB_DEF_H
|
||||
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/reciprocal_div.h>
|
||||
|
||||
/*
|
||||
* Definitions unique to the original Linux SLAB allocator.
|
||||
*/
|
||||
|
||||
struct kmem_cache {
|
||||
struct array_cache __percpu *cpu_cache;
|
||||
|
||||
/* 1) Cache tunables. Protected by slab_mutex */
|
||||
unsigned int batchcount;
|
||||
unsigned int limit;
|
||||
unsigned int shared;
|
||||
|
||||
unsigned int size;
|
||||
struct reciprocal_value reciprocal_buffer_size;
|
||||
/* 2) touched by every alloc & free from the backend */
|
||||
|
||||
slab_flags_t flags; /* constant flags */
|
||||
unsigned int num; /* # of objs per slab */
|
||||
|
||||
/* 3) cache_grow/shrink */
|
||||
/* order of pgs per slab (2^n) */
|
||||
unsigned int gfporder;
|
||||
|
||||
/* force GFP flags, e.g. GFP_DMA */
|
||||
gfp_t allocflags;
|
||||
|
||||
size_t colour; /* cache colouring range */
|
||||
unsigned int colour_off; /* colour offset */
|
||||
unsigned int freelist_size;
|
||||
|
||||
/* constructor func */
|
||||
void (*ctor)(void *obj);
|
||||
|
||||
/* 4) cache creation/removal */
|
||||
const char *name;
|
||||
struct list_head list;
|
||||
int refcount;
|
||||
int object_size;
|
||||
int align;
|
||||
|
||||
/* 5) statistics */
|
||||
#ifdef CONFIG_DEBUG_SLAB
|
||||
unsigned long num_active;
|
||||
unsigned long num_allocations;
|
||||
unsigned long high_mark;
|
||||
unsigned long grown;
|
||||
unsigned long reaped;
|
||||
unsigned long errors;
|
||||
unsigned long max_freeable;
|
||||
unsigned long node_allocs;
|
||||
unsigned long node_frees;
|
||||
unsigned long node_overflow;
|
||||
atomic_t allochit;
|
||||
atomic_t allocmiss;
|
||||
atomic_t freehit;
|
||||
atomic_t freemiss;
|
||||
|
||||
/*
|
||||
* If debugging is enabled, then the allocator can add additional
|
||||
* fields and/or padding to every object. 'size' contains the total
|
||||
* object size including these internal fields, while 'obj_offset'
|
||||
* and 'object_size' contain the offset to the user object and its
|
||||
* size.
|
||||
*/
|
||||
int obj_offset;
|
||||
#endif /* CONFIG_DEBUG_SLAB */
|
||||
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
struct kasan_cache kasan_info;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
unsigned int *random_seq;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HARDENED_USERCOPY
|
||||
unsigned int useroffset; /* Usercopy region offset */
|
||||
unsigned int usersize; /* Usercopy region size */
|
||||
#endif
|
||||
|
||||
struct kmem_cache_node *node[MAX_NUMNODES];
|
||||
};
|
||||
|
||||
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
|
||||
void *x)
|
||||
{
|
||||
void *object = x - (x - slab->s_mem) % cache->size;
|
||||
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
|
||||
|
||||
if (unlikely(object > last_object))
|
||||
return last_object;
|
||||
else
|
||||
return object;
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to avoid an expensive divide : (offset / cache->size)
|
||||
* Using the fact that size is a constant for a particular cache,
|
||||
* we can replace (offset / cache->size) by
|
||||
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
|
||||
*/
|
||||
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
|
||||
const struct slab *slab, void *obj)
|
||||
{
|
||||
u32 offset = (obj - slab->s_mem);
|
||||
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
|
||||
}
|
||||
|
||||
static inline int objs_per_slab(const struct kmem_cache *cache,
|
||||
const struct slab *slab)
|
||||
{
|
||||
if (is_kfence_address(slab_address(slab)))
|
||||
return 1;
|
||||
return cache->num;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_SLAB_DEF_H */
|
@ -1,204 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_SLUB_DEF_H
|
||||
#define _LINUX_SLUB_DEF_H
|
||||
|
||||
/*
|
||||
* SLUB : A Slab allocator without object queues.
|
||||
*
|
||||
* (C) 2007 SGI, Christoph Lameter
|
||||
*/
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/reciprocal_div.h>
|
||||
#include <linux/local_lock.h>
|
||||
|
||||
enum stat_item {
|
||||
ALLOC_FASTPATH, /* Allocation from cpu slab */
|
||||
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
|
||||
FREE_FASTPATH, /* Free to cpu slab */
|
||||
FREE_SLOWPATH, /* Freeing not to cpu slab */
|
||||
FREE_FROZEN, /* Freeing to frozen slab */
|
||||
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
|
||||
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
|
||||
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
|
||||
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
|
||||
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
|
||||
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
|
||||
FREE_SLAB, /* Slab freed to the page allocator */
|
||||
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
|
||||
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
|
||||
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
|
||||
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
|
||||
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
|
||||
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
|
||||
DEACTIVATE_BYPASS, /* Implicit deactivation */
|
||||
ORDER_FALLBACK, /* Number of times fallback was necessary */
|
||||
CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
|
||||
CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */
|
||||
CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
|
||||
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
|
||||
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
|
||||
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
|
||||
NR_SLUB_STAT_ITEMS
|
||||
};
|
||||
|
||||
#ifndef CONFIG_SLUB_TINY
|
||||
/*
|
||||
* When changing the layout, make sure freelist and tid are still compatible
|
||||
* with this_cpu_cmpxchg_double() alignment requirements.
|
||||
*/
|
||||
struct kmem_cache_cpu {
|
||||
union {
|
||||
struct {
|
||||
void **freelist; /* Pointer to next available object */
|
||||
unsigned long tid; /* Globally unique transaction id */
|
||||
};
|
||||
freelist_aba_t freelist_tid;
|
||||
};
|
||||
struct slab *slab; /* The slab from which we are allocating */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
struct slab *partial; /* Partially allocated frozen slabs */
|
||||
#endif
|
||||
local_lock_t lock; /* Protects the fields above */
|
||||
#ifdef CONFIG_SLUB_STATS
|
||||
unsigned stat[NR_SLUB_STAT_ITEMS];
|
||||
#endif
|
||||
};
|
||||
#endif /* CONFIG_SLUB_TINY */
|
||||
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
#define slub_percpu_partial(c) ((c)->partial)
|
||||
|
||||
#define slub_set_percpu_partial(c, p) \
|
||||
({ \
|
||||
slub_percpu_partial(c) = (p)->next; \
|
||||
})
|
||||
|
||||
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
|
||||
#else
|
||||
#define slub_percpu_partial(c) NULL
|
||||
|
||||
#define slub_set_percpu_partial(c, p)
|
||||
|
||||
#define slub_percpu_partial_read_once(c) NULL
|
||||
#endif // CONFIG_SLUB_CPU_PARTIAL
|
||||
|
||||
/*
|
||||
* Word size structure that can be atomically updated or read and that
|
||||
* contains both the order and the number of objects that a slab of the
|
||||
* given order would contain.
|
||||
*/
|
||||
struct kmem_cache_order_objects {
|
||||
unsigned int x;
|
||||
};
|
||||
|
||||
/*
|
||||
* Slab cache management.
|
||||
*/
|
||||
struct kmem_cache {
|
||||
#ifndef CONFIG_SLUB_TINY
|
||||
struct kmem_cache_cpu __percpu *cpu_slab;
|
||||
#endif
|
||||
/* Used for retrieving partial slabs, etc. */
|
||||
slab_flags_t flags;
|
||||
unsigned long min_partial;
|
||||
unsigned int size; /* The size of an object including metadata */
|
||||
unsigned int object_size;/* The size of an object without metadata */
|
||||
struct reciprocal_value reciprocal_size;
|
||||
unsigned int offset; /* Free pointer offset */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
/* Number of per cpu partial objects to keep around */
|
||||
unsigned int cpu_partial;
|
||||
/* Number of per cpu partial slabs to keep around */
|
||||
unsigned int cpu_partial_slabs;
|
||||
#endif
|
||||
struct kmem_cache_order_objects oo;
|
||||
|
||||
/* Allocation and freeing of slabs */
|
||||
struct kmem_cache_order_objects min;
|
||||
gfp_t allocflags; /* gfp flags to use on each alloc */
|
||||
int refcount; /* Refcount for slab cache destroy */
|
||||
void (*ctor)(void *);
|
||||
unsigned int inuse; /* Offset to metadata */
|
||||
unsigned int align; /* Alignment */
|
||||
unsigned int red_left_pad; /* Left redzone padding size */
|
||||
const char *name; /* Name (only for display!) */
|
||||
struct list_head list; /* List of slab caches */
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct kobject kobj; /* For sysfs */
|
||||
#endif
|
||||
#ifdef CONFIG_SLAB_FREELIST_HARDENED
|
||||
unsigned long random;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/*
|
||||
* Defragmentation by allocating from a remote node.
|
||||
*/
|
||||
unsigned int remote_node_defrag_ratio;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
unsigned int *random_seq;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
struct kasan_cache kasan_info;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HARDENED_USERCOPY
|
||||
unsigned int useroffset; /* Usercopy region offset */
|
||||
unsigned int usersize; /* Usercopy region size */
|
||||
#endif
|
||||
|
||||
struct kmem_cache_node *node[MAX_NUMNODES];
|
||||
};
|
||||
|
||||
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
|
||||
#define SLAB_SUPPORTS_SYSFS
|
||||
void sysfs_slab_unlink(struct kmem_cache *);
|
||||
void sysfs_slab_release(struct kmem_cache *);
|
||||
#else
|
||||
static inline void sysfs_slab_unlink(struct kmem_cache *s)
|
||||
{
|
||||
}
|
||||
static inline void sysfs_slab_release(struct kmem_cache *s)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
void *fixup_red_left(struct kmem_cache *s, void *p);
|
||||
|
||||
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
|
||||
void *x) {
|
||||
void *object = x - (x - slab_address(slab)) % cache->size;
|
||||
void *last_object = slab_address(slab) +
|
||||
(slab->objects - 1) * cache->size;
|
||||
void *result = (unlikely(object > last_object)) ? last_object : object;
|
||||
|
||||
result = fixup_red_left(cache, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Determine object index from a given position */
|
||||
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
|
||||
void *addr, void *obj)
|
||||
{
|
||||
return reciprocal_divide(kasan_reset_tag(obj) - addr,
|
||||
cache->reciprocal_size);
|
||||
}
|
||||
|
||||
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
|
||||
const struct slab *slab, void *obj)
|
||||
{
|
||||
if (is_kfence_address(obj))
|
||||
return 0;
|
||||
return __obj_to_index(cache, slab_address(slab), obj);
|
||||
}
|
||||
|
||||
static inline int objs_per_slab(const struct kmem_cache *cache,
|
||||
const struct slab *slab)
|
||||
{
|
||||
return slab->objects;
|
||||
}
|
||||
#endif /* _LINUX_SLUB_DEF_H */
|
@ -2125,11 +2125,6 @@ static struct cpuhp_step cpuhp_hp_states[] = {
|
||||
.startup.single = relay_prepare_cpu,
|
||||
.teardown.single = NULL,
|
||||
},
|
||||
[CPUHP_SLAB_PREPARE] = {
|
||||
.name = "slab:prepare",
|
||||
.startup.single = slab_prepare_cpu,
|
||||
.teardown.single = slab_dead_cpu,
|
||||
},
|
||||
[CPUHP_RCUTREE_PREP] = {
|
||||
.name = "RCU/tree:prepare",
|
||||
.startup.single = rcutree_prepare_cpu,
|
||||
|
@ -1985,7 +1985,6 @@ config FAULT_INJECTION
|
||||
config FAILSLAB
|
||||
bool "Fault-injection capability for kmalloc"
|
||||
depends on FAULT_INJECTION
|
||||
depends on SLAB || SLUB
|
||||
help
|
||||
Provide fault-injection capability for kmalloc.
|
||||
|
||||
|
@ -37,7 +37,7 @@ menuconfig KASAN
|
||||
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
|
||||
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
|
||||
HAVE_ARCH_KASAN_HW_TAGS
|
||||
depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
|
||||
depends on SYSFS && !SLUB_TINY
|
||||
select STACKDEPOT_ALWAYS_INIT
|
||||
help
|
||||
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
|
||||
@ -78,7 +78,7 @@ config KASAN_GENERIC
|
||||
bool "Generic KASAN"
|
||||
depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC
|
||||
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
|
||||
select SLUB_DEBUG if SLUB
|
||||
select SLUB_DEBUG
|
||||
select CONSTRUCTORS
|
||||
help
|
||||
Enables Generic KASAN.
|
||||
@ -89,13 +89,11 @@ config KASAN_GENERIC
|
||||
overhead of ~50% for dynamic allocations.
|
||||
The performance slowdown is ~x3.
|
||||
|
||||
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
|
||||
|
||||
config KASAN_SW_TAGS
|
||||
bool "Software Tag-Based KASAN"
|
||||
depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS
|
||||
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
|
||||
select SLUB_DEBUG if SLUB
|
||||
select SLUB_DEBUG
|
||||
select CONSTRUCTORS
|
||||
help
|
||||
Enables Software Tag-Based KASAN.
|
||||
@ -110,12 +108,9 @@ config KASAN_SW_TAGS
|
||||
May potentially introduce problems related to pointer casting and
|
||||
comparison, as it embeds a tag into the top byte of each pointer.
|
||||
|
||||
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
|
||||
|
||||
config KASAN_HW_TAGS
|
||||
bool "Hardware Tag-Based KASAN"
|
||||
depends on HAVE_ARCH_KASAN_HW_TAGS
|
||||
depends on SLUB
|
||||
help
|
||||
Enables Hardware Tag-Based KASAN.
|
||||
|
||||
|
@ -5,7 +5,7 @@ config HAVE_ARCH_KFENCE
|
||||
|
||||
menuconfig KFENCE
|
||||
bool "KFENCE: low-overhead sampling-based memory safety error detector"
|
||||
depends on HAVE_ARCH_KFENCE && (SLAB || SLUB)
|
||||
depends on HAVE_ARCH_KFENCE
|
||||
select STACKTRACE
|
||||
select IRQ_WORK
|
||||
help
|
||||
|
@ -11,7 +11,7 @@ config HAVE_KMSAN_COMPILER
|
||||
config KMSAN
|
||||
bool "KMSAN: detector of uninitialized values use"
|
||||
depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER
|
||||
depends on SLUB && DEBUG_KERNEL && !KASAN && !KCSAN
|
||||
depends on DEBUG_KERNEL && !KASAN && !KCSAN
|
||||
depends on !PREEMPT_RT
|
||||
select STACKDEPOT
|
||||
select STACKDEPOT_ALWAYS_INIT
|
||||
|
68
mm/Kconfig
68
mm/Kconfig
@ -226,52 +226,17 @@ config ZSMALLOC_CHAIN_SIZE
|
||||
|
||||
For more information, see zsmalloc documentation.
|
||||
|
||||
menu "SLAB allocator options"
|
||||
|
||||
choice
|
||||
prompt "Choose SLAB allocator"
|
||||
default SLUB
|
||||
help
|
||||
This option allows to select a slab allocator.
|
||||
|
||||
config SLAB_DEPRECATED
|
||||
bool "SLAB (DEPRECATED)"
|
||||
depends on !PREEMPT_RT
|
||||
help
|
||||
Deprecated and scheduled for removal in a few cycles. Replaced by
|
||||
SLUB.
|
||||
|
||||
If you cannot migrate to SLUB, please contact linux-mm@kvack.org
|
||||
and the people listed in the SLAB ALLOCATOR section of MAINTAINERS
|
||||
file, explaining why.
|
||||
|
||||
The regular slab allocator that is established and known to work
|
||||
well in all environments. It organizes cache hot objects in
|
||||
per cpu and per node queues.
|
||||
menu "Slab allocator options"
|
||||
|
||||
config SLUB
|
||||
bool "SLUB (Unqueued Allocator)"
|
||||
help
|
||||
SLUB is a slab allocator that minimizes cache line usage
|
||||
instead of managing queues of cached objects (SLAB approach).
|
||||
Per cpu caching is realized using slabs of objects instead
|
||||
of queues of objects. SLUB can use memory efficiently
|
||||
and has enhanced diagnostics. SLUB is the default choice for
|
||||
a slab allocator.
|
||||
|
||||
endchoice
|
||||
|
||||
config SLAB
|
||||
bool
|
||||
default y
|
||||
depends on SLAB_DEPRECATED
|
||||
def_bool y
|
||||
|
||||
config SLUB_TINY
|
||||
bool "Configure SLUB for minimal memory footprint"
|
||||
depends on SLUB && EXPERT
|
||||
bool "Configure for minimal memory footprint"
|
||||
depends on EXPERT
|
||||
select SLAB_MERGE_DEFAULT
|
||||
help
|
||||
Configures the SLUB allocator in a way to achieve minimal memory
|
||||
Configures the slab allocator in a way to achieve minimal memory
|
||||
footprint, sacrificing scalability, debugging and other features.
|
||||
This is intended only for the smallest system that had used the
|
||||
SLOB allocator and is not recommended for systems with more than
|
||||
@ -282,7 +247,6 @@ config SLUB_TINY
|
||||
config SLAB_MERGE_DEFAULT
|
||||
bool "Allow slab caches to be merged"
|
||||
default y
|
||||
depends on SLAB || SLUB
|
||||
help
|
||||
For reduced kernel memory fragmentation, slab caches can be
|
||||
merged when they share the same size and other characteristics.
|
||||
@ -296,7 +260,7 @@ config SLAB_MERGE_DEFAULT
|
||||
|
||||
config SLAB_FREELIST_RANDOM
|
||||
bool "Randomize slab freelist"
|
||||
depends on SLAB || (SLUB && !SLUB_TINY)
|
||||
depends on !SLUB_TINY
|
||||
help
|
||||
Randomizes the freelist order used on creating new pages. This
|
||||
security feature reduces the predictability of the kernel slab
|
||||
@ -304,21 +268,19 @@ config SLAB_FREELIST_RANDOM
|
||||
|
||||
config SLAB_FREELIST_HARDENED
|
||||
bool "Harden slab freelist metadata"
|
||||
depends on SLAB || (SLUB && !SLUB_TINY)
|
||||
depends on !SLUB_TINY
|
||||
help
|
||||
Many kernel heap attacks try to target slab cache metadata and
|
||||
other infrastructure. This options makes minor performance
|
||||
sacrifices to harden the kernel slab allocator against common
|
||||
freelist exploit methods. Some slab implementations have more
|
||||
sanity-checking than others. This option is most effective with
|
||||
CONFIG_SLUB.
|
||||
freelist exploit methods.
|
||||
|
||||
config SLUB_STATS
|
||||
default n
|
||||
bool "Enable SLUB performance statistics"
|
||||
depends on SLUB && SYSFS && !SLUB_TINY
|
||||
bool "Enable performance statistics"
|
||||
depends on SYSFS && !SLUB_TINY
|
||||
help
|
||||
SLUB statistics are useful to debug SLUBs allocation behavior in
|
||||
The statistics are useful to debug slab allocation behavior in
|
||||
order find ways to optimize the allocator. This should never be
|
||||
enabled for production use since keeping statistics slows down
|
||||
the allocator by a few percentage points. The slabinfo command
|
||||
@ -328,8 +290,8 @@ config SLUB_STATS
|
||||
|
||||
config SLUB_CPU_PARTIAL
|
||||
default y
|
||||
depends on SLUB && SMP && !SLUB_TINY
|
||||
bool "SLUB per cpu partial cache"
|
||||
depends on SMP && !SLUB_TINY
|
||||
bool "Enable per cpu partial caches"
|
||||
help
|
||||
Per cpu partial caches accelerate objects allocation and freeing
|
||||
that is local to a processor at the price of more indeterminism
|
||||
@ -339,7 +301,7 @@ config SLUB_CPU_PARTIAL
|
||||
|
||||
config RANDOM_KMALLOC_CACHES
|
||||
default n
|
||||
depends on SLUB && !SLUB_TINY
|
||||
depends on !SLUB_TINY
|
||||
bool "Randomize slab caches for normal kmalloc"
|
||||
help
|
||||
A hardening feature that creates multiple copies of slab caches for
|
||||
@ -354,7 +316,7 @@ config RANDOM_KMALLOC_CACHES
|
||||
limited degree of memory and CPU overhead that relates to hardware and
|
||||
system workload.
|
||||
|
||||
endmenu # SLAB allocator options
|
||||
endmenu # Slab allocator options
|
||||
|
||||
config SHUFFLE_PAGE_ALLOCATOR
|
||||
bool "Page allocator randomization"
|
||||
|
@ -45,18 +45,10 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
|
||||
Enable debug page memory allocations by default? This value
|
||||
can be overridden by debug_pagealloc=off|on.
|
||||
|
||||
config DEBUG_SLAB
|
||||
bool "Debug slab memory allocations"
|
||||
depends on DEBUG_KERNEL && SLAB
|
||||
help
|
||||
Say Y here to have the kernel do limited verification on memory
|
||||
allocation as well as poisoning memory on free to catch use of freed
|
||||
memory. This can make kmalloc/kfree-intensive workloads much slower.
|
||||
|
||||
config SLUB_DEBUG
|
||||
default y
|
||||
bool "Enable SLUB debugging support" if EXPERT
|
||||
depends on SLUB && SYSFS && !SLUB_TINY
|
||||
depends on SYSFS && !SLUB_TINY
|
||||
select STACKDEPOT if STACKTRACE_SUPPORT
|
||||
help
|
||||
SLUB has extensive debug support features. Disabling these can
|
||||
@ -66,7 +58,7 @@ config SLUB_DEBUG
|
||||
|
||||
config SLUB_DEBUG_ON
|
||||
bool "SLUB debugging on by default"
|
||||
depends on SLUB && SLUB_DEBUG
|
||||
depends on SLUB_DEBUG
|
||||
select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
|
||||
default n
|
||||
help
|
||||
@ -231,8 +223,8 @@ config DEBUG_KMEMLEAK
|
||||
allocations. See Documentation/dev-tools/kmemleak.rst for more
|
||||
details.
|
||||
|
||||
Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
|
||||
of finding leaks due to the slab objects poisoning.
|
||||
Enabling SLUB_DEBUG may increase the chances of finding leaks
|
||||
due to the slab objects poisoning.
|
||||
|
||||
In order to access the kmemleak file, debugfs needs to be
|
||||
mounted (usually at /sys/kernel/debug).
|
||||
|
@ -4,7 +4,6 @@
|
||||
#
|
||||
|
||||
KASAN_SANITIZE_slab_common.o := n
|
||||
KASAN_SANITIZE_slab.o := n
|
||||
KASAN_SANITIZE_slub.o := n
|
||||
KCSAN_SANITIZE_kmemleak.o := n
|
||||
|
||||
@ -12,7 +11,6 @@ KCSAN_SANITIZE_kmemleak.o := n
|
||||
# the same word but accesses to different bits of that word. Re-enable KCSAN
|
||||
# for these when we have more consensus on what to do about them.
|
||||
KCSAN_SANITIZE_slab_common.o := n
|
||||
KCSAN_SANITIZE_slab.o := n
|
||||
KCSAN_SANITIZE_slub.o := n
|
||||
KCSAN_SANITIZE_page_alloc.o := n
|
||||
# But enable explicit instrumentation for memory barriers.
|
||||
@ -22,7 +20,6 @@ KCSAN_INSTRUMENT_BARRIERS := y
|
||||
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
|
||||
# free pages, or a task is migrated between nodes.
|
||||
KCOV_INSTRUMENT_slab_common.o := n
|
||||
KCOV_INSTRUMENT_slab.o := n
|
||||
KCOV_INSTRUMENT_slub.o := n
|
||||
KCOV_INSTRUMENT_page_alloc.o := n
|
||||
KCOV_INSTRUMENT_debug-pagealloc.o := n
|
||||
@ -66,6 +63,7 @@ obj-y += page-alloc.o
|
||||
obj-y += init-mm.o
|
||||
obj-y += memblock.o
|
||||
obj-y += $(memory-hotplug-y)
|
||||
obj-y += slub.o
|
||||
|
||||
ifdef CONFIG_MMU
|
||||
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
|
||||
@ -82,8 +80,6 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
|
||||
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
|
||||
obj-$(CONFIG_KSM) += ksm.o
|
||||
obj-$(CONFIG_PAGE_POISONING) += page_poison.o
|
||||
obj-$(CONFIG_SLAB) += slab.o
|
||||
obj-$(CONFIG_SLUB) += slub.o
|
||||
obj-$(CONFIG_KASAN) += kasan/
|
||||
obj-$(CONFIG_KFENCE) += kfence/
|
||||
obj-$(CONFIG_KMSAN) += kmsan/
|
||||
|
@ -36,7 +36,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
|
||||
#ifdef CONFIG_SLUB_DEBUG_ON
|
||||
#define DMAPOOL_DEBUG 1
|
||||
#endif
|
||||
|
||||
|
@ -153,10 +153,6 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
|
||||
* 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be
|
||||
* accessed after being freed. We preassign tags for objects in these
|
||||
* caches as well.
|
||||
* 3. For SLAB allocator we can't preassign tags randomly since the freelist
|
||||
* is stored as an array of indexes instead of a linked list. Assign tags
|
||||
* based on objects indexes, so that objects that are next to each other
|
||||
* get different tags.
|
||||
*/
|
||||
static inline u8 assign_tag(struct kmem_cache *cache,
|
||||
const void *object, bool init)
|
||||
@ -171,17 +167,12 @@ static inline u8 assign_tag(struct kmem_cache *cache,
|
||||
if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
|
||||
return init ? KASAN_TAG_KERNEL : kasan_random_tag();
|
||||
|
||||
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
|
||||
#ifdef CONFIG_SLAB
|
||||
/* For SLAB assign tags based on the object index in the freelist. */
|
||||
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
|
||||
#else
|
||||
/*
|
||||
* For SLUB assign a random tag during slab creation, otherwise reuse
|
||||
* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU,
|
||||
* assign a random tag during slab creation, otherwise reuse
|
||||
* the already assigned tag.
|
||||
*/
|
||||
return init ? kasan_random_tag() : get_tag(object);
|
||||
#endif
|
||||
}
|
||||
|
||||
void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
|
||||
|
@ -373,8 +373,7 @@ void kasan_set_track(struct kasan_track *track, gfp_t flags);
|
||||
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags);
|
||||
void kasan_save_free_info(struct kmem_cache *cache, void *object);
|
||||
|
||||
#if defined(CONFIG_KASAN_GENERIC) && \
|
||||
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
bool kasan_quarantine_put(struct kmem_cache *cache, void *object);
|
||||
void kasan_quarantine_reduce(void);
|
||||
void kasan_quarantine_remove_cache(struct kmem_cache *cache);
|
||||
|
@ -144,10 +144,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
|
||||
{
|
||||
void *object = qlink_to_object(qlink, cache);
|
||||
struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
|
||||
unsigned long flags;
|
||||
|
||||
if (IS_ENABLED(CONFIG_SLAB))
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
* If init_on_free is enabled and KASAN's free metadata is stored in
|
||||
@ -166,9 +162,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
|
||||
*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
|
||||
|
||||
___cache_free(cache, object, _THIS_IP_);
|
||||
|
||||
if (IS_ENABLED(CONFIG_SLAB))
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
@ -463,11 +463,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
|
||||
/* Set required slab fields. */
|
||||
slab = virt_to_slab((void *)meta->addr);
|
||||
slab->slab_cache = cache;
|
||||
#if defined(CONFIG_SLUB)
|
||||
slab->objects = 1;
|
||||
#elif defined(CONFIG_SLAB)
|
||||
slab->s_mem = addr;
|
||||
#endif
|
||||
|
||||
/* Memory initialization. */
|
||||
set_canary(meta);
|
||||
|
@ -64,6 +64,7 @@
|
||||
#include <linux/psi.h>
|
||||
#include <linux/seq_buf.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include "internal.h"
|
||||
#include <net/sock.h>
|
||||
#include <net/ip.h>
|
||||
@ -5149,7 +5150,7 @@ out_kfree:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
|
||||
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
|
||||
static int mem_cgroup_slab_show(struct seq_file *m, void *p)
|
||||
{
|
||||
/*
|
||||
@ -5258,8 +5259,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
|
||||
.write = mem_cgroup_reset,
|
||||
.read_u64 = mem_cgroup_read_u64,
|
||||
},
|
||||
#if defined(CONFIG_MEMCG_KMEM) && \
|
||||
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
|
||||
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
|
||||
{
|
||||
.name = "kmem.slabinfo",
|
||||
.seq_show = mem_cgroup_slab_show,
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include <linux/writeback.h>
|
||||
#include "slab.h"
|
||||
|
||||
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
|
||||
#ifdef CONFIG_SLUB_DEBUG_ON
|
||||
static void poison_error(mempool_t *pool, void *element, size_t size,
|
||||
size_t byte)
|
||||
{
|
||||
@ -95,14 +95,14 @@ static void poison_element(mempool_t *pool, void *element)
|
||||
kunmap_atomic(addr);
|
||||
}
|
||||
}
|
||||
#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
|
||||
#else /* CONFIG_SLUB_DEBUG_ON */
|
||||
static inline void check_element(mempool_t *pool, void *element)
|
||||
{
|
||||
}
|
||||
static inline void poison_element(mempool_t *pool, void *element)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
|
||||
#endif /* CONFIG_SLUB_DEBUG_ON */
|
||||
|
||||
static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
|
||||
{
|
||||
|
551
mm/slab.h
551
mm/slab.h
@ -1,10 +1,20 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef MM_SLAB_H
|
||||
#define MM_SLAB_H
|
||||
|
||||
#include <linux/reciprocal_div.h>
|
||||
#include <linux/list_lru.h>
|
||||
#include <linux/local_lock.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/kasan.h>
|
||||
|
||||
/*
|
||||
* Internal slab definitions
|
||||
*/
|
||||
void __init kmem_cache_init(void);
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
# ifdef system_has_cmpxchg128
|
||||
@ -42,21 +52,6 @@ typedef union {
|
||||
struct slab {
|
||||
unsigned long __page_flags;
|
||||
|
||||
#if defined(CONFIG_SLAB)
|
||||
|
||||
struct kmem_cache *slab_cache;
|
||||
union {
|
||||
struct {
|
||||
struct list_head slab_list;
|
||||
void *freelist; /* array of free object indexes */
|
||||
void *s_mem; /* first object */
|
||||
};
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
unsigned int active;
|
||||
|
||||
#elif defined(CONFIG_SLUB)
|
||||
|
||||
struct kmem_cache *slab_cache;
|
||||
union {
|
||||
struct {
|
||||
@ -91,10 +86,6 @@ struct slab {
|
||||
};
|
||||
unsigned int __unused;
|
||||
|
||||
#else
|
||||
#error "Unexpected slab allocator configured"
|
||||
#endif
|
||||
|
||||
atomic_t __page_refcount;
|
||||
#ifdef CONFIG_MEMCG
|
||||
unsigned long memcg_data;
|
||||
@ -111,7 +102,7 @@ SLAB_MATCH(memcg_data, memcg_data);
|
||||
#endif
|
||||
#undef SLAB_MATCH
|
||||
static_assert(sizeof(struct slab) <= sizeof(struct page));
|
||||
#if defined(system_has_freelist_aba) && defined(CONFIG_SLUB)
|
||||
#if defined(system_has_freelist_aba)
|
||||
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
|
||||
#endif
|
||||
|
||||
@ -228,21 +219,138 @@ static inline size_t slab_size(const struct slab *slab)
|
||||
return PAGE_SIZE << slab_order(slab);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
#include <linux/slab_def.h>
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
#define slub_percpu_partial(c) ((c)->partial)
|
||||
|
||||
#define slub_set_percpu_partial(c, p) \
|
||||
({ \
|
||||
slub_percpu_partial(c) = (p)->next; \
|
||||
})
|
||||
|
||||
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
|
||||
#else
|
||||
#define slub_percpu_partial(c) NULL
|
||||
|
||||
#define slub_set_percpu_partial(c, p)
|
||||
|
||||
#define slub_percpu_partial_read_once(c) NULL
|
||||
#endif // CONFIG_SLUB_CPU_PARTIAL
|
||||
|
||||
/*
|
||||
* Word size structure that can be atomically updated or read and that
|
||||
* contains both the order and the number of objects that a slab of the
|
||||
* given order would contain.
|
||||
*/
|
||||
struct kmem_cache_order_objects {
|
||||
unsigned int x;
|
||||
};
|
||||
|
||||
/*
|
||||
* Slab cache management.
|
||||
*/
|
||||
struct kmem_cache {
|
||||
#ifndef CONFIG_SLUB_TINY
|
||||
struct kmem_cache_cpu __percpu *cpu_slab;
|
||||
#endif
|
||||
/* Used for retrieving partial slabs, etc. */
|
||||
slab_flags_t flags;
|
||||
unsigned long min_partial;
|
||||
unsigned int size; /* Object size including metadata */
|
||||
unsigned int object_size; /* Object size without metadata */
|
||||
struct reciprocal_value reciprocal_size;
|
||||
unsigned int offset; /* Free pointer offset */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
/* Number of per cpu partial objects to keep around */
|
||||
unsigned int cpu_partial;
|
||||
/* Number of per cpu partial slabs to keep around */
|
||||
unsigned int cpu_partial_slabs;
|
||||
#endif
|
||||
struct kmem_cache_order_objects oo;
|
||||
|
||||
/* Allocation and freeing of slabs */
|
||||
struct kmem_cache_order_objects min;
|
||||
gfp_t allocflags; /* gfp flags to use on each alloc */
|
||||
int refcount; /* Refcount for slab cache destroy */
|
||||
void (*ctor)(void *object); /* Object constructor */
|
||||
unsigned int inuse; /* Offset to metadata */
|
||||
unsigned int align; /* Alignment */
|
||||
unsigned int red_left_pad; /* Left redzone padding size */
|
||||
const char *name; /* Name (only for display!) */
|
||||
struct list_head list; /* List of slab caches */
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct kobject kobj; /* For sysfs */
|
||||
#endif
|
||||
#ifdef CONFIG_SLAB_FREELIST_HARDENED
|
||||
unsigned long random;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
#include <linux/slub_def.h>
|
||||
#ifdef CONFIG_NUMA
|
||||
/*
|
||||
* Defragmentation by allocating from a remote node.
|
||||
*/
|
||||
unsigned int remote_node_defrag_ratio;
|
||||
#endif
|
||||
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/fault-inject.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/list_lru.h>
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
unsigned int *random_seq;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
struct kasan_cache kasan_info;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HARDENED_USERCOPY
|
||||
unsigned int useroffset; /* Usercopy region offset */
|
||||
unsigned int usersize; /* Usercopy region size */
|
||||
#endif
|
||||
|
||||
struct kmem_cache_node *node[MAX_NUMNODES];
|
||||
};
|
||||
|
||||
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
|
||||
#define SLAB_SUPPORTS_SYSFS
|
||||
void sysfs_slab_unlink(struct kmem_cache *s);
|
||||
void sysfs_slab_release(struct kmem_cache *s);
|
||||
#else
|
||||
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
|
||||
static inline void sysfs_slab_release(struct kmem_cache *s) { }
|
||||
#endif
|
||||
|
||||
void *fixup_red_left(struct kmem_cache *s, void *p);
|
||||
|
||||
static inline void *nearest_obj(struct kmem_cache *cache,
|
||||
const struct slab *slab, void *x)
|
||||
{
|
||||
void *object = x - (x - slab_address(slab)) % cache->size;
|
||||
void *last_object = slab_address(slab) +
|
||||
(slab->objects - 1) * cache->size;
|
||||
void *result = (unlikely(object > last_object)) ? last_object : object;
|
||||
|
||||
result = fixup_red_left(cache, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Determine object index from a given position */
|
||||
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
|
||||
void *addr, void *obj)
|
||||
{
|
||||
return reciprocal_divide(kasan_reset_tag(obj) - addr,
|
||||
cache->reciprocal_size);
|
||||
}
|
||||
|
||||
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
|
||||
const struct slab *slab, void *obj)
|
||||
{
|
||||
if (is_kfence_address(obj))
|
||||
return 0;
|
||||
return __obj_to_index(cache, slab_address(slab), obj);
|
||||
}
|
||||
|
||||
static inline int objs_per_slab(const struct kmem_cache *cache,
|
||||
const struct slab *slab)
|
||||
{
|
||||
return slab->objects;
|
||||
}
|
||||
|
||||
/*
|
||||
* State of the slab allocator.
|
||||
@ -281,19 +389,39 @@ extern const struct kmalloc_info_struct {
|
||||
void setup_kmalloc_cache_index_table(void);
|
||||
void create_kmalloc_caches(slab_flags_t);
|
||||
|
||||
/* Find the kmalloc slab corresponding for a certain size */
|
||||
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller);
|
||||
extern u8 kmalloc_size_index[24];
|
||||
|
||||
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t orig_size,
|
||||
unsigned long caller);
|
||||
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller);
|
||||
static inline unsigned int size_index_elem(unsigned int bytes)
|
||||
{
|
||||
return (bytes - 1) / 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the kmem_cache structure that serves a given size of
|
||||
* allocation
|
||||
*
|
||||
* This assumes size is larger than zero and not larger than
|
||||
* KMALLOC_MAX_CACHE_SIZE and the caller must check that.
|
||||
*/
|
||||
static inline struct kmem_cache *
|
||||
kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
|
||||
{
|
||||
unsigned int index;
|
||||
|
||||
if (size <= 192)
|
||||
index = kmalloc_size_index[size_index_elem(size)];
|
||||
else
|
||||
index = fls(size - 1);
|
||||
|
||||
return kmalloc_caches[kmalloc_type(flags, caller)][index];
|
||||
}
|
||||
|
||||
gfp_t kmalloc_fix_flags(gfp_t flags);
|
||||
|
||||
/* Functions provided by the slab allocators */
|
||||
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
|
||||
|
||||
void __init kmem_cache_init(void);
|
||||
void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
|
||||
slab_flags_t flags);
|
||||
extern void create_boot_cache(struct kmem_cache *, const char *name,
|
||||
@ -320,26 +448,16 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
|
||||
SLAB_CACHE_DMA32 | SLAB_PANIC | \
|
||||
SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
|
||||
|
||||
#if defined(CONFIG_DEBUG_SLAB)
|
||||
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
|
||||
#elif defined(CONFIG_SLUB_DEBUG)
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
|
||||
SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
|
||||
#else
|
||||
#define SLAB_DEBUG_FLAGS (0)
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SLAB)
|
||||
#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
|
||||
SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
|
||||
SLAB_ACCOUNT | SLAB_NO_MERGE)
|
||||
#elif defined(CONFIG_SLUB)
|
||||
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
|
||||
SLAB_TEMPORARY | SLAB_ACCOUNT | \
|
||||
SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)
|
||||
#else
|
||||
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
|
||||
#endif
|
||||
|
||||
/* Common flags available with current configuration */
|
||||
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
|
||||
@ -387,12 +505,6 @@ void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
|
||||
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
|
||||
size_t count, loff_t *ppos);
|
||||
|
||||
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
|
||||
{
|
||||
return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
|
||||
NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
#ifdef CONFIG_SLUB_DEBUG_ON
|
||||
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
|
||||
@ -452,238 +564,32 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
|
||||
gfp_t gfp, bool new_slab);
|
||||
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
|
||||
enum node_stat_item idx, int nr);
|
||||
|
||||
static inline void memcg_free_slab_cgroups(struct slab *slab)
|
||||
{
|
||||
kfree(slab_objcgs(slab));
|
||||
slab->memcg_data = 0;
|
||||
}
|
||||
|
||||
static inline size_t obj_full_size(struct kmem_cache *s)
|
||||
{
|
||||
/*
|
||||
* For each accounted object there is an extra space which is used
|
||||
* to store obj_cgroup membership. Charge it too.
|
||||
*/
|
||||
return s->size + sizeof(struct obj_cgroup *);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns false if the allocation should fail.
|
||||
*/
|
||||
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t objects, gfp_t flags)
|
||||
{
|
||||
struct obj_cgroup *objcg;
|
||||
|
||||
if (!memcg_kmem_online())
|
||||
return true;
|
||||
|
||||
if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* The obtained objcg pointer is safe to use within the current scope,
|
||||
* defined by current task or set_active_memcg() pair.
|
||||
* obj_cgroup_get() is used to get a permanent reference.
|
||||
*/
|
||||
objcg = current_obj_cgroup();
|
||||
if (!objcg)
|
||||
return true;
|
||||
|
||||
if (lru) {
|
||||
int ret;
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
memcg = get_mem_cgroup_from_objcg(objcg);
|
||||
ret = memcg_list_lru_alloc(memcg, lru, flags);
|
||||
css_put(&memcg->css);
|
||||
|
||||
if (ret)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
|
||||
return false;
|
||||
|
||||
*objcgp = objcg;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
|
||||
struct obj_cgroup *objcg,
|
||||
gfp_t flags, size_t size,
|
||||
void **p)
|
||||
{
|
||||
struct slab *slab;
|
||||
unsigned long off;
|
||||
size_t i;
|
||||
|
||||
if (!memcg_kmem_online() || !objcg)
|
||||
return;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
if (likely(p[i])) {
|
||||
slab = virt_to_slab(p[i]);
|
||||
|
||||
if (!slab_objcgs(slab) &&
|
||||
memcg_alloc_slab_cgroups(slab, s, flags,
|
||||
false)) {
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
continue;
|
||||
}
|
||||
|
||||
off = obj_to_index(s, slab, p[i]);
|
||||
obj_cgroup_get(objcg);
|
||||
slab_objcgs(slab)[off] = objcg;
|
||||
mod_objcg_state(objcg, slab_pgdat(slab),
|
||||
cache_vmstat_idx(s), obj_full_size(s));
|
||||
} else {
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
|
||||
void **p, int objects)
|
||||
{
|
||||
struct obj_cgroup **objcgs;
|
||||
int i;
|
||||
|
||||
if (!memcg_kmem_online())
|
||||
return;
|
||||
|
||||
objcgs = slab_objcgs(slab);
|
||||
if (!objcgs)
|
||||
return;
|
||||
|
||||
for (i = 0; i < objects; i++) {
|
||||
struct obj_cgroup *objcg;
|
||||
unsigned int off;
|
||||
|
||||
off = obj_to_index(s, slab, p[i]);
|
||||
objcg = objcgs[off];
|
||||
if (!objcg)
|
||||
continue;
|
||||
|
||||
objcgs[off] = NULL;
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
|
||||
-obj_full_size(s));
|
||||
obj_cgroup_put(objcg);
|
||||
}
|
||||
}
|
||||
|
||||
#else /* CONFIG_MEMCG_KMEM */
|
||||
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int memcg_alloc_slab_cgroups(struct slab *slab,
|
||||
struct kmem_cache *s, gfp_t gfp,
|
||||
bool new_slab)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void memcg_free_slab_cgroups(struct slab *slab)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t objects, gfp_t flags)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
|
||||
struct obj_cgroup *objcg,
|
||||
gfp_t flags, size_t size,
|
||||
void **p)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
|
||||
void **p, int objects)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_MEMCG_KMEM */
|
||||
|
||||
static inline struct kmem_cache *virt_to_cache(const void *obj)
|
||||
{
|
||||
struct slab *slab;
|
||||
|
||||
slab = virt_to_slab(obj);
|
||||
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
|
||||
__func__))
|
||||
return NULL;
|
||||
return slab->slab_cache;
|
||||
}
|
||||
|
||||
static __always_inline void account_slab(struct slab *slab, int order,
|
||||
struct kmem_cache *s, gfp_t gfp)
|
||||
{
|
||||
if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
|
||||
memcg_alloc_slab_cgroups(slab, s, gfp, true);
|
||||
|
||||
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
|
||||
PAGE_SIZE << order);
|
||||
}
|
||||
|
||||
static __always_inline void unaccount_slab(struct slab *slab, int order,
|
||||
struct kmem_cache *s)
|
||||
{
|
||||
if (memcg_kmem_online())
|
||||
memcg_free_slab_cgroups(slab);
|
||||
|
||||
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
|
||||
-(PAGE_SIZE << order));
|
||||
}
|
||||
|
||||
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
|
||||
{
|
||||
struct kmem_cache *cachep;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
|
||||
!kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
|
||||
return s;
|
||||
|
||||
cachep = virt_to_cache(x);
|
||||
if (WARN(cachep && cachep != s,
|
||||
"%s: Wrong slab cache. %s but object is from %s\n",
|
||||
__func__, s->name, cachep->name))
|
||||
print_tracking(cachep, x);
|
||||
return cachep;
|
||||
}
|
||||
|
||||
void free_large_kmalloc(struct folio *folio, void *object);
|
||||
|
||||
size_t __ksize(const void *objp);
|
||||
|
||||
static inline size_t slab_ksize(const struct kmem_cache *s)
|
||||
{
|
||||
#ifndef CONFIG_SLUB
|
||||
return s->object_size;
|
||||
|
||||
#else /* CONFIG_SLUB */
|
||||
# ifdef CONFIG_SLUB_DEBUG
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
/*
|
||||
* Debugging requires use of the padding between object
|
||||
* and whatever may come after it.
|
||||
*/
|
||||
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
|
||||
return s->object_size;
|
||||
# endif
|
||||
#endif
|
||||
if (s->flags & SLAB_KASAN)
|
||||
return s->object_size;
|
||||
/*
|
||||
@ -697,128 +603,9 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
|
||||
* Else we can use all the padding etc for the allocation
|
||||
*/
|
||||
return s->size;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t size, gfp_t flags)
|
||||
{
|
||||
flags &= gfp_allowed_mask;
|
||||
|
||||
might_alloc(flags);
|
||||
|
||||
if (should_failslab(s, flags))
|
||||
return NULL;
|
||||
|
||||
if (!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))
|
||||
return NULL;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static inline void slab_post_alloc_hook(struct kmem_cache *s,
|
||||
struct obj_cgroup *objcg, gfp_t flags,
|
||||
size_t size, void **p, bool init,
|
||||
unsigned int orig_size)
|
||||
{
|
||||
unsigned int zero_size = s->object_size;
|
||||
bool kasan_init = init;
|
||||
size_t i;
|
||||
|
||||
flags &= gfp_allowed_mask;
|
||||
|
||||
/*
|
||||
* For kmalloc object, the allocated memory size(object_size) is likely
|
||||
* larger than the requested size(orig_size). If redzone check is
|
||||
* enabled for the extra space, don't zero it, as it will be redzoned
|
||||
* soon. The redzone operation for this extra space could be seen as a
|
||||
* replacement of current poisoning under certain debug option, and
|
||||
* won't break other sanity checks.
|
||||
*/
|
||||
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
|
||||
(s->flags & SLAB_KMALLOC))
|
||||
zero_size = orig_size;
|
||||
|
||||
/*
|
||||
* When slub_debug is enabled, avoid memory initialization integrated
|
||||
* into KASAN and instead zero out the memory via the memset below with
|
||||
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
|
||||
* cause false-positive reports. This does not lead to a performance
|
||||
* penalty on production builds, as slub_debug is not intended to be
|
||||
* enabled there.
|
||||
*/
|
||||
if (__slub_debug_enabled())
|
||||
kasan_init = false;
|
||||
|
||||
/*
|
||||
* As memory initialization might be integrated into KASAN,
|
||||
* kasan_slab_alloc and initialization memset must be
|
||||
* kept together to avoid discrepancies in behavior.
|
||||
*
|
||||
* As p[i] might get tagged, memset and kmemleak hook come after KASAN.
|
||||
*/
|
||||
for (i = 0; i < size; i++) {
|
||||
p[i] = kasan_slab_alloc(s, p[i], flags, kasan_init);
|
||||
if (p[i] && init && (!kasan_init || !kasan_has_integrated_init()))
|
||||
memset(p[i], 0, zero_size);
|
||||
kmemleak_alloc_recursive(p[i], s->object_size, 1,
|
||||
s->flags, flags);
|
||||
kmsan_slab_alloc(s, p[i], flags);
|
||||
}
|
||||
|
||||
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* The slab lists for all objects.
|
||||
*/
|
||||
struct kmem_cache_node {
|
||||
#ifdef CONFIG_SLAB
|
||||
raw_spinlock_t list_lock;
|
||||
struct list_head slabs_partial; /* partial list first, better asm code */
|
||||
struct list_head slabs_full;
|
||||
struct list_head slabs_free;
|
||||
unsigned long total_slabs; /* length of all slab lists */
|
||||
unsigned long free_slabs; /* length of free slab list only */
|
||||
unsigned long free_objects;
|
||||
unsigned int free_limit;
|
||||
unsigned int colour_next; /* Per-node cache coloring */
|
||||
struct array_cache *shared; /* shared per node */
|
||||
struct alien_cache **alien; /* on other nodes */
|
||||
unsigned long next_reap; /* updated without locking */
|
||||
int free_touched; /* updated without locking */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
spinlock_t list_lock;
|
||||
unsigned long nr_partial;
|
||||
struct list_head partial;
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
atomic_long_t nr_slabs;
|
||||
atomic_long_t total_objects;
|
||||
struct list_head full;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
|
||||
{
|
||||
return s->node[node];
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterator over all nodes. The body will be executed for each node that has
|
||||
* a kmem_cache_node structure allocated (which is true for all online nodes)
|
||||
*/
|
||||
#define for_each_kmem_cache_node(__s, __node, __n) \
|
||||
for (__node = 0; __node < nr_node_ids; __node++) \
|
||||
if ((__n = get_node(__s, __node)))
|
||||
|
||||
|
||||
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
|
||||
void dump_unreclaimable_slab(void);
|
||||
#else
|
||||
static inline void dump_unreclaimable_slab(void)
|
||||
|
231
mm/slab_common.c
231
mm/slab_common.c
@ -21,6 +21,7 @@
|
||||
#include <linux/swiotlb.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/tlbflush.h>
|
||||
@ -71,10 +72,8 @@ static int __init setup_slab_merge(char *str)
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
|
||||
__setup_param("slub_merge", slub_merge, setup_slab_merge, 0);
|
||||
#endif
|
||||
|
||||
__setup("slab_nomerge", setup_slab_nomerge);
|
||||
__setup("slab_merge", setup_slab_merge);
|
||||
@ -197,10 +196,6 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
|
||||
if (s->size - size >= sizeof(void *))
|
||||
continue;
|
||||
|
||||
if (IS_ENABLED(CONFIG_SLAB) && align &&
|
||||
(align > s->align || s->align % align))
|
||||
continue;
|
||||
|
||||
return s;
|
||||
}
|
||||
return NULL;
|
||||
@ -670,7 +665,7 @@ EXPORT_SYMBOL(random_kmalloc_seed);
|
||||
* of two cache sizes there. The size of larger slabs can be determined using
|
||||
* fls.
|
||||
*/
|
||||
static u8 size_index[24] __ro_after_init = {
|
||||
u8 kmalloc_size_index[24] __ro_after_init = {
|
||||
3, /* 8 */
|
||||
4, /* 16 */
|
||||
5, /* 24 */
|
||||
@ -697,33 +692,6 @@ static u8 size_index[24] __ro_after_init = {
|
||||
2 /* 192 */
|
||||
};
|
||||
|
||||
static inline unsigned int size_index_elem(unsigned int bytes)
|
||||
{
|
||||
return (bytes - 1) / 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the kmem_cache structure that serves a given size of
|
||||
* allocation
|
||||
*/
|
||||
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
|
||||
{
|
||||
unsigned int index;
|
||||
|
||||
if (size <= 192) {
|
||||
if (!size)
|
||||
return ZERO_SIZE_PTR;
|
||||
|
||||
index = size_index[size_index_elem(size)];
|
||||
} else {
|
||||
if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
|
||||
return NULL;
|
||||
index = fls(size - 1);
|
||||
}
|
||||
|
||||
return kmalloc_caches[kmalloc_type(flags, caller)][index];
|
||||
}
|
||||
|
||||
size_t kmalloc_size_roundup(size_t size)
|
||||
{
|
||||
if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
|
||||
@ -848,9 +816,9 @@ void __init setup_kmalloc_cache_index_table(void)
|
||||
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
|
||||
unsigned int elem = size_index_elem(i);
|
||||
|
||||
if (elem >= ARRAY_SIZE(size_index))
|
||||
if (elem >= ARRAY_SIZE(kmalloc_size_index))
|
||||
break;
|
||||
size_index[elem] = KMALLOC_SHIFT_LOW;
|
||||
kmalloc_size_index[elem] = KMALLOC_SHIFT_LOW;
|
||||
}
|
||||
|
||||
if (KMALLOC_MIN_SIZE >= 64) {
|
||||
@ -859,7 +827,7 @@ void __init setup_kmalloc_cache_index_table(void)
|
||||
* is 64 byte.
|
||||
*/
|
||||
for (i = 64 + 8; i <= 96; i += 8)
|
||||
size_index[size_index_elem(i)] = 7;
|
||||
kmalloc_size_index[size_index_elem(i)] = 7;
|
||||
|
||||
}
|
||||
|
||||
@ -870,7 +838,7 @@ void __init setup_kmalloc_cache_index_table(void)
|
||||
* instead.
|
||||
*/
|
||||
for (i = 128 + 8; i <= 192; i += 8)
|
||||
size_index[size_index_elem(i)] = 8;
|
||||
kmalloc_size_index[size_index_elem(i)] = 8;
|
||||
}
|
||||
}
|
||||
|
||||
@ -968,95 +936,6 @@ void __init create_kmalloc_caches(slab_flags_t flags)
|
||||
slab_state = UP;
|
||||
}
|
||||
|
||||
void free_large_kmalloc(struct folio *folio, void *object)
|
||||
{
|
||||
unsigned int order = folio_order(folio);
|
||||
|
||||
if (WARN_ON_ONCE(order == 0))
|
||||
pr_warn_once("object pointer: 0x%p\n", object);
|
||||
|
||||
kmemleak_free(object);
|
||||
kasan_kfree_large(object);
|
||||
kmsan_kfree_large(object);
|
||||
|
||||
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
|
||||
-(PAGE_SIZE << order));
|
||||
__free_pages(folio_page(folio, 0), order);
|
||||
}
|
||||
|
||||
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
|
||||
static __always_inline
|
||||
void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
|
||||
{
|
||||
struct kmem_cache *s;
|
||||
void *ret;
|
||||
|
||||
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
|
||||
ret = __kmalloc_large_node(size, flags, node);
|
||||
trace_kmalloc(caller, ret, size,
|
||||
PAGE_SIZE << get_order(size), flags, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
s = kmalloc_slab(size, flags, caller);
|
||||
|
||||
if (unlikely(ZERO_OR_NULL_PTR(s)))
|
||||
return s;
|
||||
|
||||
ret = __kmem_cache_alloc_node(s, flags, node, size, caller);
|
||||
ret = kasan_kmalloc(s, ret, size, flags);
|
||||
trace_kmalloc(caller, ret, size, s->size, flags, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, node, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node);
|
||||
|
||||
void *__kmalloc(size_t size, gfp_t flags)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc);
|
||||
|
||||
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
|
||||
int node, unsigned long caller)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, node, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node_track_caller);
|
||||
|
||||
/**
|
||||
* kfree - free previously allocated memory
|
||||
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
|
||||
*
|
||||
* If @object is NULL, no operation is performed.
|
||||
*/
|
||||
void kfree(const void *object)
|
||||
{
|
||||
struct folio *folio;
|
||||
struct slab *slab;
|
||||
struct kmem_cache *s;
|
||||
|
||||
trace_kfree(_RET_IP_, object);
|
||||
|
||||
if (unlikely(ZERO_OR_NULL_PTR(object)))
|
||||
return;
|
||||
|
||||
folio = virt_to_folio(object);
|
||||
if (unlikely(!folio_test_slab(folio))) {
|
||||
free_large_kmalloc(folio, (void *)object);
|
||||
return;
|
||||
}
|
||||
|
||||
slab = folio_slab(folio);
|
||||
s = slab->slab_cache;
|
||||
__kmem_cache_free(s, (void *)object, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(kfree);
|
||||
|
||||
/**
|
||||
* __ksize -- Report full size of underlying allocation
|
||||
* @object: pointer to the object
|
||||
@ -1093,30 +972,6 @@ size_t __ksize(const void *object)
|
||||
return slab_ksize(folio_slab(folio)->slab_cache);
|
||||
}
|
||||
|
||||
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
|
||||
{
|
||||
void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE,
|
||||
size, _RET_IP_);
|
||||
|
||||
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
|
||||
|
||||
ret = kasan_kmalloc(s, ret, size, gfpflags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_trace);
|
||||
|
||||
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t size)
|
||||
{
|
||||
void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);
|
||||
|
||||
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
|
||||
|
||||
ret = kasan_kmalloc(s, ret, size, gfpflags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_node_trace);
|
||||
|
||||
gfp_t kmalloc_fix_flags(gfp_t flags)
|
||||
{
|
||||
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
|
||||
@ -1129,57 +984,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags)
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*
|
||||
* To avoid unnecessary overhead, we pass through large allocation requests
|
||||
* directly to the page allocator. We use __GFP_COMP, because we will need to
|
||||
* know the allocation order to free the pages properly in kfree.
|
||||
*/
|
||||
|
||||
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
struct page *page;
|
||||
void *ptr = NULL;
|
||||
unsigned int order = get_order(size);
|
||||
|
||||
if (unlikely(flags & GFP_SLAB_BUG_MASK))
|
||||
flags = kmalloc_fix_flags(flags);
|
||||
|
||||
flags |= __GFP_COMP;
|
||||
page = alloc_pages_node(node, flags, order);
|
||||
if (page) {
|
||||
ptr = page_address(page);
|
||||
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
|
||||
PAGE_SIZE << order);
|
||||
}
|
||||
|
||||
ptr = kasan_kmalloc_large(ptr, size, flags);
|
||||
/* As ptr might get tagged, call kmemleak hook after KASAN. */
|
||||
kmemleak_alloc(ptr, size, 1, flags);
|
||||
kmsan_kmalloc_large(ptr, size, flags);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void *kmalloc_large(size_t size, gfp_t flags)
|
||||
{
|
||||
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
|
||||
|
||||
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
|
||||
flags, NUMA_NO_NODE);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_large);
|
||||
|
||||
/*
 * Large allocation for the given @node; the kmalloc tracepoint reports
 * PAGE_SIZE << get_order(size) as the allocated size.
 */
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
	void *obj;

	obj = __kmalloc_large_node(size, flags, node);
	trace_kmalloc(_RET_IP_, obj, size, PAGE_SIZE << get_order(size),
		      flags, node);

	return obj;
}
EXPORT_SYMBOL(kmalloc_large_node);
|
||||
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
/* Randomize a generic freelist */
|
||||
static void freelist_randomize(unsigned int *list,
|
||||
@ -1222,12 +1026,8 @@ void cache_random_seq_destroy(struct kmem_cache *cachep)
|
||||
}
|
||||
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
|
||||
|
||||
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
|
||||
#ifdef CONFIG_SLAB
|
||||
#define SLABINFO_RIGHTS (0600)
|
||||
#else
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
#define SLABINFO_RIGHTS (0400)
|
||||
#endif
|
||||
|
||||
static void print_slabinfo_header(struct seq_file *m)
|
||||
{
|
||||
@ -1235,18 +1035,10 @@ static void print_slabinfo_header(struct seq_file *m)
|
||||
* Output format version, so at least we can change it
|
||||
* without _too_ many complaints.
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_SLAB
|
||||
seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
|
||||
#else
|
||||
seq_puts(m, "slabinfo - version: 2.1\n");
|
||||
#endif
|
||||
seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
|
||||
seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
|
||||
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
|
||||
#ifdef CONFIG_DEBUG_SLAB
|
||||
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
|
||||
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
|
||||
#endif
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
@ -1370,7 +1162,7 @@ static int __init slab_proc_init(void)
|
||||
}
|
||||
module_init(slab_proc_init);
|
||||
|
||||
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
|
||||
#endif /* CONFIG_SLUB_DEBUG */
|
||||
|
||||
static __always_inline __realloc_size(2) void *
|
||||
__do_krealloc(const void *p, size_t new_size, gfp_t flags)
|
||||
@ -1488,10 +1280,3 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kfree);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
|
||||
|
||||
/*
 * Returns -ENOMEM when __should_failslab() reports that this allocation
 * should fail, 0 otherwise. Registered for error injection with an ERRNO
 * return.
 */
int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	return __should_failslab(s, gfpflags) ? -ENOMEM : 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
|
||||
|
745
mm/slub.c
745
mm/slub.c
@ -34,6 +34,7 @@
|
||||
#include <linux/memory.h>
|
||||
#include <linux/math64.h>
|
||||
#include <linux/fault-inject.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/memcontrol.h>
|
||||
@ -345,6 +346,60 @@ static void debugfs_slab_add(struct kmem_cache *);
|
||||
static inline void debugfs_slab_add(struct kmem_cache *s) { }
|
||||
#endif
|
||||
|
||||
/*
 * Event counters; each value indexes the stat[] array and
 * NR_SLUB_STAT_ITEMS is the number of counters.
 */
enum stat_item {
	/* Allocation from cpu slab */
	ALLOC_FASTPATH,
	/* Allocation by getting a new cpu slab */
	ALLOC_SLOWPATH,
	/* Free to cpu slab */
	FREE_FASTPATH,
	/* Freeing not to cpu slab */
	FREE_SLOWPATH,
	/* Freeing to frozen slab */
	FREE_FROZEN,
	/* Freeing moves slab to partial list */
	FREE_ADD_PARTIAL,
	/* Freeing removes last object */
	FREE_REMOVE_PARTIAL,
	/* Cpu slab acquired from node partial list */
	ALLOC_FROM_PARTIAL,
	/* Cpu slab acquired from page allocator */
	ALLOC_SLAB,
	/* Refill cpu slab from slab freelist */
	ALLOC_REFILL,
	/* Switching cpu slab */
	ALLOC_NODE_MISMATCH,
	/* Slab freed to the page allocator */
	FREE_SLAB,
	/* Abandoning of the cpu slab */
	CPUSLAB_FLUSH,
	/* Cpu slab was full when deactivated */
	DEACTIVATE_FULL,
	/* Cpu slab was empty when deactivated */
	DEACTIVATE_EMPTY,
	/* Cpu slab was moved to the head of partials */
	DEACTIVATE_TO_HEAD,
	/* Cpu slab was moved to the tail of partials */
	DEACTIVATE_TO_TAIL,
	/* Slab contained remotely freed objects */
	DEACTIVATE_REMOTE_FREES,
	/* Implicit deactivation */
	DEACTIVATE_BYPASS,
	/* Number of times fallback was necessary */
	ORDER_FALLBACK,
	/* Failures of this_cpu_cmpxchg_double */
	CMPXCHG_DOUBLE_CPU_FAIL,
	/* Failures of slab freelist update */
	CMPXCHG_DOUBLE_FAIL,
	/* Used cpu partial on alloc */
	CPU_PARTIAL_ALLOC,
	/* Refill cpu partial on free */
	CPU_PARTIAL_FREE,
	/* Refill cpu partial from node partial */
	CPU_PARTIAL_NODE,
	/* Drain cpu partial to node partial */
	CPU_PARTIAL_DRAIN,
	NR_SLUB_STAT_ITEMS
};
|
||||
|
||||
#ifndef CONFIG_SLUB_TINY
|
||||
/*
|
||||
* When changing the layout, make sure freelist and tid are still compatible
|
||||
* with this_cpu_cmpxchg_double() alignment requirements.
|
||||
*/
|
||||
struct kmem_cache_cpu {
|
||||
union {
|
||||
struct {
|
||||
void **freelist; /* Pointer to next available object */
|
||||
unsigned long tid; /* Globally unique transaction id */
|
||||
};
|
||||
freelist_aba_t freelist_tid;
|
||||
};
|
||||
struct slab *slab; /* The slab from which we are allocating */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
struct slab *partial; /* Partially allocated frozen slabs */
|
||||
#endif
|
||||
local_lock_t lock; /* Protects the fields above */
|
||||
#ifdef CONFIG_SLUB_STATS
|
||||
unsigned int stat[NR_SLUB_STAT_ITEMS];
|
||||
#endif
|
||||
};
|
||||
#endif /* CONFIG_SLUB_TINY */
|
||||
|
||||
static inline void stat(const struct kmem_cache *s, enum stat_item si)
|
||||
{
|
||||
#ifdef CONFIG_SLUB_STATS
|
||||
@ -356,6 +411,41 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline
|
||||
void stat_add(const struct kmem_cache *s, enum stat_item si, int v)
|
||||
{
|
||||
#ifdef CONFIG_SLUB_STATS
|
||||
raw_cpu_add(s->cpu_slab->stat[si], v);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* The slab lists for all objects.
|
||||
*/
|
||||
struct kmem_cache_node {
|
||||
spinlock_t list_lock;
|
||||
unsigned long nr_partial;
|
||||
struct list_head partial;
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
atomic_long_t nr_slabs;
|
||||
atomic_long_t total_objects;
|
||||
struct list_head full;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
|
||||
{
|
||||
return s->node[node];
|
||||
}
|
||||
|
||||
/*
 * Iterate over all node ids below nr_node_ids. The loop body only runs for
 * nodes that have a kmem_cache_node structure allocated (which is true for
 * all online nodes); __n is set to that structure and __node to its id.
 */
#define for_each_kmem_cache_node(__s, __node, __n) \
	for (__node = 0; __node < nr_node_ids; __node++) \
		 if ((__n = get_node(__s, __node)))
|
||||
|
||||
/*
|
||||
* Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
|
||||
* Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
|
||||
@ -1774,12 +1864,214 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
|
||||
#endif
|
||||
#endif /* CONFIG_SLUB_DEBUG */
|
||||
|
||||
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
|
||||
{
|
||||
return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
|
||||
NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG_KMEM
|
||||
static inline void memcg_free_slab_cgroups(struct slab *slab)
|
||||
{
|
||||
kfree(slab_objcgs(slab));
|
||||
slab->memcg_data = 0;
|
||||
}
|
||||
|
||||
static inline size_t obj_full_size(struct kmem_cache *s)
|
||||
{
|
||||
/*
|
||||
* For each accounted object there is an extra space which is used
|
||||
* to store obj_cgroup membership. Charge it too.
|
||||
*/
|
||||
return s->size + sizeof(struct obj_cgroup *);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns false if the allocation should fail.
|
||||
*/
|
||||
static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t objects, gfp_t flags)
|
||||
{
|
||||
/*
|
||||
* The obtained objcg pointer is safe to use within the current scope,
|
||||
* defined by current task or set_active_memcg() pair.
|
||||
* obj_cgroup_get() is used to get a permanent reference.
|
||||
*/
|
||||
struct obj_cgroup *objcg = current_obj_cgroup();
|
||||
if (!objcg)
|
||||
return true;
|
||||
|
||||
if (lru) {
|
||||
int ret;
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
memcg = get_mem_cgroup_from_objcg(objcg);
|
||||
ret = memcg_list_lru_alloc(memcg, lru, flags);
|
||||
css_put(&memcg->css);
|
||||
|
||||
if (ret)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
|
||||
return false;
|
||||
|
||||
*objcgp = objcg;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns false if the allocation should fail.
|
||||
*/
|
||||
static __fastpath_inline
|
||||
bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp, size_t objects,
|
||||
gfp_t flags)
|
||||
{
|
||||
if (!memcg_kmem_online())
|
||||
return true;
|
||||
|
||||
if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
|
||||
return true;
|
||||
|
||||
return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
|
||||
flags));
|
||||
}
|
||||
|
||||
static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
|
||||
struct obj_cgroup *objcg,
|
||||
gfp_t flags, size_t size,
|
||||
void **p)
|
||||
{
|
||||
struct slab *slab;
|
||||
unsigned long off;
|
||||
size_t i;
|
||||
|
||||
flags &= gfp_allowed_mask;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
if (likely(p[i])) {
|
||||
slab = virt_to_slab(p[i]);
|
||||
|
||||
if (!slab_objcgs(slab) &&
|
||||
memcg_alloc_slab_cgroups(slab, s, flags, false)) {
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
continue;
|
||||
}
|
||||
|
||||
off = obj_to_index(s, slab, p[i]);
|
||||
obj_cgroup_get(objcg);
|
||||
slab_objcgs(slab)[off] = objcg;
|
||||
mod_objcg_state(objcg, slab_pgdat(slab),
|
||||
cache_vmstat_idx(s), obj_full_size(s));
|
||||
} else {
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static __fastpath_inline
|
||||
void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
|
||||
gfp_t flags, size_t size, void **p)
|
||||
{
|
||||
if (likely(!memcg_kmem_online() || !objcg))
|
||||
return;
|
||||
|
||||
return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
|
||||
}
|
||||
|
||||
static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
|
||||
void **p, int objects,
|
||||
struct obj_cgroup **objcgs)
|
||||
{
|
||||
for (int i = 0; i < objects; i++) {
|
||||
struct obj_cgroup *objcg;
|
||||
unsigned int off;
|
||||
|
||||
off = obj_to_index(s, slab, p[i]);
|
||||
objcg = objcgs[off];
|
||||
if (!objcg)
|
||||
continue;
|
||||
|
||||
objcgs[off] = NULL;
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
|
||||
-obj_full_size(s));
|
||||
obj_cgroup_put(objcg);
|
||||
}
|
||||
}
|
||||
|
||||
static __fastpath_inline
|
||||
void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
|
||||
int objects)
|
||||
{
|
||||
struct obj_cgroup **objcgs;
|
||||
|
||||
if (!memcg_kmem_online())
|
||||
return;
|
||||
|
||||
objcgs = slab_objcgs(slab);
|
||||
if (likely(!objcgs))
|
||||
return;
|
||||
|
||||
__memcg_slab_free_hook(s, slab, p, objects, objcgs);
|
||||
}
|
||||
|
||||
/*
 * Allocation failed after the pre-alloc hook charged @objcg: return the
 * charge for all @objects objects. Does nothing when @objcg is NULL.
 */
static inline
void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
				 struct obj_cgroup *objcg)
{
	if (!objcg)
		return;

	obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
}
|
||||
#else /* CONFIG_MEMCG_KMEM */
|
||||
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* !CONFIG_MEMCG_KMEM stub: nothing to free. */
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
|
||||
|
||||
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t objects, gfp_t flags)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
|
||||
struct obj_cgroup *objcg,
|
||||
gfp_t flags, size_t size,
|
||||
void **p)
|
||||
{
|
||||
}
|
||||
|
||||
/* !CONFIG_MEMCG_KMEM stub: no accounting to undo on free. */
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
					void **p, int objects)
{
}
|
||||
|
||||
/* !CONFIG_MEMCG_KMEM stub: no charge to return on allocation failure. */
static inline
void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
				 struct obj_cgroup *objcg)
{
}
|
||||
#endif /* CONFIG_MEMCG_KMEM */
|
||||
|
||||
/*
|
||||
* Hooks for other subsystems that check memory allocations. In a typical
|
||||
* production configuration these hooks all should produce no code at all.
|
||||
*
|
||||
* Returns true if freeing of the object can proceed, false if its reuse
|
||||
* was delayed by KASAN quarantine, or it was returned to KFENCE.
|
||||
*/
|
||||
static __always_inline bool slab_free_hook(struct kmem_cache *s,
|
||||
void *x, bool init)
|
||||
static __always_inline
|
||||
bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
|
||||
{
|
||||
kmemleak_free_recursive(x, s->flags);
|
||||
kmsan_slab_free(s, x);
|
||||
@ -1794,6 +2086,9 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
|
||||
__kcsan_check_access(x, s->object_size,
|
||||
KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
|
||||
|
||||
if (kfence_free(x))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* As memory initialization might be integrated into KASAN,
|
||||
* kasan_slab_free and initialization memset's must be
|
||||
@ -1802,7 +2097,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
|
||||
* The initialization memset's clear the object and the metadata,
|
||||
* but don't touch the SLAB redzone.
|
||||
*/
|
||||
if (init) {
|
||||
if (unlikely(init)) {
|
||||
int rsize;
|
||||
|
||||
if (!kasan_has_integrated_init())
|
||||
@ -1812,7 +2107,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
|
||||
s->size - s->inuse - rsize);
|
||||
}
|
||||
/* KASAN might put x into memory quarantine, delaying its reuse. */
|
||||
return kasan_slab_free(s, x, init);
|
||||
return !kasan_slab_free(s, x, init);
|
||||
}
|
||||
|
||||
static inline bool slab_free_freelist_hook(struct kmem_cache *s,
|
||||
@ -1822,23 +2117,26 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
|
||||
|
||||
void *object;
|
||||
void *next = *head;
|
||||
void *old_tail = *tail ? *tail : *head;
|
||||
void *old_tail = *tail;
|
||||
bool init;
|
||||
|
||||
if (is_kfence_address(next)) {
|
||||
slab_free_hook(s, next, false);
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Head and tail of the reconstructed freelist */
|
||||
*head = NULL;
|
||||
*tail = NULL;
|
||||
|
||||
init = slab_want_init_on_free(s);
|
||||
|
||||
do {
|
||||
object = next;
|
||||
next = get_freepointer(s, object);
|
||||
|
||||
/* If object's reuse doesn't have to be delayed */
|
||||
if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
|
||||
if (likely(slab_free_hook(s, object, init))) {
|
||||
/* Move object to the new freelist */
|
||||
set_freepointer(s, object, *head);
|
||||
*head = object;
|
||||
@ -1853,9 +2151,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
|
||||
}
|
||||
} while (object != old_tail);
|
||||
|
||||
if (*head == *tail)
|
||||
*tail = NULL;
|
||||
|
||||
return *head != NULL;
|
||||
}
|
||||
|
||||
@ -2008,6 +2303,26 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
|
||||
}
|
||||
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
|
||||
|
||||
/*
 * Account a freshly allocated slab of the given @order: set up the
 * obj_cgroup vector for SLAB_ACCOUNT caches when memcg kmem accounting is
 * online, and add the slab's size to the node's slab counter.
 */
static __always_inline void account_slab(struct slab *slab, int order,
					 struct kmem_cache *s, gfp_t gfp)
{
	if (memcg_kmem_online()) {
		if (s->flags & SLAB_ACCOUNT)
			memcg_alloc_slab_cgroups(slab, s, gfp, true);
	}

	mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
			    PAGE_SIZE << order);
}
|
||||
|
||||
static __always_inline void unaccount_slab(struct slab *slab, int order,
|
||||
struct kmem_cache *s)
|
||||
{
|
||||
if (memcg_kmem_online())
|
||||
memcg_free_slab_cgroups(slab);
|
||||
|
||||
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
|
||||
-(PAGE_SIZE << order));
|
||||
}
|
||||
|
||||
static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
|
||||
{
|
||||
struct slab *slab;
|
||||
@ -3420,6 +3735,86 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
|
||||
0, sizeof(void *));
|
||||
}
|
||||
|
||||
/*
 * Returns -ENOMEM when __should_failslab() reports that this allocation
 * should fail, 0 otherwise. Kept out of line and registered for error
 * injection with an ERRNO return.
 */
noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	return __should_failslab(s, gfpflags) ? -ENOMEM : 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
|
||||
|
||||
static __fastpath_inline
|
||||
struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t size, gfp_t flags)
|
||||
{
|
||||
flags &= gfp_allowed_mask;
|
||||
|
||||
might_alloc(flags);
|
||||
|
||||
if (unlikely(should_failslab(s, flags)))
|
||||
return NULL;
|
||||
|
||||
if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
|
||||
return NULL;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static __fastpath_inline
|
||||
void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
|
||||
gfp_t flags, size_t size, void **p, bool init,
|
||||
unsigned int orig_size)
|
||||
{
|
||||
unsigned int zero_size = s->object_size;
|
||||
bool kasan_init = init;
|
||||
size_t i;
|
||||
gfp_t init_flags = flags & gfp_allowed_mask;
|
||||
|
||||
/*
|
||||
* For kmalloc object, the allocated memory size(object_size) is likely
|
||||
* larger than the requested size(orig_size). If redzone check is
|
||||
* enabled for the extra space, don't zero it, as it will be redzoned
|
||||
* soon. The redzone operation for this extra space could be seen as a
|
||||
* replacement of current poisoning under certain debug option, and
|
||||
* won't break other sanity checks.
|
||||
*/
|
||||
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
|
||||
(s->flags & SLAB_KMALLOC))
|
||||
zero_size = orig_size;
|
||||
|
||||
/*
|
||||
* When slub_debug is enabled, avoid memory initialization integrated
|
||||
* into KASAN and instead zero out the memory via the memset below with
|
||||
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
|
||||
* cause false-positive reports. This does not lead to a performance
|
||||
* penalty on production builds, as slub_debug is not intended to be
|
||||
* enabled there.
|
||||
*/
|
||||
if (__slub_debug_enabled())
|
||||
kasan_init = false;
|
||||
|
||||
/*
|
||||
* As memory initialization might be integrated into KASAN,
|
||||
* kasan_slab_alloc and initialization memset must be
|
||||
* kept together to avoid discrepancies in behavior.
|
||||
*
|
||||
* As p[i] might get tagged, memset and kmemleak hook come after KASAN.
|
||||
*/
|
||||
for (i = 0; i < size; i++) {
|
||||
p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init);
|
||||
if (p[i] && init && (!kasan_init ||
|
||||
!kasan_has_integrated_init()))
|
||||
memset(p[i], 0, zero_size);
|
||||
kmemleak_alloc_recursive(p[i], s->object_size, 1,
|
||||
s->flags, init_flags);
|
||||
kmsan_slab_alloc(s, p[i], init_flags);
|
||||
}
|
||||
|
||||
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
|
||||
* have the fastpath folded into their functions. So no function call
|
||||
@ -3438,7 +3833,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
|
||||
bool init = false;
|
||||
|
||||
s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
|
||||
if (!s)
|
||||
if (unlikely(!s))
|
||||
return NULL;
|
||||
|
||||
object = kfence_alloc(s, orig_size, gfpflags);
|
||||
@ -3460,44 +3855,42 @@ out:
|
||||
return object;
|
||||
}
|
||||
|
||||
static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
|
||||
gfp_t gfpflags, unsigned long addr, size_t orig_size)
|
||||
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
|
||||
{
|
||||
return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
|
||||
}
|
||||
|
||||
static __fastpath_inline
|
||||
void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
|
||||
gfp_t gfpflags)
|
||||
{
|
||||
void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
|
||||
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_,
|
||||
s->object_size);
|
||||
|
||||
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
|
||||
{
|
||||
return __kmem_cache_alloc_lru(s, NULL, gfpflags);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc);
|
||||
|
||||
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
|
||||
gfp_t gfpflags)
|
||||
{
|
||||
return __kmem_cache_alloc_lru(s, lru, gfpflags);
|
||||
void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_,
|
||||
s->object_size);
|
||||
|
||||
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_lru);
|
||||
|
||||
/*
 * Allocate one object from @s on @node through slab_alloc_node() without a
 * list_lru, attributing the allocation to @caller.
 */
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
			      int node, size_t orig_size,
			      unsigned long caller)
{
	void *obj = slab_alloc_node(s, NULL, gfpflags, node, caller,
				    orig_size);

	return obj;
}
|
||||
|
||||
/**
|
||||
* kmem_cache_alloc_node - Allocate an object on the specified node
|
||||
* @s: The cache to allocate from.
|
||||
* @gfpflags: See kmalloc().
|
||||
* @node: node number of the target node.
|
||||
*
|
||||
* Identical to kmem_cache_alloc but it will allocate memory on the given
|
||||
* node, which can improve the performance for cpu bound structures.
|
||||
*
|
||||
* Fallback to other node is possible if __GFP_THISNODE is not set.
|
||||
*
|
||||
* Return: pointer to the new object or %NULL in case of error
|
||||
*/
|
||||
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
|
||||
{
|
||||
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
|
||||
@ -3508,6 +3901,124 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_node);
|
||||
|
||||
/*
|
||||
* To avoid unnecessary overhead, we pass through large allocation requests
|
||||
* directly to the page allocator. We use __GFP_COMP, because we will need to
|
||||
* know the allocation order to free the pages properly in kfree.
|
||||
*/
|
||||
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
struct page *page;
|
||||
void *ptr = NULL;
|
||||
unsigned int order = get_order(size);
|
||||
|
||||
if (unlikely(flags & GFP_SLAB_BUG_MASK))
|
||||
flags = kmalloc_fix_flags(flags);
|
||||
|
||||
flags |= __GFP_COMP;
|
||||
page = alloc_pages_node(node, flags, order);
|
||||
if (page) {
|
||||
ptr = page_address(page);
|
||||
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
|
||||
PAGE_SIZE << order);
|
||||
}
|
||||
|
||||
ptr = kasan_kmalloc_large(ptr, size, flags);
|
||||
/* As ptr might get tagged, call kmemleak hook after KASAN. */
|
||||
kmemleak_alloc(ptr, size, 1, flags);
|
||||
kmsan_kmalloc_large(ptr, size, flags);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Large allocation without a node preference (NUMA_NO_NODE); the kmalloc
 * tracepoint reports PAGE_SIZE << get_order(size) as the allocated size.
 */
void *kmalloc_large(size_t size, gfp_t flags)
{
	void *obj;

	obj = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
	trace_kmalloc(_RET_IP_, obj, size, PAGE_SIZE << get_order(size),
		      flags, NUMA_NO_NODE);

	return obj;
}
EXPORT_SYMBOL(kmalloc_large);
|
||||
|
||||
/*
 * Large allocation for the given @node; the kmalloc tracepoint reports
 * PAGE_SIZE << get_order(size) as the allocated size.
 */
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
	void *obj;

	obj = __kmalloc_large_node(size, flags, node);
	trace_kmalloc(_RET_IP_, obj, size, PAGE_SIZE << get_order(size),
		      flags, node);

	return obj;
}
EXPORT_SYMBOL(kmalloc_large_node);
|
||||
|
||||
static __always_inline
|
||||
void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
|
||||
unsigned long caller)
|
||||
{
|
||||
struct kmem_cache *s;
|
||||
void *ret;
|
||||
|
||||
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
|
||||
ret = __kmalloc_large_node(size, flags, node);
|
||||
trace_kmalloc(caller, ret, size,
|
||||
PAGE_SIZE << get_order(size), flags, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (unlikely(!size))
|
||||
return ZERO_SIZE_PTR;
|
||||
|
||||
s = kmalloc_slab(size, flags, caller);
|
||||
|
||||
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
|
||||
ret = kasan_kmalloc(s, ret, size, flags);
|
||||
trace_kmalloc(caller, ret, size, s->size, flags, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* kmalloc on a given @node, attributed to this function's return address. */
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
	unsigned long caller = _RET_IP_;

	return __do_kmalloc_node(size, flags, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node);
|
||||
|
||||
/*
 * kmalloc without a node preference (NUMA_NO_NODE), attributed to this
 * function's return address.
 */
void *__kmalloc(size_t size, gfp_t flags)
{
	unsigned long caller = _RET_IP_;

	return __do_kmalloc_node(size, flags, NUMA_NO_NODE, caller);
}
EXPORT_SYMBOL(__kmalloc);
|
||||
|
||||
/*
 * kmalloc on a given @node where the call site to record is supplied
 * explicitly in @caller instead of being taken from the return address.
 */
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
				  int node, unsigned long caller)
{
	void *obj = __do_kmalloc_node(size, flags, node, caller);

	return obj;
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
|
||||
|
||||
/*
 * Allocate from cache @s without a node preference, with @size as the
 * requested size. The kmalloc tracepoint is emitted for the raw result
 * before the object is passed through kasan_kmalloc().
 */
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
	void *obj;

	obj = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_, size);
	trace_kmalloc(_RET_IP_, obj, size, s->size, gfpflags, NUMA_NO_NODE);

	return kasan_kmalloc(s, obj, size, gfpflags);
}
EXPORT_SYMBOL(kmalloc_trace);
|
||||
|
||||
/*
 * Allocate from cache @s on @node, with @size as the requested size. The
 * kmalloc tracepoint is emitted for the raw result before the object is
 * passed through kasan_kmalloc().
 */
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
			 int node, size_t size)
{
	void *obj;

	obj = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
	trace_kmalloc(_RET_IP_, obj, size, s->size, gfpflags, node);

	return kasan_kmalloc(s, obj, size, gfpflags);
}
EXPORT_SYMBOL(kmalloc_node_trace);
|
||||
|
||||
static noinline void free_to_partial_list(
|
||||
struct kmem_cache *s, struct slab *slab,
|
||||
void *head, void *tail, int bulk_cnt,
|
||||
@ -3592,9 +4103,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
|
||||
|
||||
stat(s, FREE_SLOWPATH);
|
||||
|
||||
if (kfence_free(head))
|
||||
return;
|
||||
|
||||
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
|
||||
free_to_partial_list(s, slab, head, tail, cnt, addr);
|
||||
return;
|
||||
@ -3716,7 +4224,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
|
||||
struct slab *slab, void *head, void *tail,
|
||||
int cnt, unsigned long addr)
|
||||
{
|
||||
void *tail_obj = tail ? : head;
|
||||
struct kmem_cache_cpu *c;
|
||||
unsigned long tid;
|
||||
void **freelist;
|
||||
@ -3735,14 +4242,14 @@ redo:
|
||||
barrier();
|
||||
|
||||
if (unlikely(slab != c->slab)) {
|
||||
__slab_free(s, slab, head, tail_obj, cnt, addr);
|
||||
__slab_free(s, slab, head, tail, cnt, addr);
|
||||
return;
|
||||
}
|
||||
|
||||
if (USE_LOCKLESS_FAST_PATH()) {
|
||||
freelist = READ_ONCE(c->freelist);
|
||||
|
||||
set_freepointer(s, tail_obj, freelist);
|
||||
set_freepointer(s, tail, freelist);
|
||||
|
||||
if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
|
||||
note_cmpxchg_failure("slab_free", s, tid);
|
||||
@ -3759,60 +4266,143 @@ redo:
|
||||
tid = c->tid;
|
||||
freelist = c->freelist;
|
||||
|
||||
set_freepointer(s, tail_obj, freelist);
|
||||
set_freepointer(s, tail, freelist);
|
||||
c->freelist = head;
|
||||
c->tid = next_tid(tid);
|
||||
|
||||
local_unlock(&s->cpu_slab->lock);
|
||||
}
|
||||
stat(s, FREE_FASTPATH);
|
||||
stat_add(s, FREE_FASTPATH, cnt);
|
||||
}
|
||||
#else /* CONFIG_SLUB_TINY */
|
||||
/*
 * CONFIG_SLUB_TINY variant of do_slab_free(): every free is handed to
 * __slab_free() exactly once.
 *
 * @head/@tail delimit the list of @cnt objects being freed. A NULL @tail is
 * tolerated and treated as a single-object free where @head is also the
 * tail, so callers using either convention are handled.
 */
static void do_slab_free(struct kmem_cache *s,
				struct slab *slab, void *head, void *tail,
				int cnt, unsigned long addr)
{
	void *tail_obj = tail ? : head;

	/* One call only: freeing the same list twice would corrupt the slab. */
	__slab_free(s, slab, head, tail_obj, cnt, addr);
}
|
||||
#endif /* CONFIG_SLUB_TINY */
|
||||
|
||||
/*
 * Free a single @object belonging to @slab of cache @s.
 *
 * The memcg free hook runs first. The object is then handed to
 * do_slab_free() as a one-element list (head == tail == @object), but only
 * if slab_free_hook() reports that freeing can proceed.
 *
 * @addr is the caller address forwarded to do_slab_free().
 */
static __fastpath_inline
void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
	       unsigned long addr)
{
	memcg_slab_free_hook(s, slab, &object, 1);

	if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
		do_slab_free(s, slab, object, object, 1, addr);
}
|
||||
|
||||
static __fastpath_inline
|
||||
void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
|
||||
void *tail, void **p, int cnt, unsigned long addr)
|
||||
{
|
||||
memcg_slab_free_hook(s, slab, p, cnt);
|
||||
/*
|
||||
* With KASAN enabled slab_free_freelist_hook modifies the freelist
|
||||
* to remove objects, whose reuse must be delayed.
|
||||
*/
|
||||
if (slab_free_freelist_hook(s, &head, &tail, &cnt))
|
||||
if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
|
||||
do_slab_free(s, slab, head, tail, cnt, addr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
/*
 * Free object @x of @cache directly through do_slab_free(), without running
 * the free hooks. The object is passed as a one-element list
 * (head == tail == @x) and freed exactly once.
 */
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{
	do_slab_free(cache, virt_to_slab(x), x, x, 1, addr);
}
|
||||
#endif
|
||||
|
||||
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller)
|
||||
static inline struct kmem_cache *virt_to_cache(const void *obj)
|
||||
{
|
||||
slab_free(s, virt_to_slab(x), x, NULL, &x, 1, caller);
|
||||
struct slab *slab;
|
||||
|
||||
slab = virt_to_slab(obj);
|
||||
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n", __func__))
|
||||
return NULL;
|
||||
return slab->slab_cache;
|
||||
}
|
||||
|
||||
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
|
||||
{
|
||||
struct kmem_cache *cachep;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
|
||||
!kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
|
||||
return s;
|
||||
|
||||
cachep = virt_to_cache(x);
|
||||
if (WARN(cachep && cachep != s,
|
||||
"%s: Wrong slab cache. %s but object is from %s\n",
|
||||
__func__, s->name, cachep->name))
|
||||
print_tracking(cachep, x);
|
||||
return cachep;
|
||||
}
|
||||
|
||||
/**
|
||||
* kmem_cache_free - Deallocate an object
|
||||
* @s: The cache the allocation was from.
|
||||
* @x: The previously allocated object.
|
||||
*
|
||||
* Free an object which was previously allocated from this
|
||||
* cache.
|
||||
*/
|
||||
void kmem_cache_free(struct kmem_cache *s, void *x)
|
||||
{
|
||||
s = cache_from_obj(s, x);
|
||||
if (!s)
|
||||
return;
|
||||
trace_kmem_cache_free(_RET_IP_, x, s);
|
||||
slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_);
|
||||
slab_free(s, virt_to_slab(x), x, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_free);
|
||||
|
||||
static void free_large_kmalloc(struct folio *folio, void *object)
|
||||
{
|
||||
unsigned int order = folio_order(folio);
|
||||
|
||||
if (WARN_ON_ONCE(order == 0))
|
||||
pr_warn_once("object pointer: 0x%p\n", object);
|
||||
|
||||
kmemleak_free(object);
|
||||
kasan_kfree_large(object);
|
||||
kmsan_kfree_large(object);
|
||||
|
||||
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
|
||||
-(PAGE_SIZE << order));
|
||||
__free_pages(folio_page(folio, 0), order);
|
||||
}
|
||||
|
||||
/**
|
||||
* kfree - free previously allocated memory
|
||||
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
|
||||
*
|
||||
* If @object is NULL, no operation is performed.
|
||||
*/
|
||||
void kfree(const void *object)
|
||||
{
|
||||
struct folio *folio;
|
||||
struct slab *slab;
|
||||
struct kmem_cache *s;
|
||||
void *x = (void *)object;
|
||||
|
||||
trace_kfree(_RET_IP_, object);
|
||||
|
||||
if (unlikely(ZERO_OR_NULL_PTR(object)))
|
||||
return;
|
||||
|
||||
folio = virt_to_folio(object);
|
||||
if (unlikely(!folio_test_slab(folio))) {
|
||||
free_large_kmalloc(folio, (void *)object);
|
||||
return;
|
||||
}
|
||||
|
||||
slab = folio_slab(folio);
|
||||
s = slab->slab_cache;
|
||||
slab_free(s, slab, x, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(kfree);
|
||||
|
||||
struct detached_freelist {
|
||||
struct slab *slab;
|
||||
void *tail;
|
||||
@ -3892,6 +4482,27 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
|
||||
return same;
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal bulk free of objects that were not initialised by the post alloc
|
||||
* hooks and thus should not be processed by the free hooks
|
||||
*/
|
||||
static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
|
||||
{
|
||||
if (!size)
|
||||
return;
|
||||
|
||||
do {
|
||||
struct detached_freelist df;
|
||||
|
||||
size = build_detached_freelist(s, size, p, &df);
|
||||
if (!df.slab)
|
||||
continue;
|
||||
|
||||
do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
|
||||
_RET_IP_);
|
||||
} while (likely(size));
|
||||
}
|
||||
|
||||
/* Note that interrupts must be enabled when calling this function. */
|
||||
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
|
||||
{
|
||||
@ -3905,15 +4516,16 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
|
||||
if (!df.slab)
|
||||
continue;
|
||||
|
||||
slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt,
|
||||
_RET_IP_);
|
||||
slab_free_bulk(df.s, df.slab, df.freelist, df.tail, &p[size],
|
||||
df.cnt, _RET_IP_);
|
||||
} while (likely(size));
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_free_bulk);
|
||||
|
||||
#ifndef CONFIG_SLUB_TINY
|
||||
static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
|
||||
size_t size, void **p, struct obj_cgroup *objcg)
|
||||
static inline
|
||||
int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
|
||||
void **p)
|
||||
{
|
||||
struct kmem_cache_cpu *c;
|
||||
unsigned long irqflags;
|
||||
@ -3967,6 +4579,7 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
|
||||
c->freelist = get_freepointer(s, object);
|
||||
p[i] = object;
|
||||
maybe_wipe_obj_freeptr(s, p[i]);
|
||||
stat(s, ALLOC_FASTPATH);
|
||||
}
|
||||
c->tid = next_tid(c->tid);
|
||||
local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
|
||||
@ -3976,14 +4589,13 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
|
||||
|
||||
error:
|
||||
slub_put_cpu_ptr(s->cpu_slab);
|
||||
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
|
||||
kmem_cache_free_bulk(s, i, p);
|
||||
__kmem_cache_free_bulk(s, i, p);
|
||||
return 0;
|
||||
|
||||
}
|
||||
#else /* CONFIG_SLUB_TINY */
|
||||
static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
|
||||
size_t size, void **p, struct obj_cgroup *objcg)
|
||||
size_t size, void **p)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -4006,8 +4618,7 @@ static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
|
||||
return i;
|
||||
|
||||
error:
|
||||
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
|
||||
kmem_cache_free_bulk(s, i, p);
|
||||
__kmem_cache_free_bulk(s, i, p);
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_SLUB_TINY */
|
||||
@ -4027,15 +4638,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
|
||||
if (unlikely(!s))
|
||||
return 0;
|
||||
|
||||
i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg);
|
||||
i = __kmem_cache_alloc_bulk(s, flags, size, p);
|
||||
|
||||
/*
|
||||
* memcg and kmem_cache debug support and memory initialization.
|
||||
* Done outside of the IRQ disabled fastpath loop.
|
||||
*/
|
||||
if (i != 0)
|
||||
if (likely(i != 0)) {
|
||||
slab_post_alloc_hook(s, objcg, flags, size, p,
|
||||
slab_want_init_on_alloc(flags, s), s->object_size);
|
||||
} else {
|
||||
memcg_slab_alloc_error_hook(s, size, objcg);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
|
||||
|
Loading…
Reference in New Issue
Block a user