2019-06-01 11:08:18 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2007-12-03 20:16:57 +03:00
/*
* DMA Pool allocator
*
* Copyright 2001 David Brownell
* Copyright 2007 Intel Corporation
* Author : Matthew Wilcox < willy @ linux . intel . com >
*
* This allocator returns small blocks of a given size which are DMA - able by
* the given device . It uses the dma_alloc_coherent page allocator to get
* new pages , then splits them up into blocks of the required size .
* Many older drivers still have their own code to do this .
*
* The current design of this allocator is fairly simple . The pool is
* represented by the ' struct dma_pool ' which keeps a doubly - linked list of
* allocated pages . Each page in the page_list is split into blocks of at
2007-12-03 22:08:28 +03:00
* least ' size ' bytes . Free blocks are tracked in an unsorted singly - linked
* list of free blocks within the page . Used blocks aren ' t tracked , but we
* keep a count of how many are currently allocated from each page .
2007-12-03 20:16:57 +03:00
*/
2005-04-17 02:20:36 +04:00
# include <linux/device.h>
# include <linux/dma-mapping.h>
# include <linux/dmapool.h>
2007-12-03 20:16:57 +03:00
# include <linux/kernel.h>
# include <linux/list.h>
2011-10-16 10:01:52 +04:00
# include <linux/export.h>
2007-12-03 20:16:57 +03:00
# include <linux/mutex.h>
2006-06-27 13:53:52 +04:00
# include <linux/poison.h>
Detach sched.h from mm.h
First thing mm.h does is including sched.h solely for can_do_mlock() inline
function which has "current" dereference inside. By dealing with can_do_mlock()
mm.h can be detached from sched.h which is good. See below, why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-21 01:22:52 +04:00
# include <linux/sched.h>
2021-02-26 04:18:41 +03:00
# include <linux/sched/mm.h>
2007-12-03 20:16:57 +03:00
# include <linux/slab.h>
2011-10-16 10:03:46 +04:00
# include <linux/stat.h>
2007-12-03 20:16:57 +03:00
# include <linux/spinlock.h>
# include <linux/string.h>
# include <linux/types.h>
# include <linux/wait.h>
2005-04-17 02:20:36 +04:00
2008-04-28 13:12:37 +04:00
/*
 * DMAPOOL_DEBUG is switched on together with the slab debugging options.
 * In this file it enables poisoning of pool memory (POOL_POISON_FREED /
 * POOL_POISON_ALLOCATED) and the extra consistency checks performed in
 * dma_pool_alloc() and dma_pool_free().
 */
# if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
# define DMAPOOL_DEBUG 1
# endif
2007-12-03 20:04:31 +03:00
struct dma_pool { /* the pool */
struct list_head page_list ;
spinlock_t lock ;
size_t size ;
struct device * dev ;
size_t allocation ;
2007-12-03 22:16:24 +03:00
size_t boundary ;
2007-12-03 20:04:31 +03:00
char name [ 32 ] ;
struct list_head pools ;
2005-04-17 02:20:36 +04:00
} ;
2007-12-03 20:04:31 +03:00
struct dma_page {		/* cacheable header for 'allocation' bytes */
	struct list_head page_list;	/* link in dma_pool.page_list */
	void *vaddr;			/* CPU address from dma_alloc_coherent() */
	dma_addr_t dma;			/* DMA address of the same memory */
	unsigned int in_use;		/* number of blocks currently handed out */
	unsigned int offset;		/* first free block; >= allocation when none left */
};
2007-12-03 20:04:31 +03:00
/*
 * Protects every device's ->dma_pools list: taken around the list
 * manipulation in dma_pool_create() / dma_pool_destroy() and around the
 * list walk in show_pools().
 */
static DEFINE_MUTEX ( pools_lock ) ;
2014-10-10 02:28:50 +04:00
/*
 * Held across creation/removal of the "pools" device attribute so that
 * dma_pool_create() and dma_pool_destroy() cannot race with each other
 * around device_create_file() / device_remove_file().  Always taken
 * before pools_lock.
 */
static DEFINE_MUTEX ( pools_reg_lock ) ;
2005-04-17 02:20:36 +04:00
static ssize_t
2007-12-03 20:04:31 +03:00
show_pools ( struct device * dev , struct device_attribute * attr , char * buf )
2005-04-17 02:20:36 +04:00
{
unsigned temp ;
unsigned size ;
char * next ;
struct dma_page * page ;
struct dma_pool * pool ;
next = buf ;
size = PAGE_SIZE ;
temp = scnprintf ( next , size , " poolinfo - 0.1 \n " ) ;
size - = temp ;
next + = temp ;
2007-04-25 00:45:25 +04:00
mutex_lock ( & pools_lock ) ;
2005-04-17 02:20:36 +04:00
list_for_each_entry ( pool , & dev - > dma_pools , pools ) {
unsigned pages = 0 ;
unsigned blocks = 0 ;
2009-06-30 22:41:25 +04:00
spin_lock_irq ( & pool - > lock ) ;
2005-04-17 02:20:36 +04:00
list_for_each_entry ( page , & pool - > page_list , page_list ) {
pages + + ;
blocks + = page - > in_use ;
}
2009-06-30 22:41:25 +04:00
spin_unlock_irq ( & pool - > lock ) ;
2005-04-17 02:20:36 +04:00
/* per-pool info, no real statistics yet */
2017-02-28 01:30:02 +03:00
temp = scnprintf ( next , size , " %-16s %4u %4zu %4zu %2u \n " ,
2007-12-03 22:08:28 +03:00
pool - > name , blocks ,
pages * ( pool - > allocation / pool - > size ) ,
2007-12-03 20:04:31 +03:00
pool - > size , pages ) ;
2005-04-17 02:20:36 +04:00
size - = temp ;
next + = temp ;
}
2007-04-25 00:45:25 +04:00
mutex_unlock ( & pools_lock ) ;
2005-04-17 02:20:36 +04:00
return PAGE_SIZE - size ;
}
2007-12-03 20:04:31 +03:00
2018-06-15 01:27:58 +03:00
/*
 * Read-only (0444) device attribute named "pools"; reads are served by
 * show_pools() and there is no store handler.  This is the dev_attr_pools
 * object that dma_pool_create() / dma_pool_destroy() register and remove.
 */
static DEVICE_ATTR ( pools , 0444 , show_pools , NULL ) ;
2005-04-17 02:20:36 +04:00
/**
* dma_pool_create - Creates a pool of consistent memory blocks , for dma .
* @ name : name of pool , for diagnostics
* @ dev : device that will be doing the DMA
* @ size : size of the blocks in this pool .
* @ align : alignment requirement for blocks ; must be a power of two
2007-12-03 22:16:24 +03:00
* @ boundary : returned blocks won ' t cross this power of two boundary
2019-03-06 02:48:42 +03:00
* Context : not in_interrupt ( )
2005-04-17 02:20:36 +04:00
*
2019-03-06 02:48:42 +03:00
* Given one of these pools , dma_pool_alloc ( )
2005-04-17 02:20:36 +04:00
* may be used to allocate memory . Such memory will all have " consistent "
* DMA mappings , accessible by the device and its driver without using
* cache flushing primitives . The actual size of blocks allocated may be
* larger than requested because of alignment .
*
2007-12-03 22:16:24 +03:00
* If @ boundary is nonzero , objects returned from dma_pool_alloc ( ) won ' t
2005-04-17 02:20:36 +04:00
* cross that size boundary . This is useful for devices which have
* addressing restrictions on individual DMA transfers , such as not crossing
* boundaries of 4 KBytes .
2019-03-06 02:48:42 +03:00
*
* Return : a dma allocation pool with the requested characteristics , or
* % NULL if one can ' t be created .
2005-04-17 02:20:36 +04:00
*/
2007-12-03 20:04:31 +03:00
struct dma_pool * dma_pool_create ( const char * name , struct device * dev ,
2007-12-03 22:16:24 +03:00
size_t size , size_t align , size_t boundary )
2005-04-17 02:20:36 +04:00
{
2007-12-03 20:04:31 +03:00
struct dma_pool * retval ;
2007-12-03 22:16:24 +03:00
size_t allocation ;
2014-10-10 02:28:50 +04:00
bool empty = false ;
2005-04-17 02:20:36 +04:00
2014-10-10 02:29:11 +04:00
if ( align = = 0 )
2005-04-17 02:20:36 +04:00
align = 1 ;
2014-10-10 02:29:11 +04:00
else if ( align & ( align - 1 ) )
2005-04-17 02:20:36 +04:00
return NULL ;
2014-10-10 02:29:11 +04:00
if ( size = = 0 )
2007-12-03 20:10:24 +03:00
return NULL ;
2014-10-10 02:29:11 +04:00
else if ( size < 4 )
2007-12-03 22:08:28 +03:00
size = 4 ;
2007-12-03 20:10:24 +03:00
2020-04-07 06:08:49 +03:00
size = ALIGN ( size , align ) ;
2007-12-03 22:16:24 +03:00
allocation = max_t ( size_t , size , PAGE_SIZE ) ;
2014-10-10 02:29:11 +04:00
if ( ! boundary )
2007-12-03 22:16:24 +03:00
boundary = allocation ;
2014-10-10 02:29:11 +04:00
else if ( ( boundary < size ) | | ( boundary & ( boundary - 1 ) ) )
2005-04-17 02:20:36 +04:00
return NULL ;
2007-12-03 22:16:24 +03:00
retval = kmalloc_node ( sizeof ( * retval ) , GFP_KERNEL , dev_to_node ( dev ) ) ;
if ( ! retval )
2005-04-17 02:20:36 +04:00
return retval ;
2021-04-30 08:57:55 +03:00
strscpy ( retval - > name , name , sizeof ( retval - > name ) ) ;
2005-04-17 02:20:36 +04:00
retval - > dev = dev ;
2007-12-03 20:04:31 +03:00
INIT_LIST_HEAD ( & retval - > page_list ) ;
spin_lock_init ( & retval - > lock ) ;
2005-04-17 02:20:36 +04:00
retval - > size = size ;
2007-12-03 22:16:24 +03:00
retval - > boundary = boundary ;
2005-04-17 02:20:36 +04:00
retval - > allocation = allocation ;
2014-06-05 03:08:05 +04:00
INIT_LIST_HEAD ( & retval - > pools ) ;
2014-10-10 02:28:50 +04:00
/*
* pools_lock ensures that the - > dma_pools list does not get corrupted .
* pools_reg_lock ensures that there is not a race between
* dma_pool_create ( ) and dma_pool_destroy ( ) or within dma_pool_create ( )
* when the first invocation of dma_pool_create ( ) failed on
* device_create_file ( ) and the second assumes that it has been done ( I
* know it is a short window ) .
*/
mutex_lock ( & pools_reg_lock ) ;
2014-06-05 03:08:05 +04:00
mutex_lock ( & pools_lock ) ;
2014-10-10 02:28:50 +04:00
if ( list_empty ( & dev - > dma_pools ) )
empty = true ;
list_add ( & retval - > pools , & dev - > dma_pools ) ;
2014-06-05 03:08:05 +04:00
mutex_unlock ( & pools_lock ) ;
2014-10-10 02:28:50 +04:00
if ( empty ) {
int err ;
err = device_create_file ( dev , & dev_attr_pools ) ;
if ( err ) {
mutex_lock ( & pools_lock ) ;
list_del ( & retval - > pools ) ;
mutex_unlock ( & pools_lock ) ;
mutex_unlock ( & pools_reg_lock ) ;
kfree ( retval ) ;
return NULL ;
}
}
mutex_unlock ( & pools_reg_lock ) ;
2005-04-17 02:20:36 +04:00
return retval ;
}
2007-12-03 20:04:31 +03:00
EXPORT_SYMBOL ( dma_pool_create ) ;
2005-04-17 02:20:36 +04:00
2007-12-03 22:08:28 +03:00
static void pool_initialise_page ( struct dma_pool * pool , struct dma_page * page )
{
unsigned int offset = 0 ;
2007-12-03 22:16:24 +03:00
unsigned int next_boundary = pool - > boundary ;
2007-12-03 22:08:28 +03:00
do {
unsigned int next = offset + pool - > size ;
2007-12-03 22:16:24 +03:00
if ( unlikely ( ( next + pool - > size ) > = next_boundary ) ) {
next = next_boundary ;
next_boundary + = pool - > boundary ;
}
2007-12-03 22:08:28 +03:00
* ( int * ) ( page - > vaddr + offset ) = next ;
offset = next ;
} while ( offset < pool - > allocation ) ;
}
2007-12-03 20:04:31 +03:00
/*
 * Allocate one dma_page header plus pool->allocation bytes of coherent
 * memory for it, and thread the memory into a free list.
 *
 * Returns the new page (not yet linked into pool->page_list), or NULL if
 * either allocation fails.
 */
static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
{
	struct dma_page *page = kmalloc(sizeof(*page), mem_flags);

	if (!page)
		return NULL;

	page->vaddr = dma_alloc_coherent(pool->dev, pool->allocation,
					 &page->dma, mem_flags);
	if (!page->vaddr) {
		/* No backing memory: drop the header again. */
		kfree(page);
		return NULL;
	}

#ifdef DMAPOOL_DEBUG
	memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
#endif
	pool_initialise_page(pool, page);
	page->in_use = 0;
	page->offset = 0;

	return page;
}
2015-09-05 01:48:19 +03:00
static inline bool is_page_busy ( struct dma_page * page )
2005-04-17 02:20:36 +04:00
{
2007-12-03 22:08:28 +03:00
return page - > in_use ! = 0 ;
2005-04-17 02:20:36 +04:00
}
2007-12-03 20:04:31 +03:00
static void pool_free_page ( struct dma_pool * pool , struct dma_page * page )
2005-04-17 02:20:36 +04:00
{
2007-12-03 20:04:31 +03:00
dma_addr_t dma = page - > dma ;
2005-04-17 02:20:36 +04:00
2008-04-28 13:12:37 +04:00
# ifdef DMAPOOL_DEBUG
2007-12-03 20:04:31 +03:00
memset ( page - > vaddr , POOL_POISON_FREED , pool - > allocation ) ;
2005-04-17 02:20:36 +04:00
# endif
2007-12-03 20:04:31 +03:00
dma_free_coherent ( pool - > dev , pool - > allocation , page - > vaddr , dma ) ;
list_del ( & page - > page_list ) ;
kfree ( page ) ;
2005-04-17 02:20:36 +04:00
}
/**
* dma_pool_destroy - destroys a pool of dma memory blocks .
* @ pool : dma pool that will be destroyed
* Context : ! in_interrupt ( )
*
* Caller guarantees that no more memory from the pool is in use ,
* and that nothing will try to use the pool after this call .
*/
2007-12-03 20:04:31 +03:00
void dma_pool_destroy ( struct dma_pool * pool )
2005-04-17 02:20:36 +04:00
{
2020-10-14 02:54:35 +03:00
struct dma_page * page , * tmp ;
2014-10-10 02:28:50 +04:00
bool empty = false ;
2015-09-09 01:00:56 +03:00
if ( unlikely ( ! pool ) )
return ;
2014-10-10 02:28:50 +04:00
mutex_lock ( & pools_reg_lock ) ;
2007-04-25 00:45:25 +04:00
mutex_lock ( & pools_lock ) ;
2007-12-03 20:04:31 +03:00
list_del ( & pool - > pools ) ;
if ( pool - > dev & & list_empty ( & pool - > dev - > dma_pools ) )
2014-10-10 02:28:50 +04:00
empty = true ;
2007-04-25 00:45:25 +04:00
mutex_unlock ( & pools_lock ) ;
2014-10-10 02:28:50 +04:00
if ( empty )
device_remove_file ( pool - > dev , & dev_attr_pools ) ;
mutex_unlock ( & pools_reg_lock ) ;
2005-04-17 02:20:36 +04:00
2020-10-14 02:54:35 +03:00
list_for_each_entry_safe ( page , tmp , & pool - > page_list , page_list ) {
2007-12-03 22:08:28 +03:00
if ( is_page_busy ( page ) ) {
2005-04-17 02:20:36 +04:00
if ( pool - > dev )
2020-10-14 02:54:38 +03:00
dev_err ( pool - > dev , " %s %s, %p busy \n " , __func__ ,
2005-04-17 02:20:36 +04:00
pool - > name , page - > vaddr ) ;
else
2020-10-14 02:54:38 +03:00
pr_err ( " %s %s, %p busy \n " , __func__ ,
2007-12-03 20:04:31 +03:00
pool - > name , page - > vaddr ) ;
2005-04-17 02:20:36 +04:00
/* leak the still-in-use consistent memory */
2007-12-03 20:04:31 +03:00
list_del ( & page - > page_list ) ;
kfree ( page ) ;
2005-04-17 02:20:36 +04:00
} else
2007-12-03 20:04:31 +03:00
pool_free_page ( pool , page ) ;
2005-04-17 02:20:36 +04:00
}
2007-12-03 20:04:31 +03:00
kfree ( pool ) ;
2005-04-17 02:20:36 +04:00
}
2007-12-03 20:04:31 +03:00
EXPORT_SYMBOL ( dma_pool_destroy ) ;
2005-04-17 02:20:36 +04:00
/**
* dma_pool_alloc - get a block of consistent memory
* @ pool : dma pool that will produce the block
* @ mem_flags : GFP_ * bitmask
* @ handle : pointer to dma address of block
*
2019-03-06 02:48:42 +03:00
* Return : the kernel virtual address of a currently unused block ,
2005-04-17 02:20:36 +04:00
* and reports its dma address through the handle .
2007-12-03 20:16:57 +03:00
* If such a memory block can ' t be allocated , % NULL is returned .
2005-04-17 02:20:36 +04:00
*/
2007-12-03 20:04:31 +03:00
void * dma_pool_alloc ( struct dma_pool * pool , gfp_t mem_flags ,
dma_addr_t * handle )
2005-04-17 02:20:36 +04:00
{
2007-12-03 20:04:31 +03:00
unsigned long flags ;
struct dma_page * page ;
size_t offset ;
void * retval ;
2021-02-26 04:18:41 +03:00
might_alloc ( mem_flags ) ;
2010-10-27 01:21:54 +04:00
2007-12-03 20:04:31 +03:00
spin_lock_irqsave ( & pool - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
list_for_each_entry ( page , & pool - > page_list , page_list ) {
2007-12-03 22:08:28 +03:00
if ( page - > offset < pool - > allocation )
goto ready ;
2005-04-17 02:20:36 +04:00
}
2012-11-07 18:37:07 +04:00
/* pool_alloc_page() might sleep, so temporarily drop &pool->lock */
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
2015-09-09 01:02:24 +03:00
page = pool_alloc_page ( pool , mem_flags & ( ~ __GFP_ZERO ) ) ;
2012-11-07 18:37:07 +04:00
if ( ! page )
return NULL ;
2005-04-17 02:20:36 +04:00
2012-11-07 18:37:07 +04:00
spin_lock_irqsave ( & pool - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
2012-11-07 18:37:07 +04:00
list_add ( & page - > page_list , & pool - > page_list ) ;
2007-12-03 20:04:31 +03:00
ready :
2005-04-17 02:20:36 +04:00
page - > in_use + + ;
2007-12-03 22:08:28 +03:00
offset = page - > offset ;
page - > offset = * ( int * ) ( page - > vaddr + offset ) ;
2005-04-17 02:20:36 +04:00
retval = offset + page - > vaddr ;
* handle = offset + page - > dma ;
2008-04-28 13:12:37 +04:00
# ifdef DMAPOOL_DEBUG
2012-12-12 04:01:31 +04:00
{
int i ;
u8 * data = retval ;
/* page->offset is stored in first 4 bytes */
for ( i = sizeof ( page - > offset ) ; i < pool - > size ; i + + ) {
if ( data [ i ] = = POOL_POISON_FREED )
continue ;
if ( pool - > dev )
2020-10-14 02:54:38 +03:00
dev_err ( pool - > dev , " %s %s, %p (corrupted) \n " ,
__func__ , pool - > name , retval ) ;
2012-12-12 04:01:31 +04:00
else
2020-10-14 02:54:38 +03:00
pr_err ( " %s %s, %p (corrupted) \n " ,
__func__ , pool - > name , retval ) ;
2012-12-12 04:01:31 +04:00
/*
* Dump the first 4 bytes even if they are not
* POOL_POISON_FREED
*/
print_hex_dump ( KERN_ERR , " " , DUMP_PREFIX_OFFSET , 16 , 1 ,
data , pool - > size , 1 ) ;
break ;
}
}
2015-09-09 01:02:24 +03:00
if ( ! ( mem_flags & __GFP_ZERO ) )
memset ( retval , POOL_POISON_ALLOCATED , pool - > size ) ;
2005-04-17 02:20:36 +04:00
# endif
2007-12-03 20:04:31 +03:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2015-09-09 01:02:24 +03:00
mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options
Patch series "add init_on_alloc/init_on_free boot options", v10.
Provide init_on_alloc and init_on_free boot options.
These are aimed at preventing possible information leaks and making the
control-flow bugs that depend on uninitialized values more deterministic.
Enabling either of the options guarantees that the memory returned by the
page allocator and SL[AU]B is initialized with zeroes. SLOB allocator
isn't supported at the moment, as its emulation of kmem caches complicates
handling of SLAB_TYPESAFE_BY_RCU caches correctly.
Enabling init_on_free also guarantees that pages and heap objects are
initialized right after they're freed, so it won't be possible to access
stale data by using a dangling pointer.
As suggested by Michal Hocko, right now we don't let the heap users to
disable initialization for certain allocations. There's not enough
evidence that doing so can speed up real-life cases, and introducing ways
to opt-out may result in things going out of control.
This patch (of 2):
The new options are needed to prevent possible information leaks and make
control-flow bugs that depend on uninitialized values more deterministic.
This is expected to be on-by-default on Android and Chrome OS. And it
gives the opportunity for anyone else to use it under distros too via the
boot args. (The init_on_free feature is regularly requested by folks
where memory forensics is included in their threat models.)
init_on_alloc=1 makes the kernel initialize newly allocated pages and heap
objects with zeroes. Initialization is done at allocation time at the
places where checks for __GFP_ZERO are performed.
init_on_free=1 makes the kernel initialize freed pages and heap objects
with zeroes upon their deletion. This helps to ensure sensitive data
doesn't leak via use-after-free accesses.
Both init_on_alloc=1 and init_on_free=1 guarantee that the allocator
returns zeroed memory. The two exceptions are slab caches with
constructors and SLAB_TYPESAFE_BY_RCU flag. Those are never
zero-initialized to preserve their semantics.
Both init_on_alloc and init_on_free default to zero, but those defaults
can be overridden with CONFIG_INIT_ON_ALLOC_DEFAULT_ON and
CONFIG_INIT_ON_FREE_DEFAULT_ON.
If either SLUB poisoning or page poisoning is enabled, those options take
precedence over init_on_alloc and init_on_free: initialization is only
applied to unpoisoned allocations.
Slowdown for the new features compared to init_on_free=0, init_on_alloc=0:
hackbench, init_on_free=1: +7.62% sys time (st.err 0.74%)
hackbench, init_on_alloc=1: +7.75% sys time (st.err 2.14%)
Linux build with -j12, init_on_free=1: +8.38% wall time (st.err 0.39%)
Linux build with -j12, init_on_free=1: +24.42% sys time (st.err 0.52%)
Linux build with -j12, init_on_alloc=1: -0.13% wall time (st.err 0.42%)
Linux build with -j12, init_on_alloc=1: +0.57% sys time (st.err 0.40%)
The slowdown for init_on_free=0, init_on_alloc=0 compared to the baseline
is within the standard error.
The new features are also going to pave the way for hardware memory
tagging (e.g. arm64's MTE), which will require both on_alloc and on_free
hooks to set the tags for heap objects. With MTE, tagging will have the
same cost as memory initialization.
Although init_on_free is rather costly, there are paranoid use-cases where
in-memory data lifetime is desired to be minimized. There are various
arguments for/against the realism of the associated threat models, but
given that we'll need the infrastructure for MTE anyway, and there are
people who want wipe-on-free behavior no matter what the performance cost,
it seems reasonable to include it in this series.
[glider@google.com: v8]
Link: http://lkml.kernel.org/r/20190626121943.131390-2-glider@google.com
[glider@google.com: v9]
Link: http://lkml.kernel.org/r/20190627130316.254309-2-glider@google.com
[glider@google.com: v10]
Link: http://lkml.kernel.org/r/20190628093131.199499-2-glider@google.com
Link: http://lkml.kernel.org/r/20190617151050.92663-2-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Michal Hocko <mhocko@suse.cz> [page and dmapool parts
Acked-by: James Morris <jamorris@linux.microsoft.com>]
Cc: Christoph Lameter <cl@linux.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Sandeep Patil <sspatil@android.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Jann Horn <jannh@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-12 06:59:19 +03:00
if ( want_init_on_alloc ( mem_flags ) )
2015-09-09 01:02:24 +03:00
memset ( retval , 0 , pool - > size ) ;
2005-04-17 02:20:36 +04:00
return retval ;
}
2007-12-03 20:04:31 +03:00
EXPORT_SYMBOL ( dma_pool_alloc ) ;
2005-04-17 02:20:36 +04:00
2007-12-03 20:04:31 +03:00
/*
 * Find the page of @pool whose DMA range [page->dma, page->dma + allocation)
 * contains @dma.  Returns NULL when no page matches.
 */
static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
{
	struct dma_page *page;

	list_for_each_entry(page, &pool->page_list, page_list) {
		if (dma >= page->dma && (dma - page->dma) < pool->allocation)
			return page;
	}

	return NULL;
}
/**
* dma_pool_free - put block back into dma pool
* @ pool : the dma pool holding the block
* @ vaddr : virtual address of block
* @ dma : dma address of block
*
* Caller promises neither device nor driver will again touch this block
* unless it is first re - allocated .
*/
2007-12-03 20:04:31 +03:00
void dma_pool_free ( struct dma_pool * pool , void * vaddr , dma_addr_t dma )
2005-04-17 02:20:36 +04:00
{
2007-12-03 20:04:31 +03:00
struct dma_page * page ;
unsigned long flags ;
2007-12-03 22:08:28 +03:00
unsigned int offset ;
2005-04-17 02:20:36 +04:00
2011-01-14 02:47:24 +03:00
spin_lock_irqsave ( & pool - > lock , flags ) ;
2007-12-03 20:04:31 +03:00
page = pool_find_page ( pool , dma ) ;
if ( ! page ) {
2011-01-14 02:47:24 +03:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
if ( pool - > dev )
2020-10-14 02:54:38 +03:00
dev_err ( pool - > dev , " %s %s, %p/%pad (bad dma) \n " ,
__func__ , pool - > name , vaddr , & dma ) ;
2005-04-17 02:20:36 +04:00
else
2020-10-14 02:54:38 +03:00
pr_err ( " %s %s, %p/%pad (bad dma) \n " ,
__func__ , pool - > name , vaddr , & dma ) ;
2005-04-17 02:20:36 +04:00
return ;
}
2007-12-03 22:08:28 +03:00
offset = vaddr - page - > vaddr ;
mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options
Patch series "add init_on_alloc/init_on_free boot options", v10.
Provide init_on_alloc and init_on_free boot options.
These are aimed at preventing possible information leaks and making the
control-flow bugs that depend on uninitialized values more deterministic.
Enabling either of the options guarantees that the memory returned by the
page allocator and SL[AU]B is initialized with zeroes. SLOB allocator
isn't supported at the moment, as its emulation of kmem caches complicates
handling of SLAB_TYPESAFE_BY_RCU caches correctly.
Enabling init_on_free also guarantees that pages and heap objects are
initialized right after they're freed, so it won't be possible to access
stale data by using a dangling pointer.
As suggested by Michal Hocko, right now we don't let the heap users to
disable initialization for certain allocations. There's not enough
evidence that doing so can speed up real-life cases, and introducing ways
to opt-out may result in things going out of control.
This patch (of 2):
The new options are needed to prevent possible information leaks and make
control-flow bugs that depend on uninitialized values more deterministic.
This is expected to be on-by-default on Android and Chrome OS. And it
gives the opportunity for anyone else to use it under distros too via the
boot args. (The init_on_free feature is regularly requested by folks
where memory forensics is included in their threat models.)
init_on_alloc=1 makes the kernel initialize newly allocated pages and heap
objects with zeroes. Initialization is done at allocation time at the
places where checks for __GFP_ZERO are performed.
init_on_free=1 makes the kernel initialize freed pages and heap objects
with zeroes upon their deletion. This helps to ensure sensitive data
doesn't leak via use-after-free accesses.
Both init_on_alloc=1 and init_on_free=1 guarantee that the allocator
returns zeroed memory. The two exceptions are slab caches with
constructors and SLAB_TYPESAFE_BY_RCU flag. Those are never
zero-initialized to preserve their semantics.
Both init_on_alloc and init_on_free default to zero, but those defaults
can be overridden with CONFIG_INIT_ON_ALLOC_DEFAULT_ON and
CONFIG_INIT_ON_FREE_DEFAULT_ON.
If either SLUB poisoning or page poisoning is enabled, those options take
precedence over init_on_alloc and init_on_free: initialization is only
applied to unpoisoned allocations.
Slowdown for the new features compared to init_on_free=0, init_on_alloc=0:
hackbench, init_on_free=1: +7.62% sys time (st.err 0.74%)
hackbench, init_on_alloc=1: +7.75% sys time (st.err 2.14%)
Linux build with -j12, init_on_free=1: +8.38% wall time (st.err 0.39%)
Linux build with -j12, init_on_free=1: +24.42% sys time (st.err 0.52%)
Linux build with -j12, init_on_alloc=1: -0.13% wall time (st.err 0.42%)
Linux build with -j12, init_on_alloc=1: +0.57% sys time (st.err 0.40%)
The slowdown for init_on_free=0, init_on_alloc=0 compared to the baseline
is within the standard error.
The new features are also going to pave the way for hardware memory
tagging (e.g. arm64's MTE), which will require both on_alloc and on_free
hooks to set the tags for heap objects. With MTE, tagging will have the
same cost as memory initialization.
Although init_on_free is rather costly, there are paranoid use-cases where
in-memory data lifetime is desired to be minimized. There are various
arguments for/against the realism of the associated threat models, but
given that we'll need the infrastructure for MTE anyway, and there are
people who want wipe-on-free behavior no matter what the performance cost,
it seems reasonable to include it in this series.
[glider@google.com: v8]
Link: http://lkml.kernel.org/r/20190626121943.131390-2-glider@google.com
[glider@google.com: v9]
Link: http://lkml.kernel.org/r/20190627130316.254309-2-glider@google.com
[glider@google.com: v10]
Link: http://lkml.kernel.org/r/20190628093131.199499-2-glider@google.com
Link: http://lkml.kernel.org/r/20190617151050.92663-2-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Michal Hocko <mhocko@suse.cz> [page and dmapool parts
Acked-by: James Morris <jamorris@linux.microsoft.com>]
Cc: Christoph Lameter <cl@linux.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Sandeep Patil <sspatil@android.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Jann Horn <jannh@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-12 06:59:19 +03:00
if ( want_init_on_free ( ) )
memset ( vaddr , 0 , pool - > size ) ;
2008-04-28 13:12:37 +04:00
# ifdef DMAPOOL_DEBUG
2007-12-03 22:08:28 +03:00
if ( ( dma - page - > dma ) ! = offset ) {
2011-01-14 02:47:24 +03:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
if ( pool - > dev )
2020-10-14 02:54:38 +03:00
dev_err ( pool - > dev , " %s %s, %p (bad vaddr)/%pad \n " ,
__func__ , pool - > name , vaddr , & dma ) ;
2005-04-17 02:20:36 +04:00
else
2020-10-14 02:54:38 +03:00
pr_err ( " %s %s, %p (bad vaddr)/%pad \n " ,
__func__ , pool - > name , vaddr , & dma ) ;
2005-04-17 02:20:36 +04:00
return ;
}
2007-12-03 22:08:28 +03:00
{
unsigned int chain = page - > offset ;
while ( chain < pool - > allocation ) {
if ( chain ! = offset ) {
chain = * ( int * ) ( page - > vaddr + chain ) ;
continue ;
}
2011-01-14 02:47:24 +03:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2007-12-03 22:08:28 +03:00
if ( pool - > dev )
2020-10-14 02:54:38 +03:00
dev_err ( pool - > dev , " %s %s, dma %pad already free \n " ,
__func__ , pool - > name , & dma ) ;
2007-12-03 22:08:28 +03:00
else
2020-10-14 02:54:38 +03:00
pr_err ( " %s %s, dma %pad already free \n " ,
__func__ , pool - > name , & dma ) ;
2007-12-03 22:08:28 +03:00
return ;
}
2005-04-17 02:20:36 +04:00
}
2007-12-03 20:04:31 +03:00
memset ( vaddr , POOL_POISON_FREED , pool - > size ) ;
2005-04-17 02:20:36 +04:00
# endif
page - > in_use - - ;
2007-12-03 22:08:28 +03:00
* ( int * ) vaddr = page - > offset ;
page - > offset = offset ;
2005-04-17 02:20:36 +04:00
/*
* Resist a temptation to do
2007-12-03 22:08:28 +03:00
* if ( ! is_page_busy ( page ) ) pool_free_page ( pool , page ) ;
2005-04-17 02:20:36 +04:00
* Better have a few empty pages hang around .
*/
2007-12-03 20:04:31 +03:00
spin_unlock_irqrestore ( & pool - > lock , flags ) ;
2005-04-17 02:20:36 +04:00
}
2007-12-03 20:04:31 +03:00
EXPORT_SYMBOL ( dma_pool_free ) ;
2005-04-17 02:20:36 +04:00
devres: device resource management
Implement device resource management, in short, devres. A device
driver can allocate arbirary size of devres data which is associated
with a release function. On driver detach, release function is
invoked on the devres data, then, devres data is freed.
devreses are typed by associated release functions. Some devreses are
better represented by single instance of the type while others need
multiple instances sharing the same release function. Both usages are
supported.
devreses can be grouped using devres group such that a device driver
can easily release acquired resources halfway through initialization
or selectively release resources (e.g. resources for port 1 out of 4
ports).
This patch adds devres core including documentation and the following
managed interfaces.
* alloc/free : devm_kzalloc(), devm_kzfree()
* IO region : devm_request_region(), devm_release_region()
* IRQ : devm_request_irq(), devm_free_irq()
* DMA : dmam_alloc_coherent(), dmam_free_coherent(),
dmam_declare_coherent_memory(), dmam_pool_create(),
dmam_pool_destroy()
* PCI : pcim_enable_device(), pcim_pin_device(), pci_is_managed()
* iomap : devm_ioport_map(), devm_ioport_unmap(), devm_ioremap(),
devm_ioremap_nocache(), devm_iounmap(), pcim_iomap_table(),
pcim_iomap(), pcim_iounmap()
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-01-20 10:00:26 +03:00
/*
* Managed DMA pool
*/
/*
 * devres release callback for managed pools.
 *
 * @res is the devres data area, which holds a single pointer to the
 * pool; that pool is handed to dma_pool_destroy().  @dev is unused.
 */
static void dmam_pool_release(struct device *dev, void *res)
{
	struct dma_pool **slot = res;

	dma_pool_destroy(*slot);
}
/*
 * devres match callback for managed pools.
 *
 * @res is the devres data area, which holds a single pointer to a pool;
 * @match_data is the pool being searched for.  @dev is unused.
 *
 * Returns non-zero when the stored pool pointer equals @match_data,
 * zero otherwise.
 */
static int dmam_pool_match(struct device *dev, void *res, void *match_data)
{
	struct dma_pool **slot = res;

	return *slot == match_data;
}
/**
* dmam_pool_create - Managed dma_pool_create ( )
* @ name : name of pool , for diagnostics
* @ dev : device that will be doing the DMA
* @ size : size of the blocks in this pool .
* @ align : alignment requirement for blocks ; must be a power of two
* @ allocation : returned blocks won ' t cross this boundary ( or zero )
*
* Managed dma_pool_create ( ) . DMA pool created with this function is
* automatically destroyed on driver detach .
2019-03-06 02:48:42 +03:00
*
* Return : a managed dma allocation pool with the requested
* characteristics , or % NULL if one can ' t be created .
devres: device resource management
Implement device resource management, in short, devres. A device
driver can allocate arbitrary size of devres data which is associated
with a release function. On driver detach, release function is
invoked on the devres data, then, devres data is freed.
devreses are typed by associated release functions. Some devreses are
better represented by single instance of the type while others need
multiple instances sharing the same release function. Both usages are
supported.
devreses can be grouped using devres group such that a device driver
can easily release acquired resources halfway through initialization
or selectively release resources (e.g. resources for port 1 out of 4
ports).
This patch adds devres core including documentation and the following
managed interfaces.
* alloc/free : devm_kzalloc(), devm_kzfree()
* IO region : devm_request_region(), devm_release_region()
* IRQ : devm_request_irq(), devm_free_irq()
* DMA : dmam_alloc_coherent(), dmam_free_coherent(),
dmam_declare_coherent_memory(), dmam_pool_create(),
dmam_pool_destroy()
* PCI : pcim_enable_device(), pcim_pin_device(), pci_is_managed()
* iomap : devm_ioport_map(), devm_ioport_unmap(), devm_ioremap(),
devm_ioremap_nocache(), devm_iounmap(), pcim_iomap_table(),
pcim_iomap(), pcim_iounmap()
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-01-20 10:00:26 +03:00
*/
struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
				  size_t size, size_t align, size_t allocation)
{
	struct dma_pool **slot;
	struct dma_pool *created;

	/* The devres data area holds exactly one pool pointer. */
	slot = devres_alloc(dmam_pool_release, sizeof(*slot), GFP_KERNEL);
	if (slot == NULL)
		return NULL;

	created = dma_pool_create(name, dev, size, align, allocation);
	*slot = created;
	if (created == NULL) {
		/* No pool was created, so drop the unused devres entry. */
		devres_free(slot);
		return NULL;
	}

	/* Register the entry on @dev so dmam_pool_release() can find it. */
	devres_add(dev, slot);
	return created;
}
2007-12-03 20:04:31 +03:00
EXPORT_SYMBOL ( dmam_pool_create ) ;
devres: device resource management
Implement device resource management, in short, devres. A device
driver can allocate arbitrary size of devres data which is associated
with a release function. On driver detach, release function is
invoked on the devres data, then, devres data is freed.
devreses are typed by associated release functions. Some devreses are
better represented by single instance of the type while others need
multiple instances sharing the same release function. Both usages are
supported.
devreses can be grouped using devres group such that a device driver
can easily release acquired resources halfway through initialization
or selectively release resources (e.g. resources for port 1 out of 4
ports).
This patch adds devres core including documentation and the following
managed interfaces.
* alloc/free : devm_kzalloc(), devm_kzfree()
* IO region : devm_request_region(), devm_release_region()
* IRQ : devm_request_irq(), devm_free_irq()
* DMA : dmam_alloc_coherent(), dmam_free_coherent(),
dmam_declare_coherent_memory(), dmam_pool_create(),
dmam_pool_destroy()
* PCI : pcim_enable_device(), pcim_pin_device(), pci_is_managed()
* iomap : devm_ioport_map(), devm_ioport_unmap(), devm_ioremap(),
devm_ioremap_nocache(), devm_iounmap(), pcim_iomap_table(),
pcim_iomap(), pcim_iounmap()
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-01-20 10:00:26 +03:00
/**
* dmam_pool_destroy - Managed dma_pool_destroy ( )
* @ pool : dma pool that will be destroyed
*
* Managed dma_pool_destroy ( ) .
*/
void dmam_pool_destroy ( struct dma_pool * pool )
{
struct device * dev = pool - > dev ;
2014-06-05 03:10:02 +04:00
WARN_ON ( devres_release ( dev , dmam_pool_release , dmam_pool_match , pool ) ) ;
devres: device resource management
Implement device resource management, in short, devres. A device
driver can allocate arbirary size of devres data which is associated
with a release function. On driver detach, release function is
invoked on the devres data, then, devres data is freed.
devreses are typed by associated release functions. Some devreses are
better represented by single instance of the type while others need
multiple instances sharing the same release function. Both usages are
supported.
devreses can be grouped using devres group such that a device driver
can easily release acquired resources halfway through initialization
or selectively release resources (e.g. resources for port 1 out of 4
ports).
This patch adds devres core including documentation and the following
managed interfaces.
* alloc/free : devm_kzalloc(), devm_kzfree()
* IO region : devm_request_region(), devm_release_region()
* IRQ : devm_request_irq(), devm_free_irq()
* DMA : dmam_alloc_coherent(), dmam_free_coherent(),
dmam_declare_coherent_memory(), dmam_pool_create(),
dmam_pool_destroy()
* PCI : pcim_enable_device(), pcim_pin_device(), pci_is_managed()
* iomap : devm_ioport_map(), devm_ioport_unmap(), devm_ioremap(),
devm_ioremap_nocache(), devm_iounmap(), pcim_iomap_table(),
pcim_iomap(), pcim_iounmap()
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-01-20 10:00:26 +03:00
}
2007-12-03 20:04:31 +03:00
EXPORT_SYMBOL ( dmam_pool_destroy ) ;