2010-04-09 13:57:01 +04:00
/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */
static struct page * pcpu_chunk_page ( struct pcpu_chunk * chunk ,
unsigned int cpu , int page_idx )
{
/* must not be used on pre-mapped chunk */
WARN_ON ( chunk - > immutable ) ;
return vmalloc_to_page ( ( void * ) pcpu_chunk_addr ( chunk , cpu , page_idx ) ) ;
}
/**
2014-09-02 22:46:01 +04:00
* pcpu_get_pages - get temp pages array
2010-04-09 13:57:01 +04:00
*
2014-09-02 22:46:01 +04:00
* Returns pointer to array of pointers to struct page which can be indexed
2014-09-02 22:46:01 +04:00
* with pcpu_page_idx ( ) . Note that there is only one array and accesses
* should be serialized by pcpu_alloc_mutex .
2010-04-09 13:57:01 +04:00
*
* RETURNS :
2014-09-02 22:46:01 +04:00
* Pointer to temp pages array on success .
2010-04-09 13:57:01 +04:00
*/
2017-02-25 23:59:26 +03:00
static struct page * * pcpu_get_pages ( void )
2010-04-09 13:57:01 +04:00
{
static struct page * * pages ;
size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof ( pages [ 0 ] ) ;
2014-09-02 22:46:01 +04:00
lockdep_assert_held ( & pcpu_alloc_mutex ) ;
if ( ! pages )
2018-02-16 21:09:58 +03:00
pages = pcpu_mem_zalloc ( pages_size , GFP_KERNEL ) ;
2010-04-09 13:57:01 +04:00
return pages ;
}
/**
* pcpu_free_pages - free pages which were allocated for @ chunk
* @ chunk : chunk pages were allocated for
* @ pages : array of pages to be freed , indexed by pcpu_page_idx ( )
* @ page_start : page index of the first page to be freed
* @ page_end : page index of the last page to be freed + 1
*
* Free pages [ @ page_start and @ page_end ) in @ pages for all units .
* The pages were allocated for @ chunk .
*/
static void pcpu_free_pages ( struct pcpu_chunk * chunk ,
2014-09-02 22:46:01 +04:00
struct page * * pages , int page_start , int page_end )
2010-04-09 13:57:01 +04:00
{
unsigned int cpu ;
int i ;
for_each_possible_cpu ( cpu ) {
for ( i = page_start ; i < page_end ; i + + ) {
struct page * page = pages [ pcpu_page_idx ( cpu , i ) ] ;
if ( page )
__free_page ( page ) ;
}
}
}
/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 * @gfp: allocation flags passed to the underlying allocator
 *
 * Allocate pages [@page_start, @page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 *
 * Each page is an order-0 allocation on the node of the owning cpu,
 * with __GFP_HIGHMEM added to @gfp.
 *
 * RETURNS:
 * 0 on success, -ENOMEM if any allocation fails.  On failure every
 * page allocated by this call has already been freed.
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end,
			    gfp_t gfp)
{
	unsigned int cpu, done_cpu;
	int idx;

	gfp |= __GFP_HIGHMEM;

	for_each_possible_cpu(cpu) {
		for (idx = page_start; idx < page_end; idx++) {
			struct page **slot = &pages[pcpu_page_idx(cpu, idx)];

			*slot = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (*slot == NULL)
				goto rollback;
		}
	}

	return 0;

rollback:
	/* @cpu failed at @idx: release what it got before the failure */
	for (idx--; idx >= page_start; idx--)
		__free_page(pages[pcpu_page_idx(cpu, idx)]);

	/* cpus visited before @cpu were fully populated, release them all */
	for_each_possible_cpu(done_cpu) {
		if (done_cpu == cpu)
			break;

		for (idx = page_start; idx < page_end; idx++)
			__free_page(pages[pcpu_page_idx(done_cpu, idx)]);
	}

	return -ENOMEM;
}
/**
* pcpu_pre_unmap_flush - flush cache prior to unmapping
* @ chunk : chunk the regions to be flushed belongs to
* @ page_start : page index of the first page to be flushed
* @ page_end : page index of the last page to be flushed + 1
*
* Pages in [ @ page_start , @ page_end ) of @ chunk are about to be
* unmapped . Flush cache . As each flushing trial can be very
* expensive , issue flush on the whole region at once rather than
* doing it for each cpu . This could be an overkill but is more
* scalable .
*/
static void pcpu_pre_unmap_flush ( struct pcpu_chunk * chunk ,
int page_start , int page_end )
{
flush_cache_vunmap (
2011-11-18 22:55:35 +04:00
pcpu_chunk_addr ( chunk , pcpu_low_unit_cpu , page_start ) ,
pcpu_chunk_addr ( chunk , pcpu_high_unit_cpu , page_end ) ) ;
2010-04-09 13:57:01 +04:00
}
static void __pcpu_unmap_pages ( unsigned long addr , int nr_pages )
{
unmap_kernel_range_noflush ( addr , nr_pages < < PAGE_SHIFT ) ;
}
/**
* pcpu_unmap_pages - unmap pages out of a pcpu_chunk
* @ chunk : chunk of interest
* @ pages : pages array which can be used to pass information to free
* @ page_start : page index of the first page to unmap
* @ page_end : page index of the last page to unmap + 1
*
* For each cpu , unmap pages [ @ page_start , @ page_end ) out of @ chunk .
* Corresponding elements in @ pages were cleared by the caller and can
* be used to carry information to pcpu_free_pages ( ) which will be
* called after all unmaps are finished . The caller should call
* proper pre / post flush functions .
*/
static void pcpu_unmap_pages ( struct pcpu_chunk * chunk ,
2014-09-02 22:46:01 +04:00
struct page * * pages , int page_start , int page_end )
2010-04-09 13:57:01 +04:00
{
unsigned int cpu ;
int i ;
for_each_possible_cpu ( cpu ) {
for ( i = page_start ; i < page_end ; i + + ) {
struct page * page ;
page = pcpu_chunk_page ( chunk , cpu , i ) ;
WARN_ON ( ! page ) ;
pages [ pcpu_page_idx ( cpu , i ) ] = page ;
}
__pcpu_unmap_pages ( pcpu_chunk_addr ( chunk , cpu , page_start ) ,
page_end - page_start ) ;
}
}
/**
* pcpu_post_unmap_tlb_flush - flush TLB after unmapping
* @ chunk : pcpu_chunk the regions to be flushed belong to
* @ page_start : page index of the first page to be flushed
* @ page_end : page index of the last page to be flushed + 1
*
* Pages [ @ page_start , @ page_end ) of @ chunk have been unmapped . Flush
* TLB for the regions . This can be skipped if the area is to be
* returned to vmalloc as vmalloc will handle TLB flushing lazily .
*
* As with pcpu_pre_unmap_flush ( ) , TLB flushing also is done at once
* for the whole region .
*/
static void pcpu_post_unmap_tlb_flush ( struct pcpu_chunk * chunk ,
int page_start , int page_end )
{
flush_tlb_kernel_range (
2011-11-18 22:55:35 +04:00
pcpu_chunk_addr ( chunk , pcpu_low_unit_cpu , page_start ) ,
pcpu_chunk_addr ( chunk , pcpu_high_unit_cpu , page_end ) ) ;
2010-04-09 13:57:01 +04:00
}
static int __pcpu_map_pages ( unsigned long addr , struct page * * pages ,
int nr_pages )
{
return map_kernel_range_noflush ( addr , nr_pages < < PAGE_SHIFT ,
PAGE_KERNEL , pages ) ;
}
/**
* pcpu_map_pages - map pages into a pcpu_chunk
* @ chunk : chunk of interest
* @ pages : pages array containing pages to be mapped
* @ page_start : page index of the first page to map
* @ page_end : page index of the last page to map + 1
*
* For each cpu , map pages [ @ page_start , @ page_end ) into @ chunk . The
* caller is responsible for calling pcpu_post_map_flush ( ) after all
* mappings are complete .
*
2014-09-02 22:46:01 +04:00
* This function is responsible for setting up whatever is necessary for
* reverse lookup ( addr - > chunk ) .
2010-04-09 13:57:01 +04:00
*/
static int pcpu_map_pages ( struct pcpu_chunk * chunk ,
2014-09-02 22:46:01 +04:00
struct page * * pages , int page_start , int page_end )
2010-04-09 13:57:01 +04:00
{
unsigned int cpu , tcpu ;
int i , err ;
for_each_possible_cpu ( cpu ) {
err = __pcpu_map_pages ( pcpu_chunk_addr ( chunk , cpu , page_start ) ,
& pages [ pcpu_page_idx ( cpu , page_start ) ] ,
page_end - page_start ) ;
if ( err < 0 )
goto err ;
2014-09-02 22:46:01 +04:00
for ( i = page_start ; i < page_end ; i + + )
2010-04-09 13:57:01 +04:00
pcpu_set_page_chunk ( pages [ pcpu_page_idx ( cpu , i ) ] ,
chunk ) ;
}
return 0 ;
err :
for_each_possible_cpu ( tcpu ) {
if ( tcpu = = cpu )
break ;
__pcpu_unmap_pages ( pcpu_chunk_addr ( chunk , tcpu , page_start ) ,
page_end - page_start ) ;
}
2014-08-16 00:06:10 +04:00
pcpu_post_unmap_tlb_flush ( chunk , page_start , page_end ) ;
2010-04-09 13:57:01 +04:00
return err ;
}
/**
* pcpu_post_map_flush - flush cache after mapping
* @ chunk : pcpu_chunk the regions to be flushed belong to
* @ page_start : page index of the first page to be flushed
* @ page_end : page index of the last page to be flushed + 1
*
* Pages [ @ page_start , @ page_end ) of @ chunk have been mapped . Flush
* cache .
*
* As with pcpu_pre_unmap_flush ( ) , TLB flushing also is done at once
* for the whole region .
*/
static void pcpu_post_map_flush ( struct pcpu_chunk * chunk ,
int page_start , int page_end )
{
flush_cache_vmap (
2011-11-18 22:55:35 +04:00
pcpu_chunk_addr ( chunk , pcpu_low_unit_cpu , page_start ) ,
pcpu_chunk_addr ( chunk , pcpu_high_unit_cpu , page_end ) ) ;
2010-04-09 13:57:01 +04:00
}
/**
* pcpu_populate_chunk - populate and map an area of a pcpu_chunk
* @ chunk : chunk of interest
2014-09-02 22:46:02 +04:00
* @ page_start : the start page
* @ page_end : the end page
2018-02-16 21:07:19 +03:00
* @ gfp : allocation flags passed to the underlying memory allocator
2010-04-09 13:57:01 +04:00
*
* For each cpu , populate and map pages [ @ page_start , @ page_end ) into
2014-09-02 22:46:01 +04:00
* @ chunk .
2010-04-09 13:57:01 +04:00
*
* CONTEXT :
* pcpu_alloc_mutex , does GFP_KERNEL allocation .
*/
2014-09-02 22:46:02 +04:00
static int pcpu_populate_chunk ( struct pcpu_chunk * chunk ,
2018-02-16 21:07:19 +03:00
int page_start , int page_end , gfp_t gfp )
2010-04-09 13:57:01 +04:00
{
struct page * * pages ;
2017-02-25 23:59:26 +03:00
pages = pcpu_get_pages ( ) ;
2010-04-09 13:57:01 +04:00
if ( ! pages )
return - ENOMEM ;
2018-02-16 21:07:19 +03:00
if ( pcpu_alloc_pages ( chunk , pages , page_start , page_end , gfp ) )
2014-09-02 22:46:02 +04:00
return - ENOMEM ;
2010-04-09 13:57:01 +04:00
2014-09-02 22:46:02 +04:00
if ( pcpu_map_pages ( chunk , pages , page_start , page_end ) ) {
pcpu_free_pages ( chunk , pages , page_start , page_end ) ;
return - ENOMEM ;
2010-04-09 13:57:01 +04:00
}
pcpu_post_map_flush ( chunk , page_start , page_end ) ;
return 0 ;
}
/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start, @page_end)
 * from @chunk.  The cache is flushed before unmapping; no TLB flush is
 * issued here.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end)
{
	/*
	 * Reaching this point implies at least one earlier population
	 * attempt succeeded, so the temp pages array has to exist by
	 * now.
	 */
	struct page **pages = pcpu_get_pages();

	BUG_ON(!pages);

	/* flush, then unmap while collecting the backing pages */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);
	pcpu_unmap_pages(chunk, pages, page_start, page_end);

	/* no need to flush tlb, vmalloc will handle it lazily */

	/* finally release the pages collected by the unmap step */
	pcpu_free_pages(chunk, pages, page_start, page_end);
}
2018-02-16 21:07:19 +03:00
/**
 * pcpu_create_chunk - create a chunk backed by vmalloc areas
 * @gfp: allocation flags passed to pcpu_alloc_chunk()
 *
 * Allocates a chunk, obtains one vm area per group through
 * pcpu_get_vm_areas() and stores the areas in the chunk's data field.
 * The chunk base address is the first area's address minus the first
 * group offset.  Chunk allocation stats and the create-chunk
 * tracepoint are updated on success.
 *
 * RETURNS:
 * Pointer to the new chunk, or NULL if either the chunk or the vm
 * areas could not be obtained.
 */
static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
	struct pcpu_chunk *chunk = pcpu_alloc_chunk(gfp);
	struct vm_struct **areas;

	if (chunk == NULL)
		return NULL;

	areas = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				  pcpu_nr_groups, pcpu_atom_size);
	if (areas == NULL) {
		/* no address space for the chunk, drop it again */
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = areas;
	chunk->base_addr = areas[0]->addr - pcpu_group_offsets[0];

	pcpu_stats_chunk_alloc();
	trace_percpu_create_chunk(chunk->base_addr);

	return chunk;
}
static void pcpu_destroy_chunk ( struct pcpu_chunk * chunk )
{
2017-06-29 17:56:26 +03:00
if ( ! chunk )
return ;
2017-06-20 02:28:31 +03:00
pcpu_stats_chunk_dealloc ( ) ;
2017-06-20 02:28:32 +03:00
trace_percpu_destroy_chunk ( chunk - > base_addr ) ;
2017-06-20 02:28:31 +03:00
2017-06-29 17:56:26 +03:00
if ( chunk - > data )
2010-04-09 13:57:01 +04:00
pcpu_free_vm_areas ( chunk - > data , pcpu_nr_groups ) ;
pcpu_free_chunk ( chunk ) ;
}
/*
 * Resolve @addr to its struct page.  With this allocator the lookup
 * is delegated entirely to vmalloc_to_page().
 */
static struct page *pcpu_addr_to_page(void *addr)
{
	struct page *backing = vmalloc_to_page(addr);

	return backing;
}
/*
 * Check @ai against allocator specific restrictions.  This allocator
 * imposes none, so @ai is not inspected and the result is always 0.
 */
static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}