/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

static unsigned long page_table_shareable(struct vm_area_struct *svma,
				struct vm_area_struct *vma,
				unsigned long addr, pgoff_t idx)
{
	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
				svma->vm_start;
	unsigned long sbase = saddr & PUD_MASK;
	unsigned long s_end = sbase + PUD_SIZE;

	/*
	 * match the virtual addresses, permission and the alignment of the
	 * page table page.
	 */
	if (pmd_index(addr) != pmd_index(saddr) ||
	    vma->vm_flags != svma->vm_flags ||
	    sbase < svma->vm_start || svma->vm_end < s_end)
		return 0;

	return saddr;
}

static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
	unsigned long base = addr & PUD_MASK;
	unsigned long end = base + PUD_SIZE;

	/*
	 * check on proper vm_flags and page table alignment
	 */
	if (vma->vm_flags & VM_MAYSHARE &&
	    vma->vm_start <= base && end <= vma->vm_end)
		return 1;
	return 0;
}

/*
 * search for a shareable pmd page for hugetlb.
 */
static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
	struct vm_area_struct *vma = find_vma(mm, addr);
	struct address_space *mapping = vma->vm_file->f_mapping;
	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
			vma->vm_pgoff;
	struct prio_tree_iter iter;
	struct vm_area_struct *svma;
	unsigned long saddr;
	pte_t *spte = NULL;

	if (!vma_shareable(vma, addr))
		return;

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
		if (svma == vma)
			continue;

		saddr = page_table_shareable(svma, vma, addr, idx);
		if (saddr) {
			spte = huge_pte_offset(svma->vm_mm, saddr);
			if (spte) {
				get_page(virt_to_page(spte));
				break;
			}
		}
	}

	if (!spte)
		goto out;

	spin_lock(&mm->page_table_lock);
	if (pud_none(*pud))
		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
	else
		put_page(virt_to_page(spte));
	spin_unlock(&mm->page_table_lock);
out:
	spin_unlock(&mapping->i_mmap_lock);
}

/*
 * Unmap a huge page backed by a shared pte page.
 *
 * The hugetlb pte page is refcounted at the time of mapping.  If the pte
 * page is shared, indicated by page_count > 1, unmapping is achieved by
 * clearing the pud and decrementing the refcount.  If the count == 1, the
 * pte page is not shared.
 *
 * Called with vma->vm_mm->page_table_lock held.
 *
 * Returns: 1 - successfully unmapped a shared pte page
 *	    0 - the underlying pte page is not shared, or it is the last user
 */
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	pgd_t *pgd = pgd_offset(mm, *addr);
	pud_t *pud = pud_offset(pgd, *addr);

	BUG_ON(page_count(virt_to_page(ptep)) == 0);
	if (page_count(virt_to_page(ptep)) == 1)
		return 0;

	pud_clear(pud);
	put_page(virt_to_page(ptep));
	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
	return 1;
}

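#if 0
/*
 * Illustrative sketch (not built): how the share/unshare pair above is
 * meant to be used by a caller.  The helper names example_map_step() and
 * example_unmap_step() are hypothetical; the real callers live in
 * mm/hugetlb.c.
 */
static pte_t *example_map_step(struct mm_struct *mm, unsigned long addr)
{
	/*
	 * huge_pte_alloc() may end up in huge_pmd_share(), which attaches
	 * this mm to a pmd page already populated by another mapping of
	 * the same file and takes a reference on that page instead of
	 * allocating a fresh one.
	 */
	return huge_pte_alloc(mm, addr, PMD_SIZE);
}

static int example_unmap_step(struct mm_struct *mm, unsigned long *addr,
			      pte_t *ptep)
{
	/*
	 * With mm->page_table_lock held, huge_pmd_unshare() detaches a
	 * shared pmd page: the pud is cleared and only the refcount is
	 * dropped, so the huge ptes stay intact for the remaining users.
	 * A return of 0 means the pmd page was private (or we were the
	 * last user) and the caller must tear down the huge pte itself.
	 */
	return huge_pmd_unshare(mm, addr, ptep);
}
#endif
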
pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (pud) {
		if (sz == PUD_SIZE) {
			pte = (pte_t *)pud;
		} else {
			BUG_ON(sz != PMD_SIZE);
			if (pud_none(*pud))
				huge_pmd_share(mm, addr, pud);
			pte = (pte_t *) pmd_alloc(mm, pud, addr);
		}
	}
	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));

	return pte;
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd = NULL;

	pgd = pgd_offset(mm, addr);
	if (pgd_present(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (pud_present(*pud)) {
			if (pud_large(*pud))
				return (pte_t *)pud;
			pmd = pmd_offset(pud, addr);
		}
	}
	return (pte_t *) pmd;
}

#if 0	/* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	struct page *page;
	struct vm_area_struct *vma;
	pte_t *pte;

	vma = find_vma(mm, address);
	if (!vma || !is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	pte = huge_pte_offset(mm, address);

	/* hugetlb should be locked, and hence, prefaulted */
	WARN_ON(!pte || pte_none(*pte));

	/* index of the base page within the huge page */
	page = &pte_page(*pte)[(address & ~HPAGE_MASK) >> PAGE_SHIFT];

	WARN_ON(!PageHead(page));

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	return NULL;
}

#else

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
	return !!(pmd_val(pmd) & _PAGE_PSE);
}

int pud_huge(pud_t pud)
{
	return !!(pud_val(pud) & _PAGE_PSE);
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	struct page *page;

	page = pte_page(*(pte_t *)pmd);
	if (page)
		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
	return page;
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int write)
{
	struct page *page;

	page = pte_page(*(pte_t *)pud);
	if (page)
		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
	return page;
}

#endif

/* x86_64 also uses this file */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > mm->cached_hole_size) {
		start_addr = mm->free_area_cache;
	} else {
		start_addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	addr = ALIGN(start_addr, huge_page_size(h));

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = ALIGN(vma->vm_end, huge_page_size(h));
	}
}

static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
		unsigned long addr0, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev_vma;
	unsigned long base = mm->mmap_base, addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	int first_time = 1;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	if (len <= largest_hole) {
		largest_hole = 0;
		mm->free_area_cache = base;
	}
try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or can't fit in requested address hole */
	addr = (mm->free_area_cache - len) & huge_page_mask(h);
	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
			return addr;

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		if (addr + len <= vma->vm_start &&
		    (!prev_vma || (addr >= prev_vma->vm_end))) {
			/* remember the address as a hint for next time */
			mm->cached_hole_size = largest_hole;
			return (mm->free_area_cache = addr);
		} else {
			/* pull free_area_cache down to the first hole */
			if (mm->free_area_cache == vma->vm_end) {
				mm->free_area_cache = vma->vm_start;
				mm->cached_hole_size = largest_hole;
			}
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
			largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = (vma->vm_start - len) & huge_page_mask(h);
	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		largest_hole = 0;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
			len, pgoff, flags);

	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, huge_page_size(h));
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}

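/*
 * Illustrative user-space trigger (a sketch; the mount point, file name and
 * the 2 MB length below are assumptions): mapping a file that lives on a
 * mounted hugetlbfs reaches hugetlb_get_unmapped_area() above through
 * file->f_op->get_unmapped_area:
 *
 *	int fd = open("/mnt/huge/example", O_CREAT | O_RDWR, 0600);
 *	void *p = mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, 0);
 *
 * The length and any address hint are checked and aligned against the huge
 * page size of the file's hstate before a suitable gap is searched for.
 */
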
#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/

#ifdef CONFIG_X86_64
static __init int setup_hugepagesz(char *opt)
{
	unsigned long ps = memparse(opt, &opt);
	if (ps == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	} else if (ps == PUD_SIZE && cpu_has_gbpages) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	} else {
		printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
			ps >> 20);
		return 0;
	}
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
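
/*
 * Example boot command line (assuming the CPU advertises the GB-pages
 * feature, so that both hstates are accepted by setup_hugepagesz() above):
 *
 *	hugepagesz=1G hugepages=4 hugepagesz=2M hugepages=64
 *
 * Each hugepagesz= selects the huge page size that a following hugepages=
 * count applies to; an unsupported size is rejected with the error above.
 */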
#endif