2005-04-16 15:20:36 -07:00
/*
* PPC64 ( POWER4 ) Huge TLB Page Support for Kernel .
*
* Copyright ( C ) 2003 David Gibson , IBM Corporation .
*
* Based on the IA - 32 version :
* Copyright ( C ) 2002 , Rohit Seth < rohit . seth @ intel . com >
*/
# include <linux/init.h>
# include <linux/fs.h>
# include <linux/mm.h>
# include <linux/hugetlb.h>
# include <linux/pagemap.h>
# include <linux/slab.h>
# include <linux/err.h>
# include <linux/sysctl.h>
# include <asm/mman.h>
# include <asm/pgalloc.h>
# include <asm/tlb.h>
# include <asm/tlbflush.h>
# include <asm/mmu_context.h>
# include <asm/machdep.h>
# include <asm/cputable.h>
2007-03-10 00:05:37 +01:00
# include <asm/spu.h>
2005-04-16 15:20:36 -07:00
2008-07-23 21:27:55 -07:00
/* log2 of the huge page sizes handled in this file:
 * 2^16 = 64K, 2^24 = 16M, 2^34 = 16G. */
# define PAGE_SHIFT_64K 16
# define PAGE_SHIFT_16M 24
# define PAGE_SHIFT_16G 34
2008-01-04 09:59:50 +11:00
2005-08-11 16:55:21 +10:00
/* Number of (1 << SID_SHIFT)-sized areas below the 4GB (0x100000000) mark. */
# define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT)
/* Number of (1 << HTLB_AREA_SHIFT)-sized areas within PGTABLE_RANGE. */
# define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
2008-07-23 21:27:53 -07:00
/* Capacity of gpage_freearray. */
# define MAX_NUMBER_GPAGES 1024
/* Tracks the 16G pages after the device tree is scanned and before the
 * huge_boot_pages list is ready.  add_gpage() appends addresses to this
 * array and alloc_bootmem_huge_page() removes them from the end. */
static unsigned long gpage_freearray [ MAX_NUMBER_GPAGES ] ;
/* Number of entries currently stored in gpage_freearray. */
static unsigned nr_gpages ;
2005-08-11 16:55:21 +10:00
2008-07-23 21:27:56 -07:00
/* Array of valid huge page sizes, indexed by MMU page size - a non-zero
 * value (the hugepte_shift) is stored for the huge page sizes that are valid.
 */
unsigned int mmu_huge_psizes [ MMU_PAGE_COUNT ] = { } ; /* initialize all to 0 */
/* hugepte_shift is just another name for the same array: the stored value
 * is log2 of the number of entries in a hugepte table for that size. */
# define hugepte_shift mmu_huge_psizes
/* Number of hugepte entries in one hugepte table. */
# define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize])
/* Size in bytes of one hugepte table. */
# define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize])
/* log2 of the address range covered by one hugepte table: the page shift
 * plus the hugepte_shift. */
# define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \
+ hugepte_shift [ psize ] )
/* Address range covered by one hugepte table, and the matching mask. */
# define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize))
# define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1))
2006-04-28 15:02:51 +10:00
2008-07-23 21:27:56 -07:00
/* Subtract one from the cache index because we don't need a cache for 4K,
 * since it is not a huge page size.  The macro expands to an element of
 * pgtable_cache[], so it can be both read and assigned. */
# define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \
+ psize - 1 ] )
/* Name of the hugepte table cache for the given MMU page size. */
# define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize])
2006-04-28 15:02:51 +10:00
2008-07-23 21:27:56 -07:00
/* Cache names, indexed by MMU page size (hence the "unused" placeholders). */
static const char * huge_pgtable_cache_name [ MMU_PAGE_COUNT ] = {
" unused_4K " , " hugepte_cache_64K " , " unused_64K_AP " ,
" hugepte_cache_1M " , " hugepte_cache_16M " , " hugepte_cache_16G "
} ;
2006-04-28 15:02:51 +10:00
/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
 * will choke on pointers to hugepte tables, which is handy for
 * catching screwups early. */
# define HUGEPD_OK 0x1
/* A huge page directory entry: the address of a hugepte table with
 * HUGEPD_OK or'ed in, or 0 when no table is attached. */
typedef struct { unsigned long pd ; } hugepd_t ;
/* True when the entry has no hugepte table attached. */
# define hugepd_none(hpd) ((hpd).pd == 0)
2008-07-23 21:27:56 -07:00
/*
 * Translate a huge page shift into the matching MMU page size index.
 * Returns -1 for shifts that are not handled here.  The 64K case is
 * compiled out when CONFIG_PPC_64K_PAGES is set.
 */
static inline int shift_to_mmu_psize(unsigned int shift)
{
#ifndef CONFIG_PPC_64K_PAGES
	if (shift == PAGE_SHIFT_64K)
		return MMU_PAGE_64K;
#endif
	if (shift == PAGE_SHIFT_16M)
		return MMU_PAGE_16M;
	if (shift == PAGE_SHIFT_16G)
		return MMU_PAGE_16G;

	return -1;
}
/*
 * Return the page shift recorded in mmu_psize_defs[] for an MMU page size.
 * A zero shift is treated as a fatal error (BUG()).
 */
static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
	unsigned int page_shift = mmu_psize_defs[mmu_psize].shift;

	if (!page_shift)
		BUG();
	return page_shift;
}
2006-04-28 15:02:51 +10:00
/*
 * Return the hugepte table a huge page directory entry points to.
 * The entry must carry the HUGEPD_OK marker; the marker bit is stripped
 * from the returned pointer.
 */
static inline pte_t *hugepd_page(hugepd_t hpd)
{
	unsigned long entry = hpd.pd;

	BUG_ON(!(entry & HUGEPD_OK));
	return (pte_t *)(entry & ~HUGEPD_OK);
}
2008-07-23 21:27:56 -07:00
/*
 * Return the hugepte slot for addr inside the hugepte table referenced
 * by *hpdp.  The slot index is the huge page number of addr, reduced
 * modulo the number of entries in one table for this page size.
 */
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
				    struct hstate *hstate)
{
	unsigned int page_shift = huge_page_shift(hstate);
	int mmu_psize = shift_to_mmu_psize(page_shift);
	unsigned long slot = (addr >> page_shift) &
			     (PTRS_PER_HUGEPTE(mmu_psize) - 1);

	return hugepd_page(*hpdp) + slot;
}
/*
 * Allocate a zeroed hugepte table for page size psize and attach it to
 * *hpdp.  The entry is re-checked under mm->page_table_lock; if it is
 * already populated by then, the new table is given back to its cache.
 *
 * Returns 0 on success (including the already-populated case) and
 * -ENOMEM if the table could not be allocated.  The address argument is
 * not used.
 */
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address, unsigned int psize)
{
	pte_t *table;

	table = kmem_cache_zalloc(huge_pgtable_cache(psize),
				  GFP_KERNEL | __GFP_REPEAT);
	if (!table)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (hugepd_none(*hpdp))
		hpdp->pd = (unsigned long)table | HUGEPD_OK;
	else
		kmem_cache_free(huge_pgtable_cache(psize), table);
	spin_unlock(&mm->page_table_lock);

	return 0;
}
2008-09-05 11:49:54 +10:00
/*
 * Find the PUD-level entry for addr.  When the huge page shift is at
 * least PUD_SHIFT the PGD entry itself is used in place of a PUD.
 */
static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate)
{
	if (huge_page_shift(hstate) >= PUD_SHIFT)
		return (pud_t *)pgd;

	return pud_offset(pgd, addr);
}
/*
 * Allocating counterpart of hpud_offset(): go through pud_alloc() when
 * the huge page shift is below PUD_SHIFT, otherwise use the PGD entry
 * itself in place of a PUD.
 */
static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr,
			 struct hstate *hstate)
{
	if (huge_page_shift(hstate) >= PUD_SHIFT)
		return (pud_t *)pgd;

	return pud_alloc(mm, pgd, addr);
}
/*
 * Find the PMD-level entry for addr.  When the huge page shift is at
 * least PMD_SHIFT the PUD entry itself is used in place of a PMD.
 */
static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
{
	if (huge_page_shift(hstate) >= PMD_SHIFT)
		return (pmd_t *)pud;

	return pmd_offset(pud, addr);
}
2008-09-05 11:49:54 +10:00
/*
 * Allocating counterpart of hpmd_offset(): go through pmd_alloc() when
 * the huge page shift is below PMD_SHIFT, otherwise use the PUD entry
 * itself in place of a PMD.
 */
static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
			 struct hstate *hstate)
{
	if (huge_page_shift(hstate) >= PMD_SHIFT)
		return (pmd_t *)pud;

	return pmd_alloc(mm, pud, addr);
}
2008-07-23 21:27:54 -07:00
/* Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy or bootmem allocator is setup .
*/
void add_gpage ( unsigned long addr , unsigned long page_size ,
unsigned long number_of_pages )
{
if ( ! addr )
return ;
while ( number_of_pages > 0 ) {
gpage_freearray [ nr_gpages ] = addr ;
nr_gpages + + ;
number_of_pages - - ;
addr + = page_size ;
}
}
2008-07-23 21:27:53 -07:00
/* Moves the gigantic page addresses from the temporary list to the
2008-07-23 21:27:56 -07:00
* huge_boot_pages list .
*/
int alloc_bootmem_huge_page ( struct hstate * hstate )
2008-07-23 21:27:53 -07:00
{
struct huge_bootmem_page * m ;
if ( nr_gpages = = 0 )
return 0 ;
m = phys_to_virt ( gpage_freearray [ - - nr_gpages ] ) ;
gpage_freearray [ nr_gpages ] = 0 ;
list_add ( & m - > list , & huge_boot_pages ) ;
2008-07-23 21:27:56 -07:00
m - > hstate = hstate ;
2008-07-23 21:27:53 -07:00
return 1 ;
}
2005-08-05 19:39:06 +10:00
/* Modelled after find_linux_pte() */
pte_t * huge_pte_offset ( struct mm_struct * mm , unsigned long addr )
2005-04-16 15:20:36 -07:00
{
2005-08-05 19:39:06 +10:00
pgd_t * pg ;
pud_t * pu ;
2008-01-04 09:59:50 +11:00
pmd_t * pm ;
2005-04-16 15:20:36 -07:00
2008-07-23 21:27:56 -07:00
unsigned int psize ;
unsigned int shift ;
unsigned long sz ;
struct hstate * hstate ;
psize = get_slice_psize ( mm , addr ) ;
shift = mmu_psize_to_shift ( psize ) ;
sz = ( ( 1UL ) < < shift ) ;
hstate = size_to_hstate ( sz ) ;
2005-04-16 15:20:36 -07:00
2008-07-23 21:27:56 -07:00
addr & = hstate - > mask ;
2005-08-05 19:39:06 +10:00
pg = pgd_offset ( mm , addr ) ;
if ( ! pgd_none ( * pg ) ) {
2008-09-05 11:49:54 +10:00
pu = hpud_offset ( pg , addr , hstate ) ;
2005-08-05 19:39:06 +10:00
if ( ! pud_none ( * pu ) ) {
2008-07-23 21:27:56 -07:00
pm = hpmd_offset ( pu , addr , hstate ) ;
2006-04-28 15:02:51 +10:00
if ( ! pmd_none ( * pm ) )
2008-07-23 21:27:56 -07:00
return hugepte_offset ( ( hugepd_t * ) pm , addr ,
hstate ) ;
2005-08-05 19:39:06 +10:00
}
}
2005-04-16 15:20:36 -07:00
2005-08-05 19:39:06 +10:00
return NULL ;
2005-04-16 15:20:36 -07:00
}
2008-07-23 21:27:41 -07:00
pte_t * huge_pte_alloc ( struct mm_struct * mm ,
unsigned long addr , unsigned long sz )
2005-04-16 15:20:36 -07:00
{
2005-08-05 19:39:06 +10:00
pgd_t * pg ;
pud_t * pu ;
2008-01-04 09:59:50 +11:00
pmd_t * pm ;
2006-04-28 15:02:51 +10:00
hugepd_t * hpdp = NULL ;
2008-07-23 21:27:56 -07:00
struct hstate * hstate ;
unsigned int psize ;
hstate = size_to_hstate ( sz ) ;
2005-04-16 15:20:36 -07:00
2008-07-23 21:27:56 -07:00
psize = get_slice_psize ( mm , addr ) ;
BUG_ON ( ! mmu_huge_psizes [ psize ] ) ;
2005-04-16 15:20:36 -07:00
2008-07-23 21:27:56 -07:00
addr & = hstate - > mask ;
2005-04-16 15:20:36 -07:00
2005-08-05 19:39:06 +10:00
pg = pgd_offset ( mm , addr ) ;
2008-09-05 11:49:54 +10:00
pu = hpud_alloc ( mm , pg , addr , hstate ) ;
2005-04-16 15:20:36 -07:00
2005-08-05 19:39:06 +10:00
if ( pu ) {
2008-07-23 21:27:56 -07:00
pm = hpmd_alloc ( mm , pu , addr , hstate ) ;
2006-04-28 15:02:51 +10:00
if ( pm )
hpdp = ( hugepd_t * ) pm ;
}
if ( ! hpdp )
return NULL ;
2008-07-23 21:27:56 -07:00
if ( hugepd_none ( * hpdp ) & & __hugepte_alloc ( mm , hpdp , addr , psize ) )
2006-04-28 15:02:51 +10:00
return NULL ;
2008-07-23 21:27:56 -07:00
return hugepte_offset ( hpdp , addr , hstate ) ;
2006-04-28 15:02:51 +10:00
}
2006-12-06 20:32:03 -08:00
/*
 * Stub: nothing is unshared here, so this always returns 0 and leaves
 * *addr and ptep untouched.
 */
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}
2008-07-23 21:27:56 -07:00
/*
 * Detach the hugepte table referenced by *hpdp and queue it for freeing
 * through the mmu_gather.  The directory entry is cleared first and the
 * gather is marked as needing a flush.
 */
static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
			       unsigned int psize)
{
	pte_t *table = hugepd_page(*hpdp);

	hpdp->pd = 0;
	tlb->need_flush = 1;

	/* Same cache index that huge_pgtable_cache(psize) uses. */
	pgtable_free_tlb(tlb,
			 pgtable_free_cache(table,
					    HUGEPTE_CACHE_NUM + psize - 1,
					    PGF_CACHENUM_MASK));
}
/*
 * Free every hugepte table hanging off the PMD entries that cover
 * [addr, end), then free the PMD page itself if the PUD_MASK-aligned
 * range it spans lies entirely within [floor, ceiling).
 */
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling,
				   unsigned int psize)
{
	unsigned long range_start = addr;
	unsigned long next;
	pmd_t *pmd = pmd_offset(pud, addr);

	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_none(*pmd))
			free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
	} while (pmd++, addr = next, addr != end);

	/* Only release the PMD page when nothing outside the range uses it. */
	range_start &= PUD_MASK;
	if (range_start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, range_start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd);
}
/*
 * Free the hugepte tables reachable from the PUD entries that cover
 * [addr, end).  For page sizes whose shift is below PMD_SHIFT the walk
 * continues into the PMD level; otherwise the PUD entry itself is the
 * huge page directory entry.  Afterwards the PUD page is freed if the
 * PGDIR_MASK-aligned range it spans lies entirely within [floor, ceiling).
 */
static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	unsigned int psize = get_slice_psize(tlb->mm, addr);
	unsigned int shift = mmu_psize_to_shift(psize);
	unsigned long range_start = addr;
	unsigned long next;
	pud_t *pud = pud_offset(pgd, addr);

	do {
		next = pud_addr_end(addr, end);
		if (shift < PMD_SHIFT) {
			if (!pud_none_or_clear_bad(pud))
				hugetlb_free_pmd_range(tlb, pud, addr, next,
						       floor, ceiling, psize);
		} else if (!pud_none(*pud)) {
			free_hugepte_range(tlb, (hugepd_t *)pud, psize);
		}
	} while (pud++, addr = next, addr != end);

	/* Only release the PUD page when nothing outside the range uses it. */
	range_start &= PGDIR_MASK;
	if (range_start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, range_start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud);
}
/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 *
 * The range [addr, end) is first trimmed to whole HUGEPD_SIZE units that
 * lie inside [floor, ceiling); the PGD entries covering what remains are
 * then walked.  For each entry the slice page size is looked up again:
 * sizes with a shift below PUD_SHIFT descend via hugetlb_free_pud_range(),
 * larger ones treat the PGD entry itself as the huge page directory entry.
 */
2008-07-23 21:27:10 -07:00
void hugetlb_free_pgd_range ( struct mmu_gather * tlb ,
2006-04-28 15:02:51 +10:00
unsigned long addr , unsigned long end ,
unsigned long floor , unsigned long ceiling )
{
pgd_t * pgd ;
unsigned long next ;
unsigned long start ;
/*
 * Comments below taken from the normal free_pgd_range().  They
 * apply here too.  The tests against HUGEPD_MASK below are
 * essential, because we *don't* test for this at the bottom
 * level.  Without them we'll attempt to free a hugepte table
 * when we unmap just part of it, even if there are other
 * active mappings using it.
 *
 * The next few lines have given us lots of grief...
 *
 * Why are we testing HUGEPD* at this top level?  Because
 * often there will be no work to do at all, and we'd prefer
 * not to go all the way down to the bottom just to discover
 * that.
 *
 * Why all these "- 1"s?  Because 0 represents both the bottom
 * of the address space and the top of it (using -1 for the
 * top wouldn't help much: the masks would do the wrong thing).
 * The rule is that addr 0 and floor 0 refer to the bottom of
 * the address space, but end 0 and ceiling 0 refer to the top.
 * Comparisons need to use "end - 1" and "ceiling - 1" (though
 * that end 0 case should be mythical).
 *
 * Wherever addr is brought up or ceiling brought down, we
 * must be careful to reject "the opposite 0" before it
 * confuses the subsequent tests.  But what about where end is
 * brought down by HUGEPD_SIZE below?  No, end can't go down to
 * 0 there.
 *
 * Whereas we round start (addr) and ceiling down, by different
 * masks at different levels, in order to test whether a table
 * now has no other vmas using it, so can be freed, we don't
 * bother to round floor or end up - the tests don't need that.
 */
2008-07-23 21:27:56 -07:00
/* Page size of the slice holding the start address; used for the trimming
 * below and re-read per PGD entry inside the loop. */
unsigned int psize = get_slice_psize ( tlb - > mm , addr ) ;
2006-04-28 15:02:51 +10:00
2008-07-23 21:27:56 -07:00
addr & = HUGEPD_MASK ( psize ) ;
2006-04-28 15:02:51 +10:00
if ( addr < floor ) {
2008-07-23 21:27:56 -07:00
addr + = HUGEPD_SIZE ( psize ) ;
2006-04-28 15:02:51 +10:00
/* addr wrapped around to 0: nothing left to free */
if ( ! addr )
return ;
}
if ( ceiling ) {
2008-07-23 21:27:56 -07:00
ceiling & = HUGEPD_MASK ( psize ) ;
2006-04-28 15:02:51 +10:00
if ( ! ceiling )
return ;
}
if ( end - 1 > ceiling - 1 )
2008-07-23 21:27:56 -07:00
end - = HUGEPD_SIZE ( psize ) ;
2006-04-28 15:02:51 +10:00
if ( addr > end - 1 )
return ;
/* NOTE(review): start is assigned here but not read again in this function. */
start = addr ;
2008-07-23 21:27:10 -07:00
pgd = pgd_offset ( tlb - > mm , addr ) ;
2006-04-28 15:02:51 +10:00
do {
2008-07-23 21:27:56 -07:00
psize = get_slice_psize ( tlb - > mm , addr ) ;
BUG_ON ( ! mmu_huge_psizes [ psize ] ) ;
2006-04-28 15:02:51 +10:00
next = pgd_addr_end ( addr , end ) ;
2008-09-05 11:49:54 +10:00
if ( mmu_psize_to_shift ( psize ) < PUD_SHIFT ) {
if ( pgd_none_or_clear_bad ( pgd ) )
continue ;
hugetlb_free_pud_range ( tlb , pgd , addr , next , floor , ceiling ) ;
} else {
/* The PGD entry itself is the huge page directory entry. */
if ( pgd_none ( * pgd ) )
continue ;
free_hugepte_range ( tlb , ( hugepd_t * ) pgd , psize ) ;
}
2006-04-28 15:02:51 +10:00
} while ( pgd + + , addr = next , addr ! = end ) ;
2005-04-16 15:20:36 -07:00
}
2005-08-05 19:39:06 +10:00
/*
 * Install pte in *ptep.  If a present entry is being replaced it is
 * cleared first through pte_update(), using the huge-page-aligned
 * address.  The _PAGE_HPTEFLAGS bits are always stripped from the value
 * that gets stored.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	if (pte_present(*ptep)) {
		/* We open-code pte_clear because we need to pass the right
		 * argument to hpte_need_flush (huge / !huge).  Might not be
		 * necessary anymore if we make hpte_need_flush() get the
		 * page size from the slices.
		 */
		unsigned int mmu_psize = get_slice_psize(mm, addr);
		unsigned long hpage_size = 1UL << mmu_psize_to_shift(mmu_psize);
		struct hstate *hstate = size_to_hstate(hpage_size);

		pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
	}

	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
2005-08-05 19:39:06 +10:00
/*
 * Clear all bits of the hugepte at ptep via pte_update() and return the
 * value it held before.
 */
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	return __pte(pte_update(mm, addr, ptep, ~0UL, 1));
}
struct page *
follow_huge_addr ( struct mm_struct * mm , unsigned long address , int write )
{
pte_t * ptep ;
struct page * page ;
2008-07-23 21:27:56 -07:00
unsigned int mmu_psize = get_slice_psize ( mm , address ) ;
2005-04-16 15:20:36 -07:00
2008-07-23 21:27:56 -07:00
/* Verify it is a huge page else bail. */
if ( ! mmu_huge_psizes [ mmu_psize ] )
2005-04-16 15:20:36 -07:00
return ERR_PTR ( - EINVAL ) ;
ptep = huge_pte_offset ( mm , address ) ;
page = pte_page ( * ptep ) ;
2008-07-23 21:27:56 -07:00
if ( page ) {
unsigned int shift = mmu_psize_to_shift ( mmu_psize ) ;
unsigned long sz = ( ( 1UL ) < < shift ) ;
page + = ( address % sz ) / PAGE_SIZE ;
}
2005-04-16 15:20:36 -07:00
return page ;
}
/*
 * Always returns 0: a PMD entry is never reported as a huge page mapping
 * here.
 */
int pmd_huge(pmd_t pmd)
{
	return 0;
}
2008-07-23 21:27:50 -07:00
/*
 * Always returns 0: a PUD entry is never reported as a huge page mapping
 * here.
 */
int pud_huge(pud_t pud)
{
	return 0;
}
2005-04-16 15:20:36 -07:00
/*
 * Must never be called: pmd_huge() in this file always returns 0, so
 * reaching this function is treated as a bug.
 */
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}
/*
 * Pick an unmapped area for a hugetlb mapping of file.  The request is
 * forwarded to slice_get_unmapped_area() together with the MMU page size
 * that corresponds to the file's hstate.  pgoff is not used.
 */
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	unsigned int hpage_shift = huge_page_shift(hstate_file(file));
	int mmu_psize = shift_to_mmu_psize(hpage_shift);

	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}
2005-12-09 14:20:52 +11:00
/*
* Called by asm hashtable . S for doing lazy icache flush
*/
static unsigned int hash_huge_page_do_lazy_icache ( unsigned long rflags ,
2008-07-23 21:27:56 -07:00
pte_t pte , int trap , unsigned long sz )
2005-12-09 14:20:52 +11:00
{
struct page * page ;
int i ;
if ( ! pfn_valid ( pte_pfn ( pte ) ) )
return rflags ;
page = pte_page ( pte ) ;
/* page is dirty */
if ( ! test_bit ( PG_arch_1 , & page - > flags ) & & ! PageReserved ( page ) ) {
if ( trap = = 0x400 ) {
2008-07-23 21:27:56 -07:00
for ( i = 0 ; i < ( sz / PAGE_SIZE ) ; i + + )
2005-12-09 14:20:52 +11:00
__flush_dcache_icache ( page_address ( page + i ) ) ;
set_bit ( PG_arch_1 , & page - > flags ) ;
} else {
rflags | = HPTE_R_N ;
}
}
return rflags ;
}
2005-04-16 15:20:36 -07:00
/*
 * Build or update the hash page table entry (HPTE) for the huge page
 * mapping effective address ea in mm.
 *
 * Returns 0 once the Linux PTE has been rewritten with the HPTE slot
 * information.  Returns 1 without inserting anything when the slice is not
 * a valid huge page size, no PTE is found, the PTE does not permit the
 * requested access, or the PTE is currently marked _PAGE_BUSY.
 *
 * The PTE is held with _PAGE_BUSY set (via the cmpxchg loop) while the
 * HPTE is updated or inserted; the final store clears _PAGE_BUSY again.
 */
int hash_huge_page ( struct mm_struct * mm , unsigned long access ,
2005-12-09 14:20:52 +11:00
unsigned long ea , unsigned long vsid , int local ,
unsigned long trap )
2005-04-16 15:20:36 -07:00
{
pte_t * ptep ;
2005-11-07 11:06:55 +11:00
unsigned long old_pte , new_pte ;
2008-07-23 21:27:56 -07:00
unsigned long va , rflags , pa , sz ;
2005-04-16 15:20:36 -07:00
long slot ;
int err = 1 ;
2007-10-11 20:37:10 +10:00
int ssize = user_segment_size ( ea ) ;
2008-07-23 21:27:56 -07:00
unsigned int mmu_psize ;
int shift ;
mmu_psize = get_slice_psize ( mm , ea ) ;
2005-04-16 15:20:36 -07:00
2008-07-23 21:27:56 -07:00
/* Not a huge page slice: leave with err == 1 */
if ( ! mmu_huge_psizes [ mmu_psize ] )
goto out ;
2005-04-16 15:20:36 -07:00
ptep = huge_pte_offset ( mm , ea ) ;
/* Search the Linux page table for a match with va */
2007-10-11 20:37:10 +10:00
va = hpt_va ( ea , vsid , ssize ) ;
2005-04-16 15:20:36 -07:00
/*
* If no pte found or not present , send the problem up to
* do_page_fault
*/
if ( unlikely ( ! ptep | | pte_none ( * ptep ) ) )
goto out ;
/*
* Check the user ' s access rights to the page . If access should be
* prevented then send the problem up to do_page_fault .
*/
if ( unlikely ( access & ~ pte_val ( * ptep ) ) )
goto out ;
/*
* At this point , we have a pte ( old_pte ) which can be used to build
* or update an HPTE . There are 2 cases :
*
* 1. There is a valid ( present ) pte with no associated HPTE ( this is
* the most common case )
* 2. There is a valid ( present ) pte with an associated HPTE . The
* current values of the pp bits in the HPTE prevent access
* because we are doing software DIRTY bit management and the
* page is currently not DIRTY .
*/
2005-11-07 11:06:55 +11:00
/* Atomically mark the pte busy and accessed; bail out if it is
 * already busy. */
do {
old_pte = pte_val ( * ptep ) ;
if ( old_pte & _PAGE_BUSY )
goto out ;
2008-06-11 15:37:10 +10:00
new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED ;
2005-11-07 11:06:55 +11:00
} while ( old_pte ! = __cmpxchg_u64 ( ( unsigned long * ) ptep ,
old_pte , new_pte ) ) ;
/* Low bit of rflags is set when the pte lacks _PAGE_RW */
rflags = 0x2 | ( ! ( new_pte & _PAGE_RW ) ) ;
2005-04-16 15:20:36 -07:00
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
2005-11-07 11:06:55 +11:00
rflags | = ( ( new_pte & _PAGE_EXEC ) ? 0 : HPTE_R_N ) ;
2008-07-23 21:27:56 -07:00
shift = mmu_psize_to_shift ( mmu_psize ) ;
sz = ( ( 1UL ) < < shift ) ;
2005-12-09 14:20:52 +11:00
if ( ! cpu_has_feature ( CPU_FTR_COHERENT_ICACHE ) )
/* No CPU has hugepages but lacks no execute, so we
* don ' t need to worry about that case */
rflags = hash_huge_page_do_lazy_icache ( rflags , __pte ( old_pte ) ,
2008-07-23 21:27:56 -07:00
trap , sz ) ;
2005-04-16 15:20:36 -07:00
/* Check if pte already has an hpte (case 2) */
2005-11-07 11:06:55 +11:00
if ( unlikely ( old_pte & _PAGE_HASHPTE ) ) {
2005-04-16 15:20:36 -07:00
/* There MIGHT be an HPTE for this pte */
/* NOTE: this slot shadows the function-level 'long slot' */
unsigned long hash , slot ;
2008-07-23 21:27:56 -07:00
hash = hpt_hash ( va , shift , ssize ) ;
2005-11-07 11:06:55 +11:00
if ( old_pte & _PAGE_F_SECOND )
2005-04-16 15:20:36 -07:00
hash = ~ hash ;
slot = ( hash & htab_hash_mask ) * HPTES_PER_GROUP ;
2005-11-07 11:06:55 +11:00
slot + = ( old_pte & _PAGE_F_GIX ) > > 12 ;
2005-04-16 15:20:36 -07:00
2008-07-23 21:27:56 -07:00
/* If the update fails, drop the HPTE flags so that the insert
 * path below runs instead. */
if ( ppc_md . hpte_updatepp ( slot , rflags , va , mmu_psize ,
2007-10-11 20:37:10 +10:00
ssize , local ) = = - 1 )
2005-11-07 11:06:55 +11:00
old_pte & = ~ _PAGE_HPTEFLAGS ;
2005-04-16 15:20:36 -07:00
}
2005-11-07 11:06:55 +11:00
if ( likely ( ! ( old_pte & _PAGE_HASHPTE ) ) ) {
2008-07-23 21:27:56 -07:00
unsigned long hash = hpt_hash ( va , shift , ssize ) ;
2005-04-16 15:20:36 -07:00
unsigned long hpte_group ;
2005-11-07 11:06:55 +11:00
pa = pte_pfn ( __pte ( old_pte ) ) < < PAGE_SHIFT ;
2005-04-16 15:20:36 -07:00
repeat :
hpte_group = ( ( hash & htab_hash_mask ) *
HPTES_PER_GROUP ) & ~ 0x7UL ;
2005-11-07 11:06:55 +11:00
/* clear HPTE slot informations in new PTE */
2008-06-11 15:37:10 +10:00
# ifdef CONFIG_PPC_64K_PAGES
new_pte = ( new_pte & ~ _PAGE_HPTEFLAGS ) | _PAGE_HPTE_SUB0 ;
# else
2005-11-07 11:06:55 +11:00
new_pte = ( new_pte & ~ _PAGE_HPTEFLAGS ) | _PAGE_HASHPTE ;
2008-06-11 15:37:10 +10:00
# endif
2005-04-16 15:20:36 -07:00
/* Add in WIMG bits */
2008-06-19 08:32:56 +10:00
rflags | = ( new_pte & ( _PAGE_WRITETHRU | _PAGE_NO_CACHE |
_PAGE_COHERENT | _PAGE_GUARDED ) ) ;
2005-04-16 15:20:36 -07:00
2005-11-07 11:06:55 +11:00
/* Insert into the hash table, primary slot */
slot = ppc_md . hpte_insert ( hpte_group , va , pa , rflags , 0 ,
2008-07-23 21:27:56 -07:00
mmu_psize , ssize ) ;
2005-04-16 15:20:36 -07:00
/* Primary is full, try the secondary */
if ( unlikely ( slot = = - 1 ) ) {
hpte_group = ( ( ~ hash & htab_hash_mask ) *
HPTES_PER_GROUP ) & ~ 0x7UL ;
2005-11-07 11:06:55 +11:00
slot = ppc_md . hpte_insert ( hpte_group , va , pa , rflags ,
2005-09-23 13:24:07 -07:00
HPTE_V_SECONDARY ,
2008-07-23 21:27:56 -07:00
mmu_psize , ssize ) ;
2005-04-16 15:20:36 -07:00
/* Both groups full: evict from one of them (chosen by the low
 * timebase bit) and retry the insert. */
if ( slot = = - 1 ) {
if ( mftb ( ) & 0x1 )
2005-09-23 13:24:07 -07:00
hpte_group = ( ( hash & htab_hash_mask ) *
HPTES_PER_GROUP ) & ~ 0x7UL ;
2005-04-16 15:20:36 -07:00
ppc_md . hpte_remove ( hpte_group ) ;
goto repeat ;
}
}
if ( unlikely ( slot = = - 2 ) )
panic ( " hash_huge_page: pte_insert failed \n " ) ;
2007-01-12 09:54:39 +09:00
/* Record the slot information in the new pte */
new_pte | = ( slot < < 12 ) & ( _PAGE_F_SECOND | _PAGE_F_GIX ) ;
2005-04-16 15:20:36 -07:00
}
2005-11-07 11:06:55 +11:00
/*
2005-11-23 13:37:39 -08:00
* No need to use ldarx / stdcx here
2005-11-07 11:06:55 +11:00
*/
* ptep = __pte ( new_pte & ~ _PAGE_BUSY ) ;
2005-04-16 15:20:36 -07:00
err = 0 ;
out :
return err ;
}
2006-04-28 15:02:51 +10:00
2008-11-22 17:33:44 +00:00
/*
 * Register MMU page size psize as a huge page size if possible.
 *
 * Sizes that the hardware does not provide, or that do not fit within
 * the page table limits, get their hugepte_shift reset to 0.  Sizes that
 * are already set up, or that equal the base page size, are left alone.
 * Otherwise an hstate is added and hugepte_shift[psize] is set to the
 * difference between the shift of the page table level that holds the
 * hugepd and the page shift itself.
 */
static void __init set_huge_psize(int psize)
{
	unsigned int shift = mmu_psize_defs[psize].shift;

	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable limits. */
	if (!(shift && shift < SID_SHIFT_1T &&
	      (shift > MIN_HUGEPTE_SHIFT ||
	       shift == PAGE_SHIFT_64K ||
	       shift == PAGE_SHIFT_16G))) {
		hugepte_shift[psize] = 0;
		return;
	}

	/* Return if huge page size has already been setup or is the
	 * same as the base page size. */
	if (mmu_huge_psizes[psize] || shift == PAGE_SHIFT)
		return;

	hugetlb_add_hstate(shift - PAGE_SHIFT);

	switch (shift) {
	case PAGE_SHIFT_64K:
		/* We only allow 64k hpages with 4k base page,
		 * which was checked above, and always put them
		 * at the PMD */
		hugepte_shift[psize] = PMD_SHIFT;
		break;
	case PAGE_SHIFT_16M:
		/* 16M pages can be at two different levels
		 * of pagetables based on base page size */
		if (PAGE_SHIFT == PAGE_SHIFT_64K)
			hugepte_shift[psize] = PMD_SHIFT;
		else /* 4k base page */
			hugepte_shift[psize] = PUD_SHIFT;
		break;
	case PAGE_SHIFT_16G:
		/* 16G pages are always at PGD level */
		hugepte_shift[psize] = PGDIR_SHIFT;
		break;
	}
	hugepte_shift[psize] -= shift;
}
/*
 * Boot parameter handler for the huge page size option.
 *
 * Parses the size with memparse(), maps the position of its lowest set
 * bit to an MMU page size and registers it through set_huge_psize().
 * A size of 0 (for example an empty or non-numeric argument) is rejected
 * up front, because __ffs() is undefined when no bit is set.  Unsupported
 * sizes only produce a warning.  Always returns 1.
 */
static int __init hugepage_setup_sz(char *str)
{
	unsigned long long size;
	int mmu_psize = -1;

	size = memparse(str, &str);

	/* Only look for the lowest set bit when there is one. */
	if (size)
		mmu_psize = shift_to_mmu_psize(__ffs(size));

	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
		set_huge_psize(mmu_psize);
	else
		printk(KERN_WARNING " Invalid huge page size specified(%llu) \n ", size);

	return 1;
}
__setup(" hugepagesz= ", hugepage_setup_sz);
2006-04-28 15:02:51 +10:00
/*
 * Module init: register the supported huge page sizes and create one
 * hugepte table cache for every size that ended up valid.
 *
 * Returns -ENODEV when the CPU lacks CPU_FTR_16M_PAGE, 0 otherwise.
 * Failure to create a cache is fatal (panic).
 */
static int __init hugetlbpage_init(void)
{
	unsigned int psize;

	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
		return -ENODEV;

	/* Add supported huge page sizes.  Need to change HUGE_MAX_HSTATE
	 * and adjust PTE_NONCACHE_NUM if the number of supported huge page
	 * sizes changes.
	 */
	set_huge_psize(MMU_PAGE_16M);
	set_huge_psize(MMU_PAGE_16G);

	/* Temporarily disable support for 64K huge pages when 64K SPU local
	 * store support is enabled as the current implementation conflicts.
	 */
#ifndef CONFIG_SPU_FS_64K_LS
	set_huge_psize(MMU_PAGE_64K);
#endif

	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
		/* Only valid huge page sizes get a cache. */
		if (!mmu_huge_psizes[psize])
			continue;

		/* Each object is one hugepte table, aligned to its own size. */
		huge_pgtable_cache(psize) = kmem_cache_create(
					HUGEPTE_CACHE_NAME(psize),
					HUGEPTE_TABLE_SIZE(psize),
					HUGEPTE_TABLE_SIZE(psize),
					0,
					NULL);
		if (!huge_pgtable_cache(psize))
			panic(" hugetlbpage_init(): could not create %s "
			      " \n ", HUGEPTE_CACHE_NAME(psize));
	}

	return 0;
}
module_init(hugetlbpage_init);