#include <linux/mm.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
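
/*
 * Allocate a zeroed page to be used as a kernel (lowmem) pte page table;
 * the caller gets its kernel virtual address.
 */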
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
        return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
}
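
/*
 * Allocate a pte page for user page tables.  With CONFIG_HIGHPTE the page
 * may come from highmem; pgtable_page_ctor() sets the page up as a
 * page-table page (split pte lock, page-table accounting).
 */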
pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
        struct page *pte;

#ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT | __GFP_ZERO, 0);
#else
        pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
#endif
        if (pte)
                pgtable_page_ctor(pte);
        return pte;
}
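
/*
 * Free a pte page when tearing down page tables: undo the ctor, tell any
 * paravirt backend the page is no longer a page table, and hand it to the
 * mmu_gather so it is only freed once the TLBs have been flushed.
 */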
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
        pgtable_page_dtor(pte);
        paravirt_release_pte(page_to_pfn(pte));
        tlb_remove_page(tlb, pte);
}

#if PAGETABLE_LEVELS > 2
void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
        paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
        tlb_remove_page(tlb, virt_to_page(pmd));
}

#if PAGETABLE_LEVELS > 3
void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
        paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
        tlb_remove_page(tlb, virt_to_page(pud));
}
#endif  /* PAGETABLE_LEVELS > 3 */
#endif  /* PAGETABLE_LEVELS > 2 */
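
/*
 * pgd_list links every pgd whose kernel mappings are not shared with
 * swapper_pg_dir, so that updates to the kernel page tables can be
 * propagated to all of them (see the comment above the CONFIG_X86_PAE
 * block below).  The list is protected by pgd_lock, which the callers
 * of these helpers hold.
 */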
static inline void pgd_list_add(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);

        list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);

        list_del(&page->lru);
}

#define UNSHARED_PTRS_PER_PGD                           \
        (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
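
/*
 * A rough worked example (assuming the usual PAGE_OFFSET of 0xC0000000
 * on 32-bit): KERNEL_PGD_BOUNDARY is pgd_index(PAGE_OFFSET), i.e. 768 of
 * the 1024 pgd entries on non-PAE and 3 of the 4 PDPT entries on PAE.
 * If the kernel pmd is shared, only the user entries below that boundary
 * are private to each pgd; otherwise every entry is.
 */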

static void pgd_ctor(pgd_t *pgd)
{
        /* If the pgd points to a shared pagetable level (either the
           ptes in non-PAE, or shared PMD in PAE), then just copy the
           references from swapper_pg_dir. */
        if (PAGETABLE_LEVELS == 2 ||
            (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
            PAGETABLE_LEVELS == 4) {
                clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
                                swapper_pg_dir + KERNEL_PGD_BOUNDARY,
                                KERNEL_PGD_PTRS);
                paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
                                         __pa(swapper_pg_dir) >> PAGE_SHIFT,
                                         KERNEL_PGD_BOUNDARY,
                                         KERNEL_PGD_PTRS);
        }

        /* list required to sync kernel mapping updates */
        if (!SHARED_KERNEL_PMD)
                pgd_list_add(pgd);
}

static void pgd_dtor(pgd_t *pgd)
{
        unsigned long flags; /* can be called from interrupt context */

        if (SHARED_KERNEL_PMD)
                return;

        spin_lock_irqsave(&pgd_lock, flags);
        pgd_list_del(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared.  If PAE were not to share the pmd a similar
 * tactic would be needed.  This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- wli
 */

#ifdef CONFIG_X86_PAE
/*
 * In PAE mode, we need to do a cr3 reload (= tlb flush) when updating
 * the top-level pagetable entries to guarantee the processor notices
 * the update.  Since this is expensive, and all 4 top-level entries are
 * used almost immediately in a new process's life, we just pre-populate
 * them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is not
 * shared between pagetables (!SHARED_KERNEL_PMD), we allocate and
 * initialize the kernel pmds here.
 */
#define PREALLOCATED_PMDS       UNSHARED_PTRS_PER_PGD

void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
        paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);

        /* Note: almost everything apart from _PAGE_PRESENT is
           reserved at the pmd (PDPT) level. */
        set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));

        /*
         * According to Intel App note "TLBs, Paging-Structure Caches,
         * and Their Invalidation", April 2007, document 317080-001,
         * section 8.1: in PAE mode we explicitly have to flush the
         * TLB via cr3 if the top-level pgd is changed...
         */
        if (mm == current->active_mm)
                write_cr3(read_cr3());
}
#else  /* !CONFIG_X86_PAE */

/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS       0

#endif  /* CONFIG_X86_PAE */
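
/*
 * Allocate (and, on failure, release) the PREALLOCATED_PMDS pmd pages
 * that pgd_alloc() installs up front.  On non-PAE builds
 * PREALLOCATED_PMDS is 0 and both loops compile away.
 */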
static void free_pmds(pmd_t *pmds[])
{
        int i;

        for (i = 0; i < PREALLOCATED_PMDS; i++)
                if (pmds[i])
                        free_page((unsigned long)pmds[i]);
}

static int preallocate_pmds(pmd_t *pmds[])
{
        int i;
        bool failed = false;

        for (i = 0; i < PREALLOCATED_PMDS; i++) {
                pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
                if (pmd == NULL)
                        failed = true;
                pmds[i] = pmd;
        }

        if (failed) {
                free_pmds(pmds);
                return -ENOMEM;
        }

        return 0;
}

/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 */
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
        int i;

        for (i = 0; i < PREALLOCATED_PMDS; i++) {
                pgd_t pgd = pgdp[i];

                if (pgd_val(pgd) != 0) {
                        pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

                        pgdp[i] = native_make_pgd(0);

                        paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
                        pmd_free(mm, pmd);
                }
        }
}
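
/*
 * Install the preallocated pmds into a new pgd.  Entries at or above
 * KERNEL_PGD_BOUNDARY are first seeded with the kernel pmd contents
 * from swapper_pg_dir so the kernel mappings remain visible.
 */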
static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
        pud_t *pud;
        unsigned long addr;
        int i;

        if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
                return;

        pud = pud_offset(pgd, 0);

        for (addr = i = 0; i < PREALLOCATED_PMDS;
             i++, pud++, addr += PUD_SIZE) {
                pmd_t *pmd = pmds[i];

                if (i >= KERNEL_PGD_BOUNDARY)
                        memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
                               sizeof(pmd_t) * PTRS_PER_PMD);

                pud_populate(mm, pud, pmd);
        }
}
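
/*
 * Allocate a new pgd: one zeroed page, with the kernel entries cloned by
 * pgd_ctor() and (on PAE) the user pmds pre-populated.  The ctor and the
 * prepopulation run under pgd_lock so that walkers of pgd_list never see
 * a half-built pgd.
 */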
pgd_t *pgd_alloc(struct mm_struct *mm)
{
        pgd_t *pgd;
        pmd_t *pmds[PREALLOCATED_PMDS];
        unsigned long flags;

        pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);

        if (pgd == NULL)
                goto out;

        mm->pgd = pgd;

        if (preallocate_pmds(pmds) != 0)
                goto out_free_pgd;

        if (paravirt_pgd_alloc(mm) != 0)
                goto out_free_pmds;

        /*
         * Make sure that pre-populating the pmds is atomic with
         * respect to anything walking the pgd_list, so that they
         * never see a partially populated pgd.
         */
        spin_lock_irqsave(&pgd_lock, flags);

        pgd_ctor(pgd);
        pgd_prepopulate_pmd(mm, pgd, pmds);

        spin_unlock_irqrestore(&pgd_lock, flags);

        return pgd;

out_free_pmds:
        free_pmds(pmds);
out_free_pgd:
        free_page((unsigned long)pgd);
out:
        return NULL;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
        pgd_mop_up_pmds(mm, pgd);
        pgd_dtor(pgd);
        paravirt_pgd_free(mm, pgd);
        free_page((unsigned long)pgd);
}
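
/*
 * Called by the generic mm code when a fault finds a present pte whose
 * access/dirty/permission bits need upgrading.  The new pte is only
 * installed, and the TLB entry flushed, when the entry actually changed
 * and this is a write (dirty) fault; the return value tells the caller
 * whether anything changed.
 */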
int ptep_set_access_flags(struct vm_area_struct *vma,
                          unsigned long address, pte_t *ptep,
                          pte_t entry, int dirty)
{
        int changed = !pte_same(*ptep, entry);

        if (changed && dirty) {
                *ptep = entry;
                pte_update_defer(vma->vm_mm, address, ptep);
                flush_tlb_page(vma, address);
        }

        return changed;
}
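
/*
 * Atomically clear the accessed bit in a pte and report whether it was
 * set; pte_update() lets a paravirt backend know about the change.
 */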
int ptep_test_and_clear_young(struct vm_area_struct *vma,
                              unsigned long addr, pte_t *ptep)
{
        int ret = 0;

        if (pte_young(*ptep))
                ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
                                         (unsigned long *) &ptep->pte);

        if (ret)
                pte_update(vma->vm_mm, addr, ptep);

        return ret;
}

int ptep_clear_flush_young(struct vm_area_struct *vma,
                           unsigned long address, pte_t *ptep)
{
        int young;

        young = ptep_test_and_clear_young(vma, address, ptep);
        if (young)
                flush_tlb_page(vma, address);

        return young;
}
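
/*
 * Fixmaps are compile-time fixed virtual addresses near the top of the
 * address space.  native_set_fixmap() points one slot at a physical page
 * with the given protection bits; fixmaps_set counts how many slots have
 * been installed so far.
 */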
int fixmaps_set;

void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                BUG();
                return;
        }
        set_pte_vaddr(address, pte);
        fixmaps_set++;
}

void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
        __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
}