/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>

#include <asm/e820.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pat.h>
/*
 * The current flushing context - we pass it instead of 5 arguments:
 */
struct cpa_data {
	unsigned long	*vaddr;
	pgprot_t	mask_set;
	pgprot_t	mask_clr;
	int		numpages;
	int		flags;
	unsigned long	pfn;
	unsigned	force_split : 1;
	int		curpage;
};

#define CPA_FLUSHTLB 1
#define CPA_ARRAY 2
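
/*
 * Note on the flags (derived from the code below): CPA_FLUSHTLB is set
 * by the low-level code whenever a pte was actually modified, so the
 * top-level caller knows a TLB (and possibly cache) flush is required.
 * CPA_ARRAY marks requests where cpa->vaddr points to an array of page
 * addresses instead of the start address of a contiguous range.
 */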
#ifdef CONFIG_PROC_FS
static unsigned long direct_pages_count[PG_LEVEL_NUM];

void update_page_count(int level, unsigned long pages)
{
	unsigned long flags;

	/* Protect against CPA */
	spin_lock_irqsave(&pgd_lock, flags);
	direct_pages_count[level] += pages;
	spin_unlock_irqrestore(&pgd_lock, flags);
}

static void split_page_count(int level)
{
	direct_pages_count[level]--;
	direct_pages_count[level - 1] += PTRS_PER_PTE;
}

int arch_report_meminfo(char *page)
{
	int n = sprintf(page, "DirectMap4k:  %8lu kB\n",
			direct_pages_count[PG_LEVEL_4K] << 2);
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
	n += sprintf(page + n, "DirectMap2M:  %8lu kB\n",
			direct_pages_count[PG_LEVEL_2M] << 11);
#else
	n += sprintf(page + n, "DirectMap4M:  %8lu kB\n",
			direct_pages_count[PG_LEVEL_2M] << 12);
#endif
#ifdef CONFIG_X86_64
	if (direct_gbpages)
		n += sprintf(page + n, "DirectMap1G:  %8lu kB\n",
			direct_pages_count[PG_LEVEL_1G] << 20);
#endif
	return n;
}
#else
static inline void split_page_count(int level) { }
#endif
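
/*
 * The shifts above convert page counts into kB: a 4k page is 4 kB
 * (<< 2), a 2M page is 2048 kB (<< 11), a 4M page is 4096 kB (<< 12)
 * and a 1G page is 1048576 kB (<< 20). Illustrative /proc/meminfo
 * output (values made up):
 *
 *	DirectMap4k:	    2048 kB
 *	DirectMap2M:	 2093056 kB
 */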
#ifdef CONFIG_X86_64

static inline unsigned long highmap_start_pfn(void)
{
	return __pa(_text) >> PAGE_SHIFT;
}

static inline unsigned long highmap_end_pfn(void)
{
	return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
}

#endif

#ifdef CONFIG_DEBUG_PAGEALLOC
# define debug_pagealloc 1
#else
# define debug_pagealloc 0
#endif
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}
/*
 * Flushing functions
 */

/**
 * clflush_cache_range - flush a cache range with clflush
 * @vaddr:	virtual start address
 * @size:	number of bytes to flush
 *
 * clflush is an unordered instruction which needs fencing with mfence
 * to avoid ordering issues.
 */
void clflush_cache_range(void *vaddr, unsigned int size)
{
	void *vend = vaddr + size - 1;

	mb();

	for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
		clflush(vaddr);
	/*
	 * Flush any possible final partial cacheline:
	 */
	clflush(vend);

	mb();
}
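
/*
 * Typical usage (illustrative sketch): flush one page worth of
 * cachelines after changing its attributes,
 *
 *	clflush_cache_range((void *)addr, PAGE_SIZE);
 *
 * which is exactly what cpa_flush_range() below does for each present
 * page in the range.
 */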
static void __cpa_flush_all(void *arg)
{
	unsigned long cache = (unsigned long)arg;

	/*
	 * Flush all to work around errata in early Athlons regarding
	 * large page flushing.
	 */
	__flush_tlb_all();

	if (cache && boot_cpu_data.x86_model >= 4)
		wbinvd();
}

static void cpa_flush_all(unsigned long cache)
{
	BUG_ON(irqs_disabled());

	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
static void __cpa_flush_range(void *arg)
{
	/*
	 * We could optimize that further and do individual per page
	 * tlb invalidates for a low number of pages. Caveat: we must
	 * flush the high aliases on 64bit as well.
	 */
	__flush_tlb_all();
}

static void cpa_flush_range(unsigned long start, int numpages, int cache)
{
	unsigned int i, level;
	unsigned long addr;

	BUG_ON(irqs_disabled());
	WARN_ON(PAGE_ALIGN(start) != start);

	on_each_cpu(__cpa_flush_range, NULL, 1);

	if (!cache)
		return;

	/*
	 * We only need to flush on one CPU,
	 * clflush is a MESI-coherent instruction that
	 * will cause all other CPUs to flush the same
	 * cachelines:
	 */
	for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
		pte_t *pte = lookup_address(addr, &level);

		/*
		 * Only flush present addresses:
		 */
		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
			clflush_cache_range((void *) addr, PAGE_SIZE);
	}
}
static void cpa_flush_array(unsigned long *start, int numpages, int cache)
{
	unsigned int i, level;
	unsigned long *addr;

	BUG_ON(irqs_disabled());

	on_each_cpu(__cpa_flush_range, NULL, 1);

	if (!cache)
		return;

	/* 4M threshold */
	if (numpages >= 1024) {
		if (boot_cpu_data.x86_model >= 4)
			wbinvd();
		return;
	}
	/*
	 * We only need to flush on one CPU,
	 * clflush is a MESI-coherent instruction that
	 * will cause all other CPUs to flush the same
	 * cachelines:
	 */
	for (i = 0, addr = start; i < numpages; i++, addr++) {
		pte_t *pte = lookup_address(*addr, &level);

		/*
		 * Only flush present addresses:
		 */
		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
			clflush_cache_range((void *) *addr, PAGE_SIZE);
	}
}
/*
 * Certain areas of memory on x86 require very specific protection flags,
 * for example the BIOS area or kernel text. Callers don't always get this
 * right (again, ioremap() on BIOS memory is not uncommon) so this function
 * checks and fixes these known static required protection bits.
 */
static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
				   unsigned long pfn)
{
	pgprot_t forbidden = __pgprot(0);

	/*
	 * The BIOS area between 640k and 1Mb needs to be executable for
	 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
	 */
	if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
		pgprot_val(forbidden) |= _PAGE_NX;

	/*
	 * The kernel text needs to be executable for obvious reasons.
	 * Does not cover __inittext since that is gone later on. On
	 * 64bit we do not enforce !NX on the low mapping.
	 */
	if (within(address, (unsigned long)_text, (unsigned long)_etext))
		pgprot_val(forbidden) |= _PAGE_NX;

	/*
	 * The .rodata section needs to be read-only. Using the pfn
	 * catches all aliases.
	 */
	if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
		   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
		pgprot_val(forbidden) |= _PAGE_RW;

	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));

	return prot;
}
/*
 * Lookup the page table entry for a virtual address. Return a pointer
 * to the entry and the level of the mapping.
 *
 * Note: We return pud and pmd either when the entry is marked large
 * or when the present bit is not set. Otherwise we would return a
 * pointer to a nonexistent mapping.
 */
pte_t *lookup_address(unsigned long address, unsigned int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	*level = PG_LEVEL_NONE;

	if (pgd_none(*pgd))
		return NULL;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;

	*level = PG_LEVEL_1G;
	if (pud_large(*pud) || !pud_present(*pud))
		return (pte_t *)pud;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;

	*level = PG_LEVEL_2M;
	if (pmd_large(*pmd) || !pmd_present(*pmd))
		return (pte_t *)pmd;

	*level = PG_LEVEL_4K;

	return pte_offset_kernel(pmd, address);
}
EXPORT_SYMBOL_GPL(lookup_address);
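
/*
 * Usage sketch (illustrative): determine which mapping level backs a
 * kernel virtual address,
 *
 *	unsigned int level;
 *	pte_t *pte = lookup_address(address, &level);
 *
 *	if (pte && (pte_val(*pte) & _PAGE_PRESENT) && level == PG_LEVEL_2M)
 *		... address is backed by a 2M large page ...
 */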
/*
 * Set the new pmd in all the pgds we know about:
 */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
	if (!SHARED_KERNEL_PMD) {
		struct page *page;

		list_for_each_entry(page, &pgd_list, lru) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
#endif
}
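
/*
 * Return convention of try_preserve_large_page(), derived from the code
 * below: 1 requests a split of the large page, 0 means the large page
 * could be preserved (or was updated in place), and a negative value is
 * an error, e.g. -EINVAL for an unknown mapping level.
 */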
static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
			struct cpa_data *cpa)
{
	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
	pte_t new_pte, old_pte, *tmp;
	pgprot_t old_prot, new_prot;
	int i, do_split = 1;
	unsigned int level;

	if (cpa->force_split)
		return 1;

	spin_lock_irqsave(&pgd_lock, flags);
	/*
	 * Check for races, another CPU might have split this page
	 * up already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte)
		goto out_unlock;

	switch (level) {
	case PG_LEVEL_2M:
		psize = PMD_PAGE_SIZE;
		pmask = PMD_PAGE_MASK;
		break;
#ifdef CONFIG_X86_64
	case PG_LEVEL_1G:
		psize = PUD_PAGE_SIZE;
		pmask = PUD_PAGE_MASK;
		break;
#endif
	default:
		do_split = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Calculate the number of pages, which fit into this large
	 * page starting at address:
	 */
	nextpage_addr = (address + psize) & pmask;
	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
	if (numpages < cpa->numpages)
		cpa->numpages = numpages;

	/*
	 * We are safe now. Check whether the new pgprot is the same:
	 */
	old_pte = *kpte;
	old_prot = new_prot = pte_pgprot(old_pte);

	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);

	/*
	 * old_pte points to the large page base address. So we need
	 * to add the offset of the virtual address:
	 */
	pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
	cpa->pfn = pfn;

	new_prot = static_protections(new_prot, address, pfn);

	/*
	 * We need to check the full range, whether
	 * static_protections() requires a different pgprot for one of
	 * the pages in the range we try to preserve:
	 */
	addr = address + PAGE_SIZE;
	pfn++;
	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
		pgprot_t chk_prot = static_protections(new_prot, addr, pfn);

		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
			goto out_unlock;
	}

	/*
	 * If there are no changes, return. cpa->numpages has been
	 * updated above:
	 */
	if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
		do_split = 0;
		goto out_unlock;
	}

	/*
	 * We need to change the attributes. Check, whether we can
	 * change the large page in one go. We request a split, when
	 * the address is not aligned and the number of pages is
	 * smaller than the number of pages in the large page. Note
	 * that we limited the number of possible pages already to
	 * the number of pages in the large page.
	 */
	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
		/*
		 * The address is aligned and the number of pages
		 * covers the full page.
		 */
		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
		__set_pmd_pte(kpte, address, new_pte);
		cpa->flags |= CPA_FLUSHTLB;
		do_split = 0;
	}

out_unlock:
	spin_unlock_irqrestore(&pgd_lock, flags);

	return do_split;
}
static LIST_HEAD(page_pool);
static unsigned long pool_size, pool_pages, pool_low;
static unsigned long pool_used, pool_failed;

static void cpa_fill_pool(struct page **ret)
{
	gfp_t gfp = GFP_KERNEL;
	unsigned long flags;
	struct page *p;

	/*
	 * Avoid recursion (on debug-pagealloc) and also signal
	 * our priority to get to these pagetables:
	 */
	if (current->flags & PF_MEMALLOC)
		return;
	current->flags |= PF_MEMALLOC;

	/*
	 * Allocate atomically from atomic contexts:
	 */
	if (in_atomic() || irqs_disabled() || debug_pagealloc)
		gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;

	while (pool_pages < pool_size || (ret && !*ret)) {
		p = alloc_pages(gfp, 0);
		if (!p) {
			pool_failed++;
			break;
		}
		/*
		 * If the call site needs a page right now, provide it:
		 */
		if (ret && !*ret) {
			*ret = p;
			continue;
		}
		spin_lock_irqsave(&pgd_lock, flags);
		list_add(&p->lru, &page_pool);
		pool_pages++;
		spin_unlock_irqrestore(&pgd_lock, flags);
	}

	current->flags &= ~PF_MEMALLOC;
}

#define SHIFT_MB		(20 - PAGE_SHIFT)
#define ROUND_MB_GB		((1 << 10) - 1)
#define SHIFT_MB_GB		10
#define POOL_PAGES_PER_GB	16

void __init cpa_init(void)
{
	struct sysinfo si;
	unsigned long gb;

	si_meminfo(&si);
	/*
	 * Calculate the number of pool pages:
	 *
	 * Convert totalram (nr of pages) to MiB and round to the next
	 * GiB. Shift MiB to GiB and multiply the result by
	 * POOL_PAGES_PER_GB:
	 */
	if (debug_pagealloc) {
		gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
		pool_size = POOL_PAGES_PER_GB * gb;
	} else {
		pool_size = 1;
	}
	pool_low = pool_size;

	cpa_fill_pool(NULL);
	printk(KERN_DEBUG
	       "CPA: page pool initialized %lu of %lu pages preallocated\n",
	       pool_pages, pool_size);
}
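
/*
 * Worked example (illustrative): with DEBUG_PAGEALLOC on a 4 GiB
 * machine, si.totalram >> SHIFT_MB is 4096 MiB, which rounds to gb = 4,
 * so pool_size = POOL_PAGES_PER_GB * 4 = 64 pages. Without
 * DEBUG_PAGEALLOC splits are rare and a single preallocated page
 * suffices.
 */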
static int split_large_page(pte_t *kpte, unsigned long address)
{
	unsigned long flags, pfn, pfninc = 1;
	unsigned int i, level;
	pte_t *pbase, *tmp;
	pgprot_t ref_prot;
	struct page *base;

	/*
	 * Get a page from the pool. The pool list is protected by the
	 * pgd_lock, which we have to take anyway for the split
	 * operation:
	 */
	spin_lock_irqsave(&pgd_lock, flags);
	if (list_empty(&page_pool)) {
		spin_unlock_irqrestore(&pgd_lock, flags);
		base = NULL;
		cpa_fill_pool(&base);
		if (!base)
			return -ENOMEM;
		spin_lock_irqsave(&pgd_lock, flags);
	} else {
		base = list_first_entry(&page_pool, struct page, lru);
		list_del(&base->lru);
		pool_pages--;

		if (pool_pages < pool_low)
			pool_low = pool_pages;
	}

	/*
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte)
		goto out_unlock;

	pbase = (pte_t *)page_address(base);
	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
	ref_prot = pte_pgprot(pte_clrhuge(*kpte));

#ifdef CONFIG_X86_64
	if (level == PG_LEVEL_1G) {
		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
		pgprot_val(ref_prot) |= _PAGE_PSE;
	}
#endif

	/*
	 * Get the target pfn from the original entry:
	 */
	pfn = pte_pfn(*kpte);
	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));

	if (address >= (unsigned long)__va(0) &&
		address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
		split_page_count(level);

#ifdef CONFIG_X86_64
	if (address >= (unsigned long)__va(1UL<<32) &&
		address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
		split_page_count(level);
#endif

	/*
	 * Install the new, split up pagetable. Important details here:
	 *
	 * On Intel the NX bit of all levels must be cleared to make a
	 * page executable (see section 4.13.2 of the Intel 64 and IA-32
	 * Architectures Software Developer's Manual).
	 *
	 * Mark the entry present. The current mapping might be
	 * set to not present, which we preserved above.
	 */
	ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
	pgprot_val(ref_prot) |= _PAGE_PRESENT;
	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
	base = NULL;

out_unlock:
	/*
	 * If we dropped out via the lookup_address check under
	 * pgd_lock then stick the page back into the pool:
	 */
	if (base) {
		list_add(&base->lru, &page_pool);
		pool_pages++;
	} else
		pool_used++;
	spin_unlock_irqrestore(&pgd_lock, flags);

	return 0;
}
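
/*
 * Concretely: splitting a 2M page fills the new page table with
 * PTRS_PER_PTE (512) 4k entries with consecutive pfns (pfninc == 1).
 * Splitting a 1G page instead fills it with 512 2M entries, which is
 * why pfninc becomes PMD_PAGE_SIZE >> PAGE_SHIFT and _PAGE_PSE is set
 * in ref_prot.
 */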
static int __change_page_attr(struct cpa_data *cpa, int primary)
{
	unsigned long address;
	int do_split, err;
	unsigned int level;
	pte_t *kpte, old_pte;

	if (cpa->flags & CPA_ARRAY)
		address = cpa->vaddr[cpa->curpage];
	else
		address = *cpa->vaddr;

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
		return 0;

	old_pte = *kpte;
	if (!pte_val(old_pte)) {
		if (!primary)
			return 0;
		WARN(1, KERN_WARNING "CPA: called for zero pte. "
		       "vaddr = %lx cpa->vaddr = %lx\n", address,
		       *cpa->vaddr);
		return -EINVAL;
	}

	if (level == PG_LEVEL_4K) {
		pte_t new_pte;
		pgprot_t new_prot = pte_pgprot(old_pte);
		unsigned long pfn = pte_pfn(old_pte);

		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);

		new_prot = static_protections(new_prot, address, pfn);

		/*
		 * We need to keep the pfn from the existing PTE,
		 * after all we're only going to change its attributes,
		 * not the memory it points to.
		 */
		new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
		cpa->pfn = pfn;
		/*
		 * Do we really change anything?
		 */
		if (pte_val(old_pte) != pte_val(new_pte)) {
			set_pte_atomic(kpte, new_pte);
			cpa->flags |= CPA_FLUSHTLB;
		}
		cpa->numpages = 1;
		return 0;
	}

	/*
	 * Check, whether we can keep the large page intact
	 * and just change the pte:
	 */
	do_split = try_preserve_large_page(kpte, address, cpa);
	/*
	 * When the range fits into the existing large page,
	 * return. cpa->numpages and cpa->flags have been updated in
	 * try_preserve_large_page():
	 */
	if (do_split <= 0)
		return do_split;

	/*
	 * We have to split the large page:
	 */
	err = split_large_page(kpte, address);
	if (!err) {
		cpa->flags |= CPA_FLUSHTLB;
		goto repeat;
	}

	return err;
}
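
/*
 * Example of the aliasing problem handled below (64-bit): the pfn of a
 * kernel text page is reachable both through the direct mapping at
 * __va(pfn << PAGE_SHIFT) and through the high kernel mapping above
 * __START_KERNEL_map; both views must agree on the attributes.
 */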
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);

static int cpa_process_alias(struct cpa_data *cpa)
{
	struct cpa_data alias_cpa;
	int ret = 0;
	unsigned long temp_cpa_vaddr, vaddr;

	if (cpa->pfn >= max_pfn_mapped)
		return 0;

#ifdef CONFIG_X86_64
	if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
		return 0;
#endif
	/*
	 * No need to redo, when the primary call touched the direct
	 * mapping already:
	 */
	if (cpa->flags & CPA_ARRAY)
		vaddr = cpa->vaddr[cpa->curpage];
	else
		vaddr = *cpa->vaddr;

	if (!(within(vaddr, PAGE_OFFSET,
		    PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
#ifdef CONFIG_X86_64
		|| within(vaddr, PAGE_OFFSET + (1UL<<32),
		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
#endif
	)) {

		alias_cpa = *cpa;
		temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
		alias_cpa.vaddr = &temp_cpa_vaddr;
		alias_cpa.flags &= ~CPA_ARRAY;

		ret = __change_page_attr_set_clr(&alias_cpa, 0);
	}

#ifdef CONFIG_X86_64
	if (ret)
		return ret;
	/*
	 * No need to redo, when the primary call touched the high
	 * mapping already:
	 */
	if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
		return 0;

	/*
	 * If the physical address is inside the kernel map, we need
	 * to touch the high mapped kernel as well:
	 */
	if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
		return 0;

	alias_cpa = *cpa;
	temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
	alias_cpa.vaddr = &temp_cpa_vaddr;
	alias_cpa.flags &= ~CPA_ARRAY;

	/*
	 * The high mapping range is imprecise, so ignore the return value.
	 */
	__change_page_attr_set_clr(&alias_cpa, 0);
#endif

	return ret;
}
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
{
	int ret, numpages = cpa->numpages;

	while (numpages) {
		/*
		 * Store the remaining nr of pages for the large page
		 * preservation check.
		 */
		cpa->numpages = numpages;
		/* for array changes, we can't use large page */
		if (cpa->flags & CPA_ARRAY)
			cpa->numpages = 1;

		ret = __change_page_attr(cpa, checkalias);
		if (ret)
			return ret;

		if (checkalias) {
			ret = cpa_process_alias(cpa);
			if (ret)
				return ret;
		}

		/*
		 * Adjust the number of pages with the result of the
		 * CPA operation. Either a large page has been
		 * preserved or a single page update happened.
		 */
		BUG_ON(cpa->numpages > numpages);
		numpages -= cpa->numpages;
		if (cpa->flags & CPA_ARRAY)
			cpa->curpage++;
		else
			*cpa->vaddr += cpa->numpages * PAGE_SIZE;
	}
	return 0;
}
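
/*
 * cache_attr() below is nonzero when the requested change touches any
 * of the memory type bits (PAT/PWT/PCD), in which case the caches have
 * to be flushed in addition to the TLBs.
 */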
static inline int cache_attr(pgprot_t attr)
{
	return pgprot_val(attr) &
		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
}
static int change_page_attr_set_clr(unsigned long *addr, int numpages,
				    pgprot_t mask_set, pgprot_t mask_clr,
				    int force_split, int array)
{
	struct cpa_data cpa;
	int ret, cache, checkalias;

	/*
	 * Check, if we are requested to change a not supported
	 * feature:
	 */
	mask_set = canon_pgprot(mask_set);
	mask_clr = canon_pgprot(mask_clr);
	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
		return 0;

	/* Ensure we are PAGE_SIZE aligned */
	if (!array) {
		if (*addr & ~PAGE_MASK) {
			*addr &= PAGE_MASK;
			/*
			 * People should not be passing in unaligned addresses:
			 */
			WARN_ON_ONCE(1);
		}
	} else {
		int i;
		for (i = 0; i < numpages; i++) {
			if (addr[i] & ~PAGE_MASK) {
				addr[i] &= PAGE_MASK;
				WARN_ON_ONCE(1);
			}
		}
	}

	/* Must avoid aliasing mappings in the highmem code */
	kmap_flush_unused();

	cpa.vaddr = addr;
	cpa.numpages = numpages;
	cpa.mask_set = mask_set;
	cpa.mask_clr = mask_clr;
	cpa.flags = 0;
	cpa.curpage = 0;
	cpa.force_split = force_split;

	if (array)
		cpa.flags |= CPA_ARRAY;

	/* No alias checking for _NX bit modifications */
	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;

	ret = __change_page_attr_set_clr(&cpa, checkalias);

	/*
	 * Check whether we really changed something:
	 */
	if (!(cpa.flags & CPA_FLUSHTLB))
		goto out;

	/*
	 * No need to flush, when we did not set any of the caching
	 * attributes:
	 */
	cache = cache_attr(mask_set);

	/*
	 * On success we use clflush, when the CPU supports it to
	 * avoid the wbinvd. If the CPU does not support it and in the
	 * error case we fall back to cpa_flush_all (which uses
	 * wbinvd):
	 */
	if (!ret && cpu_has_clflush) {
		if (cpa.flags & CPA_ARRAY)
			cpa_flush_array(addr, numpages, cache);
		else
			cpa_flush_range(*addr, numpages, cache);
	} else
		cpa_flush_all(cache);

out:
	cpa_fill_pool(NULL);

	return ret;
}
static inline int change_page_attr_set(unsigned long *addr, int numpages,
				       pgprot_t mask, int array)
{
	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
		array);
}

static inline int change_page_attr_clear(unsigned long *addr, int numpages,
					 pgprot_t mask, int array)
{
	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
		array);
}
int _set_memory_uc(unsigned long addr, int numpages)
{
	/*
	 * for now UC MINUS. see comments in ioremap_nocache()
	 */
	return change_page_attr_set(&addr, numpages,
				    __pgprot(_PAGE_CACHE_UC_MINUS), 0);
}

int set_memory_uc(unsigned long addr, int numpages)
{
	/*
	 * for now UC MINUS. see comments in ioremap_nocache()
	 */
	if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
			    _PAGE_CACHE_UC_MINUS, NULL))
		return -EINVAL;

	return _set_memory_uc(addr, numpages);
}
EXPORT_SYMBOL(set_memory_uc);
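
/*
 * Usage sketch (illustrative, error handling omitted): mark a buffer
 * uncached for device access and restore it afterwards,
 *
 *	if (set_memory_uc((unsigned long)buf, nrpages))
 *		goto fail;
 *	...
 *	set_memory_wb((unsigned long)buf, nrpages);
 */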
int set_memory_array_uc(unsigned long *addr, int addrinarray)
{
	int i;
	/*
	 * for now UC MINUS. see comments in ioremap_nocache()
	 */
	for (i = 0; i < addrinarray; i++) {
		if (reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
			    _PAGE_CACHE_UC_MINUS, NULL))
			goto out;
	}

	return change_page_attr_set(addr, addrinarray,
				__pgprot(_PAGE_CACHE_UC_MINUS), 1);
out:
	while (--i >= 0)
		free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
	return -EINVAL;
}
EXPORT_SYMBOL(set_memory_array_uc);
int _set_memory_wc(unsigned long addr, int numpages)
{
	return change_page_attr_set(&addr, numpages,
				    __pgprot(_PAGE_CACHE_WC), 0);
}

int set_memory_wc(unsigned long addr, int numpages)
{
	if (!pat_enabled)
		return set_memory_uc(addr, numpages);

	if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
		_PAGE_CACHE_WC, NULL))
		return -EINVAL;

	return _set_memory_wc(addr, numpages);
}
EXPORT_SYMBOL(set_memory_wc);
int _set_memory_wb(unsigned long addr, int numpages)
{
	return change_page_attr_clear(&addr, numpages,
				      __pgprot(_PAGE_CACHE_MASK), 0);
}

int set_memory_wb(unsigned long addr, int numpages)
{
	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);

	return _set_memory_wb(addr, numpages);
}
EXPORT_SYMBOL(set_memory_wb);
int set_memory_array_wb(unsigned long *addr, int addrinarray)
{
	int i;

	for (i = 0; i < addrinarray; i++)
		free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);

	return change_page_attr_clear(addr, addrinarray,
					__pgprot(_PAGE_CACHE_MASK), 1);
}
EXPORT_SYMBOL(set_memory_array_wb);
int set_memory_x(unsigned long addr, int numpages)
{
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_x);

int set_memory_nx(unsigned long addr, int numpages)
{
	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_nx);

int set_memory_ro(unsigned long addr, int numpages)
{
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
}

int set_memory_rw(unsigned long addr, int numpages)
{
	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
}

int set_memory_np(unsigned long addr, int numpages)
{
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}

int set_memory_4k(unsigned long addr, int numpages)
{
	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
					__pgprot(0), 1, 0);
}
int set_pages_uc(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_uc(addr, numpages);
}
EXPORT_SYMBOL(set_pages_uc);

int set_pages_wb(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_wb(addr, numpages);
}
EXPORT_SYMBOL(set_pages_wb);

int set_pages_x(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_x(addr, numpages);
}
EXPORT_SYMBOL(set_pages_x);

int set_pages_nx(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_nx(addr, numpages);
}
EXPORT_SYMBOL(set_pages_nx);

int set_pages_ro(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_ro(addr, numpages);
}

int set_pages_rw(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_rw(addr, numpages);
}
#ifdef CONFIG_DEBUG_PAGEALLOC

static int __set_pages_p(struct page *page, int numpages)
{
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.numpages = numpages,
				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.mask_clr = __pgprot(0),
				.flags = 0};

	return __change_page_attr_set_clr(&cpa, 1);
}

static int __set_pages_np(struct page *page, int numpages)
{
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.numpages = numpages,
				.mask_set = __pgprot(0),
				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.flags = 0};

	return __change_page_attr_set_clr(&cpa, 1);
}
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * If page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/*
	 * The return value is ignored as the calls cannot fail.
	 * Large pages are kept enabled at boot time, and are
	 * split up quickly with DEBUG_PAGEALLOC. If a splitup
	 * fails here (due to temporary memory shortage) no damage
	 * is done because we just keep the largepage intact up
	 * to the next attempt when it will likely be split up:
	 */
	if (enable)
		__set_pages_p(page, numpages);
	else
		__set_pages_np(page, numpages);

	/*
	 * We should perform an IPI and flush all tlbs,
	 * but that can deadlock -> flush only current cpu:
	 */
	__flush_tlb_all();

	/*
	 * Try to refill the page pool here. We can do this only after
	 * the tlb flush.
	 */
	cpa_fill_pool(NULL);
}
2008-02-20 03:47:44 +03:00
2008-04-17 19:40:45 +04:00
# ifdef CONFIG_DEBUG_FS
static int dpa_show ( struct seq_file * m , void * v )
{
seq_puts ( m , " DEBUG_PAGEALLOC \n " ) ;
seq_printf ( m , " pool_size : %lu \n " , pool_size ) ;
seq_printf ( m , " pool_pages : %lu \n " , pool_pages ) ;
seq_printf ( m , " pool_low : %lu \n " , pool_low ) ;
seq_printf ( m , " pool_used : %lu \n " , pool_used ) ;
seq_printf ( m , " pool_failed : %lu \n " , pool_failed ) ;
return 0 ;
}
static int dpa_open ( struct inode * inode , struct file * filp )
{
return single_open ( filp , dpa_show , NULL ) ;
}
static const struct file_operations dpa_fops = {
. open = dpa_open ,
. read = seq_read ,
. llseek = seq_lseek ,
. release = single_release ,
} ;
2008-04-23 15:20:56 +04:00
static int __init debug_pagealloc_proc_init ( void )
2008-04-17 19:40:45 +04:00
{
struct dentry * de ;
de = debugfs_create_file ( " debug_pagealloc " , 0600 , NULL , NULL ,
& dpa_fops ) ;
if ( ! de )
return - ENOMEM ;
return 0 ;
}
__initcall ( debug_pagealloc_proc_init ) ;
# endif
#ifdef CONFIG_HIBERNATION

bool kernel_page_present(struct page *page)
{
	unsigned int level;
	pte_t *pte;

	if (PageHighMem(page))
		return false;

	pte = lookup_address((unsigned long)page_address(page), &level);
	return (pte_val(*pte) & _PAGE_PRESENT);
}

#endif /* CONFIG_HIBERNATION */

#endif /* CONFIG_DEBUG_PAGEALLOC */
/*
 * The testcases use internal knowledge of the implementation that shouldn't
 * be exposed to the rest of the kernel. Include these directly here.
 */
#ifdef CONFIG_CPA_DEBUG
#include "pageattr-test.c"
#endif