/* pgalloc.c: page directory & page table allocation
 *
 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/quicklist.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/cacheflush.h>

pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE)));

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);

	if (pte)
		clear_page(pte);
	return pte;
}
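
/*
 * For reference, a minimal sketch of how the generic MM layer of this era
 * consumes the allocator above (modelled loosely on mm/memory.c; the exact
 * locking and helper usage here is illustrative, not part of this file):
 *
 *	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
 *	if (!new)
 *		return -ENOMEM;
 *	spin_lock(&init_mm.page_table_lock);
 *	if (pmd_present(*pmd))
 *		pte_free_kernel(&init_mm, new);	- lost the race, drop ours
 *	else
 *		pmd_populate_kernel(&init_mm, pmd, new);
 *	spin_unlock(&init_mm.page_table_lock);
 */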

pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *page;

#ifdef CONFIG_HIGHPTE
	page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
#else
	page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
#endif
	if (page) {
		clear_highpage(page);
		pgtable_page_ctor(page);
		flush_dcache_page(page);
	}
	return page;
}
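
/*
 * pgtable_page_ctor() initialises the split page-table spinlock embedded in
 * the struct page (when CONFIG_SPLIT_PTLOCK_CPUS applies), so the matching
 * free path must undo it before the page goes back to the allocator.
 * Roughly what the <asm/pgalloc.h> pte_free() of this era does, shown here
 * as a sketch:
 *
 *	static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 *	{
 *		pgtable_page_dtor(pte);
 *		__free_page(pte);
 *	}
 */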

void __set_pmd(pmd_t *pmdptr, unsigned long pmd)
{
	unsigned long *__ste_p = pmdptr->ste;
	int loop;

	if (!pmd) {
		/* clear every segment table entry backing this pmd */
		memset(__ste_p, 0, PME_SIZE);
	}
	else {
		/* reject values carrying attribute or size bits that the
		 * segment table entry format cannot hold */
		BUG_ON(pmd & (0x3f00 | xAMPRx_SS | 0xe));

		/* replicate the pointer across the whole STE block, each
		 * entry stepping on by one page table's coverage */
		for (loop = PME_SIZE; loop > 0; loop -= 4) {
			*__ste_p++ = pmd;
			pmd += __frv_PT_SIZE;
		}
	}

	/* the MMU fetches segment table entries from memory, so write the
	 * modified lines back */
	frv_dcache_writeback((unsigned long) pmdptr, (unsigned long) (pmdptr + 1));
}
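
/*
 * Worked illustration of the replication above, with purely hypothetical
 * numbers: if PME_SIZE were 32 bytes and __frv_PT_SIZE were 0x1000, the
 * loop would make PME_SIZE / 4 = 8 stores, filling ste[0..7] with pmd,
 * pmd + 0x1000, ..., pmd + 0x7000 - each segment table entry pointing
 * one page table's worth further into the block of page tables.
 */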

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared.  If PAE were not to share the pmd a similar
 * tactic would be needed.  This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * If the locking proves to be non-performant, a ticketing scheme with
 * checks at dup_mmap(), exec(), and other mmlist addition points
 * could be used.  The locking scheme was chosen on the basis of
 * manfred's recommendations and having no core impact whatsoever.
 * -- wli
 */
DEFINE_SPINLOCK(pgd_lock);
struct page *pgd_list;

static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	page->index = (unsigned long) pgd_list;
	if (pgd_list)
		set_page_private(pgd_list, (unsigned long) &page->index);
	pgd_list = page;
	set_page_private(page, (unsigned long) &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *next, **pprev, *page = virt_to_page(pgd);

	next = (struct page *) page->index;
	pprev = (struct page **) page_private(page);
	*pprev = next;
	if (next)
		set_page_private(next, (unsigned long) pprev);
}
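
/*
 * The two helpers above thread a singly-linked list through page->index
 * (next pointer) and page_private() (address of the previous link's next
 * field).  A pageattr.c-style fixup would then walk every pgd under
 * pgd_lock, something like this (illustrative only):
 *
 *	struct page *page;
 *	unsigned long flags;
 *
 *	spin_lock_irqsave(&pgd_lock, flags);
 *	for (page = pgd_list; page; page = (struct page *) page->index) {
 *		pgd_t *pgd = (pgd_t *) page_address(page);
 *		... adjust the kernel entries of this pgd ...
 *	}
 *	spin_unlock_irqrestore(&pgd_lock, flags);
 */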

void pgd_ctor(void *pgd)
{
	unsigned long flags;

	if (PTRS_PER_PMD == 1)
		spin_lock_irqsave(&pgd_lock, flags);

	/* mirror the kernel portion of swapper_pg_dir into the new pgd */
	memcpy((pgd_t *) pgd + USER_PGDS_IN_LAST_PML4,
	       swapper_pg_dir + USER_PGDS_IN_LAST_PML4,
	       (PTRS_PER_PGD - USER_PGDS_IN_LAST_PML4) * sizeof(pgd_t));

	/* PAE: the lock was never taken above, so the early return must not
	 * fall through to the unlock */
	if (PTRS_PER_PMD > 1)
		return;

	pgd_list_add(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
	memset(pgd, 0, USER_PGDS_IN_LAST_PML4 * sizeof(pgd_t));
}

/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd)
{
	unsigned long flags; /* can be called from interrupt context */

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
	return quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
}
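
/*
 * Note that quicklist_alloc() only runs the constructor on pages it has to
 * take fresh from the page allocator; a pgd recycled off the per-CPU
 * quicklist is assumed to still be in its constructed state - kernel
 * entries intact, user entries already cleared on the free path (see the
 * comment in pgd_free() below).  That assumption is what keeps the
 * alloc/free fast paths cheap.
 */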

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	/* in the non-PAE case, clear_page_tables() clears user pgd entries */
	quicklist_free(0, pgd_dtor, pgd);
}

void __init pgtable_cache_init(void)
{
}

void check_pgt_cache(void)
{
	/* trim the per-CPU quicklist back toward 25 cached pgds, releasing
	 * at most 16 per call and running pgd_dtor() on each one freed */
	quicklist_trim(0, pgd_dtor, 25, 16);
}
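
/*
 * Lifecycle sketch of a pgd under the quicklist scheme (illustrative):
 *
 *	pgd = pgd_alloc(mm);	pgd_ctor() runs only if the page is fresh
 *	...
 *	pgd_free(mm, pgd);	page is parked on the per-CPU quicklist
 *	check_pgt_cache();	trims the list back toward 25 cached pages,
 *				at most 16 frees per call, running pgd_dtor()
 *				on each page actually released
 */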