2007-10-22 12:52:47 +02:00
/*
 *    Copyright IBM Corp. 2007,2009
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
# include <linux/sched.h>
# include <linux/kernel.h>
# include <linux/errno.h>
# include <linux/mm.h>
# include <linux/swap.h>
# include <linux/smp.h>
# include <linux/highmem.h>
# include <linux/slab.h>
# include <linux/pagemap.h>
# include <linux/spinlock.h>
# include <linux/module.h>
# include <linux/quicklist.h>
# include <asm/system.h>
# include <asm/pgtable.h>
# include <asm/pgalloc.h>
# include <asm/tlb.h>
# include <asm/tlbflush.h>
2008-02-09 18:24:37 +01:00
# include <asm/mmu_context.h>
2007-10-22 12:52:47 +02:00
# ifndef CONFIG_64BIT
# define ALLOC_ORDER 1
2008-02-09 18:24:35 +01:00
# define TABLES_PER_PAGE 4
# define FRAG_MASK 15UL
# define SECOND_HALVES 10UL
2008-03-25 18:47:10 +01:00
void clear_table_pgstes ( unsigned long * table )
{
clear_table ( table , _PAGE_TYPE_EMPTY , PAGE_SIZE / 4 ) ;
memset ( table + 256 , 0 , PAGE_SIZE / 4 ) ;
clear_table ( table + 512 , _PAGE_TYPE_EMPTY , PAGE_SIZE / 4 ) ;
memset ( table + 768 , 0 , PAGE_SIZE / 4 ) ;
}
2007-10-22 12:52:47 +02:00
# else
# define ALLOC_ORDER 2
2008-02-09 18:24:35 +01:00
# define TABLES_PER_PAGE 2
# define FRAG_MASK 3UL
# define SECOND_HALVES 2UL
2008-03-25 18:47:10 +01:00
void clear_table_pgstes ( unsigned long * table )
{
clear_table ( table , _PAGE_TYPE_EMPTY , PAGE_SIZE / 2 ) ;
memset ( table + 256 , 0 , PAGE_SIZE / 2 ) ;
}
2007-10-22 12:52:47 +02:00
# endif
2009-06-12 10:26:33 +02:00
/*
 * Start of the vmalloc area. Defaults to VMALLOC_SIZE below VMALLOC_END and
 * can be overridden with the "vmalloc=" early parameter.
 */
unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

/*
 * Early parameter handler for "vmalloc=<size>".
 *
 * @arg: value part of the parameter; the size is parsed by memparse().
 *
 * Places VMALLOC_START the parsed size below VMALLOC_END, masked with
 * PAGE_MASK. Returns 0 on success or -EINVAL if no value was supplied.
 */
static int __init parse_vmalloc(char *arg)
{
	if (!arg)
		return -EINVAL;
	VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
	return 0;
}
/*
 * The parameter name must not contain blanks: the command line is split on
 * whitespace, so a name with embedded spaces could never be matched.
 */
early_param("vmalloc", parse_vmalloc);
2007-10-22 12:52:47 +02:00
unsigned long * crst_table_alloc ( struct mm_struct * mm , int noexec )
{
struct page * page = alloc_pages ( GFP_KERNEL , ALLOC_ORDER ) ;
if ( ! page )
return NULL ;
page - > index = 0 ;
if ( noexec ) {
struct page * shadow = alloc_pages ( GFP_KERNEL , ALLOC_ORDER ) ;
if ( ! shadow ) {
__free_pages ( page , ALLOC_ORDER ) ;
return NULL ;
}
page - > index = page_to_phys ( shadow ) ;
}
2008-02-09 18:24:35 +01:00
spin_lock ( & mm - > page_table_lock ) ;
list_add ( & page - > lru , & mm - > context . crst_list ) ;
spin_unlock ( & mm - > page_table_lock ) ;
2007-10-22 12:52:47 +02:00
return ( unsigned long * ) page_to_phys ( page ) ;
}
2008-02-09 18:24:35 +01:00
void crst_table_free ( struct mm_struct * mm , unsigned long * table )
2007-10-22 12:52:47 +02:00
{
unsigned long * shadow = get_shadow_table ( table ) ;
2008-02-09 18:24:35 +01:00
struct page * page = virt_to_page ( table ) ;
2007-10-22 12:52:47 +02:00
2008-02-09 18:24:35 +01:00
spin_lock ( & mm - > page_table_lock ) ;
list_del ( & page - > lru ) ;
spin_unlock ( & mm - > page_table_lock ) ;
2007-10-22 12:52:47 +02:00
if ( shadow )
free_pages ( ( unsigned long ) shadow , ALLOC_ORDER ) ;
free_pages ( ( unsigned long ) table , ALLOC_ORDER ) ;
}
2008-02-09 18:24:37 +01:00
# ifdef CONFIG_64BIT
int crst_table_upgrade ( struct mm_struct * mm , unsigned long limit )
{
unsigned long * table , * pgd ;
unsigned long entry ;
BUG_ON ( limit > ( 1UL < < 53 ) ) ;
repeat :
table = crst_table_alloc ( mm , mm - > context . noexec ) ;
if ( ! table )
return - ENOMEM ;
spin_lock ( & mm - > page_table_lock ) ;
if ( mm - > context . asce_limit < limit ) {
pgd = ( unsigned long * ) mm - > pgd ;
if ( mm - > context . asce_limit < = ( 1UL < < 31 ) ) {
entry = _REGION3_ENTRY_EMPTY ;
mm - > context . asce_limit = 1UL < < 42 ;
mm - > context . asce_bits = _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS |
_ASCE_TYPE_REGION3 ;
} else {
entry = _REGION2_ENTRY_EMPTY ;
mm - > context . asce_limit = 1UL < < 53 ;
mm - > context . asce_bits = _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS |
_ASCE_TYPE_REGION2 ;
}
crst_table_init ( table , entry ) ;
pgd_populate ( mm , ( pgd_t * ) table , ( pud_t * ) pgd ) ;
mm - > pgd = ( pgd_t * ) table ;
2009-03-18 13:27:36 +01:00
mm - > task_size = mm - > context . asce_limit ;
2008-02-09 18:24:37 +01:00
table = NULL ;
}
spin_unlock ( & mm - > page_table_lock ) ;
if ( table )
crst_table_free ( mm , table ) ;
if ( mm - > context . asce_limit < limit )
goto repeat ;
update_mm ( mm , current ) ;
return 0 ;
}
void crst_table_downgrade ( struct mm_struct * mm , unsigned long limit )
{
pgd_t * pgd ;
if ( mm - > context . asce_limit < = limit )
return ;
__tlb_flush_mm ( mm ) ;
while ( mm - > context . asce_limit > limit ) {
pgd = mm - > pgd ;
switch ( pgd_val ( * pgd ) & _REGION_ENTRY_TYPE_MASK ) {
case _REGION_ENTRY_TYPE_R2 :
mm - > context . asce_limit = 1UL < < 42 ;
mm - > context . asce_bits = _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS |
_ASCE_TYPE_REGION3 ;
break ;
case _REGION_ENTRY_TYPE_R3 :
mm - > context . asce_limit = 1UL < < 31 ;
mm - > context . asce_bits = _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS |
_ASCE_TYPE_SEGMENT ;
break ;
default :
BUG ( ) ;
}
mm - > pgd = ( pgd_t * ) ( pgd_val ( * pgd ) & _REGION_ENTRY_ORIGIN ) ;
2009-03-18 13:27:36 +01:00
mm - > task_size = mm - > context . asce_limit ;
2008-02-09 18:24:37 +01:00
crst_table_free ( mm , ( unsigned long * ) pgd ) ;
}
update_mm ( mm , current ) ;
}
# endif
2007-10-22 12:52:47 +02:00
/*
 * page table entry allocation/free routines.
 */
2008-02-09 18:24:35 +01:00
unsigned long * page_table_alloc ( struct mm_struct * mm )
2007-10-22 12:52:47 +02:00
{
2008-02-09 18:24:35 +01:00
struct page * page ;
2007-10-22 12:52:47 +02:00
unsigned long * table ;
2008-02-09 18:24:35 +01:00
unsigned long bits ;
2007-10-22 12:52:47 +02:00
2008-10-28 11:10:15 +01:00
bits = ( mm - > context . noexec | | mm - > context . has_pgste ) ? 3UL : 1UL ;
2008-02-09 18:24:35 +01:00
spin_lock ( & mm - > page_table_lock ) ;
page = NULL ;
if ( ! list_empty ( & mm - > context . pgtable_list ) ) {
page = list_first_entry ( & mm - > context . pgtable_list ,
struct page , lru ) ;
if ( ( page - > flags & FRAG_MASK ) = = ( ( 1UL < < TABLES_PER_PAGE ) - 1 ) )
page = NULL ;
}
if ( ! page ) {
spin_unlock ( & mm - > page_table_lock ) ;
page = alloc_page ( GFP_KERNEL | __GFP_REPEAT ) ;
if ( ! page )
2007-10-22 12:52:47 +02:00
return NULL ;
2008-02-09 18:24:35 +01:00
pgtable_page_ctor ( page ) ;
page - > flags & = ~ FRAG_MASK ;
table = ( unsigned long * ) page_to_phys ( page ) ;
2008-10-28 11:10:15 +01:00
if ( mm - > context . has_pgste )
2008-03-25 18:47:10 +01:00
clear_table_pgstes ( table ) ;
else
clear_table ( table , _PAGE_TYPE_EMPTY , PAGE_SIZE ) ;
2008-02-09 18:24:35 +01:00
spin_lock ( & mm - > page_table_lock ) ;
list_add ( & page - > lru , & mm - > context . pgtable_list ) ;
2007-10-22 12:52:47 +02:00
}
table = ( unsigned long * ) page_to_phys ( page ) ;
2008-02-09 18:24:35 +01:00
while ( page - > flags & bits ) {
table + = 256 ;
bits < < = 1 ;
}
page - > flags | = bits ;
if ( ( page - > flags & FRAG_MASK ) = = ( ( 1UL < < TABLES_PER_PAGE ) - 1 ) )
list_move_tail ( & page - > lru , & mm - > context . pgtable_list ) ;
spin_unlock ( & mm - > page_table_lock ) ;
2007-10-22 12:52:47 +02:00
return table ;
}
2008-02-09 18:24:35 +01:00
void page_table_free ( struct mm_struct * mm , unsigned long * table )
2007-10-22 12:52:47 +02:00
{
2008-02-09 18:24:35 +01:00
struct page * page ;
unsigned long bits ;
2007-10-22 12:52:47 +02:00
2008-10-28 11:10:15 +01:00
bits = ( mm - > context . noexec | | mm - > context . has_pgste ) ? 3UL : 1UL ;
2008-02-09 18:24:35 +01:00
bits < < = ( __pa ( table ) & ( PAGE_SIZE - 1 ) ) / 256 / sizeof ( unsigned long ) ;
page = pfn_to_page ( __pa ( table ) > > PAGE_SHIFT ) ;
spin_lock ( & mm - > page_table_lock ) ;
page - > flags ^ = bits ;
if ( page - > flags & FRAG_MASK ) {
/* Page now has some free pgtable fragments. */
list_move ( & page - > lru , & mm - > context . pgtable_list ) ;
page = NULL ;
} else
/* All fragments of the 4K page have been freed. */
list_del ( & page - > lru ) ;
spin_unlock ( & mm - > page_table_lock ) ;
if ( page ) {
pgtable_page_dtor ( page ) ;
__free_page ( page ) ;
}
}
2007-10-22 12:52:47 +02:00
2008-02-09 18:24:35 +01:00
void disable_noexec ( struct mm_struct * mm , struct task_struct * tsk )
{
struct page * page ;
spin_lock ( & mm - > page_table_lock ) ;
/* Free shadow region and segment tables. */
list_for_each_entry ( page , & mm - > context . crst_list , lru )
if ( page - > index ) {
free_pages ( ( unsigned long ) page - > index , ALLOC_ORDER ) ;
page - > index = 0 ;
}
/* "Free" second halves of page tables. */
list_for_each_entry ( page , & mm - > context . pgtable_list , lru )
page - > flags & = ~ SECOND_HALVES ;
spin_unlock ( & mm - > page_table_lock ) ;
mm - > context . noexec = 0 ;
update_mm ( mm , tsk ) ;
2007-10-22 12:52:47 +02:00
}
2008-03-25 18:47:10 +01:00
/*
* switch on pgstes for its userspace process ( for kvm )
*/
int s390_enable_sie ( void )
{
struct task_struct * tsk = current ;
2008-05-21 13:37:29 +02:00
struct mm_struct * mm , * old_mm ;
2008-03-25 18:47:10 +01:00
2009-03-26 15:23:57 +01:00
/* Do we have switched amode? If no, we cannot do sie */
if ( ! switch_amode )
return - EINVAL ;
2008-05-21 13:37:29 +02:00
/* Do we have pgstes? if yes, we are done */
2008-10-28 11:10:15 +01:00
if ( tsk - > mm - > context . has_pgste )
2008-05-21 13:37:29 +02:00
return 0 ;
2008-03-25 18:47:10 +01:00
2008-05-21 13:37:29 +02:00
/* lets check if we are allowed to replace the mm */
task_lock ( tsk ) ;
2008-03-25 18:47:10 +01:00
if ( ! tsk - > mm | | atomic_read ( & tsk - > mm - > mm_users ) > 1 | |
2008-12-09 08:11:22 +01:00
tsk - > mm ! = tsk - > active_mm | | ! hlist_empty ( & tsk - > mm - > ioctx_list ) ) {
2008-05-21 13:37:29 +02:00
task_unlock ( tsk ) ;
return - EINVAL ;
}
task_unlock ( tsk ) ;
2008-03-25 18:47:10 +01:00
2008-10-28 11:10:15 +01:00
/* we copy the mm and let dup_mm create the page tables with_pgstes */
tsk - > mm - > context . alloc_pgste = 1 ;
2008-03-25 18:47:10 +01:00
mm = dup_mm ( tsk ) ;
2008-10-28 11:10:15 +01:00
tsk - > mm - > context . alloc_pgste = 0 ;
2008-03-25 18:47:10 +01:00
if ( ! mm )
2008-05-21 13:37:29 +02:00
return - ENOMEM ;
2008-10-28 11:10:15 +01:00
/* Now lets check again if something happened */
2008-05-21 13:37:29 +02:00
task_lock ( tsk ) ;
if ( ! tsk - > mm | | atomic_read ( & tsk - > mm - > mm_users ) > 1 | |
2008-12-09 08:11:22 +01:00
tsk - > mm ! = tsk - > active_mm | | ! hlist_empty ( & tsk - > mm - > ioctx_list ) ) {
2008-05-21 13:37:29 +02:00
mmput ( mm ) ;
task_unlock ( tsk ) ;
return - EINVAL ;
}
/* ok, we are alone. No ptrace, no threads, etc. */
old_mm = tsk - > mm ;
2008-03-25 18:47:10 +01:00
tsk - > mm = tsk - > active_mm = mm ;
preempt_disable ( ) ;
update_mm ( mm , tsk ) ;
2009-03-26 15:25:01 +01:00
cpumask_set_cpu ( smp_processor_id ( ) , mm_cpumask ( mm ) ) ;
2008-03-25 18:47:10 +01:00
preempt_enable ( ) ;
task_unlock ( tsk ) ;
2008-05-21 13:37:29 +02:00
mmput ( old_mm ) ;
return 0 ;
2008-03-25 18:47:10 +01:00
}
EXPORT_SYMBOL_GPL ( s390_enable_sie ) ;