2005-09-26 10:04:21 +04:00
/*
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Modified by Cort Dougan and Paul Mackerras.
 *
 *  Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */
# include <linux/config.h>
# include <linux/signal.h>
# include <linux/sched.h>
# include <linux/kernel.h>
# include <linux/errno.h>
# include <linux/string.h>
# include <linux/types.h>
# include <linux/ptrace.h>
# include <linux/mman.h>
# include <linux/mm.h>
# include <linux/interrupt.h>
# include <linux/highmem.h>
# include <linux/module.h>
# include <linux/kprobes.h>
# include <asm/page.h>
# include <asm/pgtable.h>
# include <asm/mmu.h>
# include <asm/mmu_context.h>
# include <asm/system.h>
# include <asm/uaccess.h>
# include <asm/tlbflush.h>
# include <asm/kdebug.h>
# include <asm/siginfo.h>
/*
* Check whether the instruction at regs - > nip is a store using
* an update addressing form which will update r1 .
*/
static int store_updates_sp ( struct pt_regs * regs )
{
unsigned int inst ;
if ( get_user ( inst , ( unsigned int __user * ) regs - > nip ) )
return 0 ;
/* check for 1 in the rA field */
if ( ( ( inst > > 16 ) & 0x1f ) ! = 1 )
return 0 ;
/* check major opcode */
switch ( inst > > 26 ) {
case 37 : /* stwu */
case 39 : /* stbu */
case 45 : /* sthu */
case 53 : /* stfsu */
case 55 : /* stfdu */
return 1 ;
case 62 : /* std or stdu */
return ( inst & 3 ) = = 1 ;
case 31 :
/* check minor opcode */
switch ( ( inst > > 1 ) & 0x3ff ) {
case 181 : /* stdux */
case 183 : /* stwux */
case 247 : /* stbux */
case 439 : /* sthux */
case 695 : /* stfsux */
case 759 : /* stfdux */
return 1 ;
}
}
return 0 ;
}
2005-10-26 18:55:41 +04:00
# if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
2006-01-09 07:47:04 +03:00
static void do_dabr ( struct pt_regs * regs , unsigned long address ,
unsigned long error_code )
2005-09-26 10:04:21 +04:00
{
siginfo_t info ;
if ( notify_die ( DIE_DABR_MATCH , " dabr_match " , regs , error_code ,
11 , SIGSEGV ) = = NOTIFY_STOP )
return ;
if ( debugger_dabr_match ( regs ) )
return ;
/* Clear the DABR */
set_dabr ( 0 ) ;
/* Deliver the signal to userspace */
info . si_signo = SIGTRAP ;
info . si_errno = 0 ;
info . si_code = TRAP_HWBKPT ;
2006-01-09 07:47:04 +03:00
info . si_addr = ( void __user * ) address ;
2005-09-26 10:04:21 +04:00
force_sig_info ( SIGTRAP , & info , current ) ;
}
2005-10-26 18:55:41 +04:00
# endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
2005-09-26 10:04:21 +04:00
/*
 * do_page_fault - try to resolve a data or instruction access fault.
 *
 * For 600- and 800-family processors, the error_code parameter is DSISR
 * for a data fault, SRR1 for an instruction fault. For 400-family processors
 * the error_code parameter is ESR for a data fault, 0 for an instruction
 * fault.
 * For 64-bit processors, the error_code parameter is
 *  - DSISR for a non-SLB data access fault,
 *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
 *  - 0 any SLB fault.
 *
 * regs is the register state at the fault and address is the faulting
 * address.
 *
 * The return value is 0 if the fault was handled, or the signal
 * number if this is a kernel fault that cannot be handled here.
 *
 * Locking: mm->mmap_sem is taken for reading before the vma lookup and is
 * released on every exit path that is reached after that point.
 *
 * NOTE(review): the bare date/time lines inside this function are not C.
 * They look like annotation residue from whatever produced this listing
 * and presumably have to be stripped before the file compiles - confirm.
 */
int __kprobes do_page_fault ( struct pt_regs * regs , unsigned long address ,
unsigned long error_code )
{
struct vm_area_struct * vma ;
struct mm_struct * mm = current - > mm ;
siginfo_t info ;
/* si_code used for SIGSEGV; becomes SEGV_ACCERR once good_area is reached */
int code = SEGV_MAPERR ;
int is_write = 0 ;
int trap = TRAP ( regs ) ;
/* trap 0x400 is the instruction-fault case throughout this function */
int is_exec = trap = = 0x400 ;
# if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
/*
 * Fortunately the bit assignments in SRR1 for an instruction
 * fault and DSISR for a data fault are mostly the same for the
 * bits we are interested in. But there are some bits which
 * indicate errors in DSISR but can validly be set in SRR1.
 * For an instruction fault those bits are masked off here; for a
 * data fault the store bit decides whether this is a write.
 */
if ( trap = = 0x400 )
error_code & = 0x48200000 ;
else
is_write = error_code & DSISR_ISSTORE ;
# else
/* 4xx and Book-E: the write indication comes from ESR_DST */
is_write = error_code & ESR_DST ;
# endif /* CONFIG_4xx || CONFIG_BOOKE */
/* Let registered die-notifiers claim the fault first */
if ( notify_die ( DIE_PAGE_FAULT , " page_fault " , regs , error_code ,
11 , SIGSEGV ) = = NOTIFY_STOP )
return 0 ;
/* For trap 0x300 only, the debugger fault hook may claim the fault */
if ( trap = = 0x300 ) {
if ( debugger_fault_handler ( regs ) )
return 0 ;
}
/* On a kernel SLB miss we can only check for a valid exception entry */
if ( ! user_mode ( regs ) & & ( address > = TASK_SIZE ) )
return SIGSEGV ;
# if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
if ( error_code & DSISR_DABRMATCH ) {
/* DABR match: hand off to do_dabr and report the fault as handled */
2006-01-09 07:47:04 +03:00
do_dabr ( regs , address , error_code ) ;
2005-09-26 10:04:21 +04:00
return 0 ;
}
# endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
if ( in_atomic ( ) | | mm = = NULL ) {
/* A kernel-mode fault here is handed back to the caller as SIGSEGV */
if ( ! user_mode ( regs ) )
return SIGSEGV ;
/* in_atomic() in user mode is really bad,
   as is current->mm == NULL. */
printk ( KERN_EMERG " Page fault in user mode with "
" in_atomic() = %d mm = %p \n " , in_atomic ( ) , mm ) ;
printk ( KERN_EMERG " NIP = %lx MSR = %lx \n " ,
regs - > nip , regs - > msr ) ;
die ( " Weird page fault " , regs , SIGSEGV ) ;
}
/* When running in the kernel we expect faults to occur only to
 * addresses in user space. All other faults represent errors in the
 * kernel and should generate an OOPS. Unfortunately, in the case of an
 * erroneous fault occurring in a code path which already holds mmap_sem
 * we will deadlock attempting to validate the fault against the
 * address space. Luckily the kernel only validly references user
 * space from well defined areas of code, which are listed in the
 * exceptions table.
 *
 * As the vast majority of faults will be valid we will only perform
 * the source reference check when there is a possibility of a deadlock.
 * Attempt to lock the address space, if we cannot we then validate the
 * source. If this is invalid we can skip the address space check,
 * thus avoiding the deadlock.
 */
if ( ! down_read_trylock ( & mm - > mmap_sem ) ) {
if ( ! user_mode ( regs ) & & ! search_exception_tables ( regs - > nip ) )
goto bad_area_nosemaphore ;
down_read ( & mm - > mmap_sem ) ;
}
/*
 * Look up the vma for the address. If the vma found starts above the
 * address, the fault is only acceptable when that vma is a grows-down
 * (stack) mapping that can be expanded to cover it.
 */
vma = find_vma ( mm , address ) ;
if ( ! vma )
goto bad_area ;
if ( vma - > vm_start < = address )
goto good_area ;
if ( ! ( vma - > vm_flags & VM_GROWSDOWN ) )
goto bad_area ;
/*
 * N.B. The POWER/Open ABI allows programs to access up to
 * 288 bytes below the stack pointer.
 * The kernel signal delivery code writes up to about 1.5kB
 * below the stack pointer (r1) before decrementing it.
 * The exec code can write slightly over 640kB to the stack
 * before setting the user r1. Thus we allow the stack to
 * expand to 1MB without further checks.
 */
if ( address + 0x100000 < vma - > vm_end ) {
/* get user regs even if this fault is in kernel mode */
struct pt_regs * uregs = current - > thread . regs ;
if ( uregs = = NULL )
goto bad_area ;
/*
 * A user-mode access to an address a long way below
 * the stack pointer is only valid if the instruction
 * is one which would update the stack pointer to the
 * address accessed if the instruction completed,
 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
 * (or the byte, halfword, float or double forms).
 *
 * If we do not check this then any write to the area
 * between the last mapped region and the stack will
 * expand the stack rather than segfaulting.
 *
 * The instruction is only inspected for user-mode faults; a
 * kernel-mode fault this far below r1 is always rejected.
 */
if ( address + 2048 < uregs - > gpr [ 1 ]
& & ( ! user_mode ( regs ) | | ! store_updates_sp ( regs ) ) )
goto bad_area ;
}
if ( expand_stack ( vma , address ) )
goto bad_area ;
good_area :
/* A vma covers the address, so any failure from here on is a permission error */
code = SEGV_ACCERR ;
# if defined(CONFIG_6xx)
if ( error_code & 0x95700000 )
/* an error such as lwarx to I/O controller space,
   address matching DABR, eciwx, etc. */
goto bad_area ;
# endif /* CONFIG_6xx */
# if defined(CONFIG_8xx)
/* The MPC8xx seems to always set 0x80000000, which is
 * undefined. Of those that can be set, this is the only
 * one which seems bad.
 */
if ( error_code & 0x10000000 )
/* Guarded storage error. */
goto bad_area ;
# endif /* CONFIG_8xx */
/* Check the access type against the permissions of the vma */
if ( is_exec ) {
# ifdef CONFIG_PPC64
/* protection fault */
if ( error_code & DSISR_PROTFAULT )
goto bad_area ;
if ( ! ( vma - > vm_flags & VM_EXEC ) )
goto bad_area ;
# endif
# if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
pte_t * ptep ;
/* Since 4xx/Book-E supports per-page execute permission,
 * we lazily flush dcache to icache.
 *
 * If a present PTE is found, the page is flushed once (PG_arch_1
 * records that it has been done), _PAGE_HWEXEC is passed to
 * pte_update (presumably setting it - confirm), the TLB entry for
 * the address is invalidated, and the fault is finished here with
 * mmap_sem released. */
ptep = NULL ;
if ( get_pteptr ( mm , address , & ptep ) & & pte_present ( * ptep ) ) {
struct page * page = pte_page ( * ptep ) ;
if ( ! test_bit ( PG_arch_1 , & page - > flags ) ) {
flush_dcache_icache_page ( page ) ;
set_bit ( PG_arch_1 , & page - > flags ) ;
}
pte_update ( ptep , 0 , _PAGE_HWEXEC ) ;
_tlbie ( address ) ;
pte_unmap ( ptep ) ;
up_read ( & mm - > mmap_sem ) ;
return 0 ;
}
/* No present PTE: drop the mapping if one was returned and fall through */
if ( ptep ! = NULL )
pte_unmap ( ptep ) ;
# endif
/* a write */
} else if ( is_write ) {
if ( ! ( vma - > vm_flags & VM_WRITE ) )
goto bad_area ;
/* a read */
} else {
/* protection fault */
if ( error_code & 0x08000000 )
goto bad_area ;
if ( ! ( vma - > vm_flags & ( VM_READ | VM_EXEC ) ) )
goto bad_area ;
}
/*
 * If for any reason at all we could not handle the fault,
 * make sure we exit gracefully rather than endlessly redo
 * the fault.
 */
survive :
/* Entered with mmap_sem held for reading; also the retry point for pid 1 */
switch ( handle_mm_fault ( mm , vma , address , is_write ) ) {
case VM_FAULT_MINOR :
current - > min_flt + + ;
break ;
case VM_FAULT_MAJOR :
current - > maj_flt + + ;
break ;
case VM_FAULT_SIGBUS :
goto do_sigbus ;
case VM_FAULT_OOM :
goto out_of_memory ;
default :
BUG ( ) ;
}
up_read ( & mm - > mmap_sem ) ;
return 0 ;
/* Failure path for callers that hold mmap_sem: release it first */
bad_area :
up_read ( & mm - > mmap_sem ) ;
/* Failure path entered directly when mmap_sem was never taken */
bad_area_nosemaphore :
/* User mode accesses cause a SIGSEGV */
if ( user_mode ( regs ) ) {
_exception ( SIGSEGV , regs , code , address ) ;
return 0 ;
}
/* Kernel mode: log a rate-limited warning for an exec protection fault */
if ( is_exec & & ( error_code & DSISR_PROTFAULT )
& & printk_ratelimit ( ) )
printk ( KERN_CRIT " kernel tried to execute NX-protected "
" page (%lx) - exploit attempt? (uid: %d) \n " ,
address , current - > uid ) ;
return SIGSEGV ;
/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory :
up_read ( & mm - > mmap_sem ) ;
/* pid 1 is never killed here: yield, retake the lock and retry the fault */
if ( current - > pid = = 1 ) {
yield ( ) ;
down_read ( & mm - > mmap_sem ) ;
goto survive ;
}
printk ( " VM: killing process %s \n " , current - > comm ) ;
/* A user-mode task exits here; a kernel-mode fault returns SIGKILL */
if ( user_mode ( regs ) )
do_exit ( SIGKILL ) ;
return SIGKILL ;
do_sigbus :
up_read ( & mm - > mmap_sem ) ;
/* User mode gets SIGBUS with BUS_ADRERR; kernel mode returns SIGBUS */
if ( user_mode ( regs ) ) {
info . si_signo = SIGBUS ;
info . si_errno = 0 ;
info . si_code = BUS_ADRERR ;
info . si_addr = ( void __user * ) address ;
force_sig_info ( SIGBUS , & info , current ) ;
return 0 ;
}
return SIGBUS ;
}
/*
* bad_page_fault is called when we have a bad access from the kernel .
* It is called from the DSI and ISI handlers in head . S and from some
* of the procedures in traps . c .
*/
void bad_page_fault ( struct pt_regs * regs , unsigned long address , int sig )
{
const struct exception_table_entry * entry ;
/* Are we prepared to handle this fault? */
if ( ( entry = search_exception_tables ( regs - > nip ) ) ! = NULL ) {
regs - > nip = entry - > fixup ;
return ;
}
/* kernel has accessed a bad area */
2005-11-07 01:54:36 +03:00
printk ( KERN_ALERT " Unable to handle kernel paging request for " ) ;
switch ( regs - > trap ) {
case 0x300 :
case 0x380 :
printk ( " data at address 0x%08lx \n " , regs - > dar ) ;
break ;
case 0x400 :
case 0x480 :
printk ( " instruction fetch \n " ) ;
break ;
default :
printk ( " unknown fault \n " ) ;
}
printk ( KERN_ALERT " Faulting instruction address: 0x%08lx \n " ,
regs - > nip ) ;
2005-09-26 10:04:21 +04:00
die ( " Kernel access of bad area " , regs , sig ) ;
}