2005-04-17 02:20:36 +04:00
/*
2014-10-30 00:33:46 +03:00
* Copyright ( c ) 2012 - 2014 Andy Lutomirski < luto @ amacapital . net >
*
* Based on the original implementation which is :
2005-04-17 02:20:36 +04:00
* Copyright ( C ) 2001 Andrea Arcangeli < andrea @ suse . de > SuSE
* Copyright 2003 Andi Kleen , SuSE Labs .
*
2014-10-30 00:33:46 +03:00
* Parts of the original code have been moved to arch / x86 / vdso / vma . c
*
* This file implements vsyscall emulation . vsyscalls are a legacy ABI :
* Userspace can request certain kernel services by calling fixed
* addresses . This concept is problematic :
2011-06-05 21:50:24 +04:00
*
2014-10-30 00:33:46 +03:00
* - It interferes with ASLR .
* - It ' s awkward to write code that lives in kernel addresses but is
* callable by userspace at fixed addresses .
* - The whole concept is impossible for 32 - bit compat userspace .
* - UML cannot easily virtualize a vsyscall .
2005-04-17 02:20:36 +04:00
*
2014-10-30 00:33:46 +03:00
* As of mid - 2014 , I believe that there is no new userspace code that
* will use a vsyscall if the vDSO is present . I hope that there will
* soon be no new userspace code that will ever use a vsyscall .
2005-04-17 02:20:36 +04:00
*
2014-10-30 00:33:46 +03:00
* The code in this file emulates vsyscalls when notified of a page
* fault to a vsyscall address .
2005-04-17 02:20:36 +04:00
*/
# include <linux/kernel.h>
# include <linux/timer.h>
2017-02-08 20:51:30 +03:00
# include <linux/sched/signal.h>
2017-02-04 02:16:44 +03:00
# include <linux/mm_types.h>
2011-06-05 21:50:24 +04:00
# include <linux/syscalls.h>
# include <linux/ratelimit.h>
2005-04-17 02:20:36 +04:00
# include <asm/vsyscall.h>
2007-02-16 12:28:21 +03:00
# include <asm/unistd.h>
2005-04-17 02:20:36 +04:00
# include <asm/fixmap.h>
2011-06-05 21:50:24 +04:00
# include <asm/traps.h>
2005-04-17 02:20:36 +04:00
2011-08-03 17:31:54 +04:00
# define CREATE_TRACE_POINTS
# include "vsyscall_trace.h"
2015-08-13 03:55:19 +03:00
/*
 * Current vsyscall handling mode.  The build-time default is selected by
 * the CONFIG_LEGACY_VSYSCALL_* options and can be overridden on the
 * kernel command line via the "vsyscall=" early param (vsyscall_setup).
 */
static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
	NATIVE;
#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
	NONE;
#else
	EMULATE;
#endif
2011-08-10 19:15:32 +04:00
/*
 * Parse the "vsyscall=" kernel command-line parameter.
 *
 * Accepted values: "emulate" (trap and emulate vsyscalls), "native"
 * (map the vsyscall page executable), "none" (disable vsyscalls).
 * Returns 0 on success, -EINVAL for a missing or unrecognized value.
 */
static int __init vsyscall_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp("emulate", str))
		vsyscall_mode = EMULATE;
	else if (!strcmp("native", str))
		vsyscall_mode = NATIVE;
	else if (!strcmp("none", str))
		vsyscall_mode = NONE;
	else
		return -EINVAL;

	return 0;
}
early_param("vsyscall", vsyscall_setup);
2011-06-05 21:50:24 +04:00
static void warn_bad_vsyscall ( const char * level , struct pt_regs * regs ,
const char * message )
2005-04-17 02:20:36 +04:00
{
2012-05-22 06:50:07 +04:00
if ( ! show_unhandled_signals )
2011-06-05 21:50:24 +04:00
return ;
2005-04-17 02:20:36 +04:00
2014-07-26 03:30:27 +04:00
printk_ratelimited ( " %s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx \n " ,
level , current - > comm , task_pid_nr ( current ) ,
message , regs - > ip , regs - > cs ,
regs - > sp , regs - > ax , regs - > si , regs - > di ) ;
2011-07-13 17:24:09 +04:00
}
static int addr_to_vsyscall_nr ( unsigned long addr )
{
int nr ;
2014-05-05 23:19:36 +04:00
if ( ( addr & ~ 0xC00UL ) ! = VSYSCALL_ADDR )
2011-07-13 17:24:09 +04:00
return - EINVAL ;
nr = ( addr & 0xC00UL ) > > 10 ;
if ( nr > = 3 )
return - EINVAL ;
return nr ;
2005-04-17 02:20:36 +04:00
}
2011-11-08 04:33:40 +04:00
/*
 * Check that userspace may write @size bytes at @ptr.  On failure,
 * deliver SIGSEGV (SEGV_MAPERR) to the current task, mimicking the
 * fault state a real page fault would record, and return false.
 */
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
	/*
	 * XXX: if access_ok, get_user, and put_user handled
	 * sig_on_uaccess_err, this could go away.
	 */
	struct thread_struct *thread = &current->thread;
	siginfo_t info;

	if (access_ok(VERIFY_WRITE, (void __user *)ptr, size))
		return true;

	/* Fake the state a hardware #PF would have left behind. */
	thread->error_code = 6;	/* user fault, no page, write */
	thread->cr2 = ptr;
	thread->trap_nr = X86_TRAP_PF;

	memset(&info, 0, sizeof(info));
	info.si_signo = SIGSEGV;
	info.si_errno = 0;
	info.si_code = SEGV_MAPERR;
	info.si_addr = (void __user *)ptr;

	force_sig_info(SIGSEGV, &info, current);
	return false;
}
2011-08-10 19:15:32 +04:00
/*
 * Emulate a call into the legacy vsyscall page.
 *
 * Called when a fault at a vsyscall address is reported with
 * regs->ip == address (i.e. userspace jumped there).  Returns true if
 * the fault was handled here (emulated, or a signal was queued), false
 * if it should be treated as an ordinary fault (vsyscall=none).
 */
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
	struct task_struct *tsk;
	unsigned long caller;
	int vsyscall_nr, syscall_nr, tmp;
	int prev_sig_on_uaccess_err;
	long ret;

	/*
	 * No point in checking CS -- the only way to get here is a user mode
	 * trap to a high address, which means that we're in 64-bit user code.
	 */

	WARN_ON_ONCE(address != regs->ip);

	if (vsyscall_mode == NONE) {
		warn_bad_vsyscall(KERN_INFO, regs,
				  "vsyscall attempted with vsyscall=none");
		return false;
	}

	vsyscall_nr = addr_to_vsyscall_nr(address);

	trace_emulate_vsyscall(vsyscall_nr);

	if (vsyscall_nr < 0) {
		warn_bad_vsyscall(KERN_WARNING, regs,
				  "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
		goto sigsegv;
	}

	/* The emulated "ret" below needs the return address off the user stack. */
	if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
		warn_bad_vsyscall(KERN_WARNING, regs,
				  "vsyscall with bad stack (exploit attempt?)");
		goto sigsegv;
	}

	tsk = current;

	/*
	 * Check for access_ok violations and find the syscall nr.
	 *
	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
	 * 64-bit, so we don't need to special-case it here.  For all the
	 * vsyscalls, NULL means "don't write anything" not "write it at
	 * address 0".
	 */
	switch (vsyscall_nr) {
	case 0:
		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
		    !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_gettimeofday;
		break;

	case 1:
		if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_time;
		break;

	case 2:
		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
		    !write_ok_or_segv(regs->si, sizeof(unsigned))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_getcpu;
		break;
	}

	/*
	 * Handle seccomp.  regs->ip must be the original value.
	 * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
	 *
	 * We could optimize the seccomp disabled case, but performance
	 * here doesn't matter.
	 */
	regs->orig_ax = syscall_nr;
	regs->ax = -ENOSYS;
	tmp = secure_computing(NULL);
	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
		warn_bad_vsyscall(KERN_DEBUG, regs,
				  "seccomp tried to change syscall nr or ip");
		do_exit(SIGSYS);
	}
	regs->orig_ax = -1;
	if (tmp)
		goto do_ret; /* skip requested */

	/*
	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
	 * preserve that behavior to make writing exploits harder.
	 */
	prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
	current->thread.sig_on_uaccess_err = 1;

	ret = -EFAULT;
	switch (vsyscall_nr) {
	case 0:
		ret = sys_gettimeofday(
			(struct timeval __user *)regs->di,
			(struct timezone __user *)regs->si);
		break;

	case 1:
		ret = sys_time((time_t __user *)regs->di);
		break;

	case 2:
		ret = sys_getcpu((unsigned __user *)regs->di,
				 (unsigned __user *)regs->si,
				 NULL);
		break;
	}

	current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;

check_fault:
	if (ret == -EFAULT) {
		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
		warn_bad_vsyscall(KERN_INFO, regs,
				  "vsyscall fault (exploit attempt?)");

		/*
		 * If we failed to generate a signal for any reason,
		 * generate one here.  (This should be impossible.)
		 */
		if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
				 !sigismember(&tsk->pending.signal, SIGSEGV)))
			goto sigsegv;

		return true;  /* Don't emulate the ret. */
	}

	regs->ax = ret;

do_ret:
	/* Emulate a ret instruction. */
	regs->ip = caller;
	regs->sp += 8;
	return true;

sigsegv:
	force_sig(SIGSEGV, current);
	return true;
}
2014-09-23 21:50:51 +04:00
/*
 * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
 * covers the 64 bit vsyscall page now.  32 bit has a real VMA now and does
 * not need special handling anymore:
 */
/* ->name callback: region appears as "[vsyscall]" in /proc/<pid>/maps. */
static const char *gate_vma_name(struct vm_area_struct *vma)
{
	return "[vsyscall]";
}

static const struct vm_operations_struct gate_vma_ops = {
	.name = gate_vma_name,
};

/* The pseudo VMA itself: a single read+exec page at VSYSCALL_ADDR. */
static struct vm_area_struct gate_vma = {
	.vm_start	= VSYSCALL_ADDR,
	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
	.vm_page_prot	= PAGE_READONLY_EXEC,
	.vm_flags	= VM_READ | VM_EXEC,
	.vm_ops		= &gate_vma_ops,
};
struct vm_area_struct * get_gate_vma ( struct mm_struct * mm )
{
2015-06-22 14:55:16 +03:00
# ifdef CONFIG_COMPAT
2014-09-23 21:50:51 +04:00
if ( ! mm | | mm - > context . ia32_compat )
return NULL ;
# endif
2014-10-30 00:33:45 +03:00
if ( vsyscall_mode = = NONE )
return NULL ;
2014-09-23 21:50:51 +04:00
return & gate_vma ;
}
int in_gate_area ( struct mm_struct * mm , unsigned long addr )
{
struct vm_area_struct * vma = get_gate_vma ( mm ) ;
if ( ! vma )
return 0 ;
return ( addr > = vma - > vm_start ) & & ( addr < vma - > vm_end ) ;
}
/*
* Use this when you have no reliable mm , typically from interrupt
* context . It is less reliable than using a task ' s mm and may give
* false positives .
*/
int in_gate_area_no_mm ( unsigned long addr )
{
2014-10-30 00:33:45 +03:00
return vsyscall_mode ! = NONE & & ( addr & PAGE_MASK ) = = VSYSCALL_ADDR ;
2014-09-23 21:50:51 +04:00
}
2008-01-30 15:32:39 +03:00
/*
 * Install the vsyscall fixmap mapping at boot.  NATIVE mode maps the
 * page with execute permission; EMULATE mode maps it without execute
 * permission so that jumps into it fault and reach emulate_vsyscall();
 * NONE mode leaves it unmapped.
 */
void __init map_vsyscall(void)
{
	extern char __vsyscall_page;
	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);

	if (vsyscall_mode != NONE) {
		pgprot_t prot = vsyscall_mode == NATIVE ?
				PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR;

		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, prot);
	}

	/* The fixmap slot must land exactly at the legacy ABI address. */
	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
		     (unsigned long)VSYSCALL_ADDR);
}