// SPDX-License-Identifier: GPL-2.0-only
/*
 * 32-bit syscall ABI conformance test.
 *
 * Copyright (c) 2015 Denys Vlasenko
 */
/*
 * Can be built statically:
 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
 */
# undef _GNU_SOURCE
# define _GNU_SOURCE 1
# undef __USE_GNU
# define __USE_GNU 1
# include <unistd.h>
# include <stdlib.h>
# include <string.h>
# include <stdio.h>
# include <signal.h>
# include <sys/types.h>
# include <sys/select.h>
# include <sys/time.h>
# include <elf.h>
# include <sys/ptrace.h>
# include <sys/wait.h>
# if !defined(__i386__)
/*
 * Entry point used when the file is not compiled for 32-bit x86:
 * there is nothing to test, so announce the skip and report success.
 */
int main(int argc, char **argv, char **envp)
{
	/* The message contains no conversion specifiers, so emit it verbatim. */
	fputs(" [SKIP] \t Not a 32-bit x86 userspace \n ", stdout);
	return 0;
}
# else
/*
 * Address of the syscall entry point that run_syscall() calls through.
 * main() sets it from get_syscall(); run_syscall_twice() temporarily
 * points it at the int80 stub.
 */
long syscall_addr ;
/*
 * Look up the AT_SYSINFO value in the ELF auxiliary vector.
 *
 * @envp: the environment pointer array as passed to main(). The memory
 *        directly after its terminating NULL is read as an array of
 *        Elf32_auxv_t entries ending with an AT_NULL entry.
 *
 * Returns the a_val of the first AT_SYSINFO entry. If there is none,
 * prints a warning and returns 0.
 */
long get_syscall(char **envp)
{
	const Elf32_auxv_t *entry;

	/* Step over every environment pointer, terminating NULL included. */
	for (;;) {
		char *var = *envp;

		envp++;
		if (var == NULL)
			break;
	}

	/* envp now points just past the NULL, where the entries are read from. */
	entry = (void *)envp;
	while (entry->a_type != AT_NULL) {
		if (entry->a_type == AT_SYSINFO)
			return entry->a_un.a_val;
		entry++;
	}

	printf(" [WARN] \t AT_SYSINFO not supplied \n ");
	return 0;
}
/*
 * Tiny stub emitted into .text under the global symbol int80:
 * it executes "int $0x80" and returns. Because it ends in "ret", it can
 * be called through syscall_addr like any other entry point.
 */
asm (
" .pushsection .text \n "
" .global int80 \n "
" int80: \n "
" int $0x80 \n "
" ret \n "
" .popsection \n "
) ;
/* Declared only so that the address &int80 can be taken from C. */
extern char int80 ;
/*
 * Snapshot of the sixteen 64-bit general-purpose registers.
 *
 * The get_regs64 assembly stores each register at a fixed offset
 * (index * 8) into this structure, so the member order is part of the
 * contract and must not be rearranged.
 */
struct regs64 {
	uint64_t rax;	/* offset  0*8 */
	uint64_t rbx;	/* offset  1*8 */
	uint64_t rcx;	/* offset  2*8 */
	uint64_t rdx;	/* offset  3*8 */
	uint64_t rsi;	/* offset  4*8 */
	uint64_t rdi;	/* offset  5*8 */
	uint64_t rbp;	/* offset  6*8 */
	uint64_t rsp;	/* offset  7*8 */
	uint64_t r8;	/* offset  8*8 */
	uint64_t r9;	/* offset  9*8 */
	uint64_t r10;	/* offset 10*8 */
	uint64_t r11;	/* offset 11*8 */
	uint64_t r12;	/* offset 12*8 */
	uint64_t r13;	/* offset 13*8 */
	uint64_t r14;	/* offset 14*8 */
	uint64_t r15;	/* offset 15*8 */
};
/* Register snapshot written by get_regs64; read by print_regs64() and check_regs64(). */
struct regs64 regs64 ;
/* Set by main() when CS reads as 0x23; every R8..R15 check is skipped when it is 0. */
int kernel_is_64bit ;
/*
 * Two helper routines assembled as 64-bit code (.code64) inside this
 * 32-bit program. They are only ever invoked through call64_from_32().
 *
 * get_regs64:
 *	Stores RAX..R15 into the global regs64, one register per
 *	8-byte slot in the member order of struct regs64. RAX is parked on
 *	the stack first because it is needed to hold the address of regs64.
 *
 * poison_regs64:
 *	Loads R8 with 0x7f7f7f7f7f7f7f7f and each following register up to
 *	R15 with the previous register's value plus one, giving every
 *	register a distinct, recognizable value that check_regs64() can
 *	compare against.
 */
asm (
	" .pushsection .text \n "
	" .code64 \n "
	" get_regs64: \n "
	" push %rax \n "
	" mov $regs64, %eax \n "
	" pop 0*8(%rax) \n "
	" movq %rbx, 1*8(%rax) \n "
	" movq %rcx, 2*8(%rax) \n "
	" movq %rdx, 3*8(%rax) \n "
	" movq %rsi, 4*8(%rax) \n "
	" movq %rdi, 5*8(%rax) \n "
	" movq %rbp, 6*8(%rax) \n "
	" movq %rsp, 7*8(%rax) \n "
	" movq %r8, 8*8(%rax) \n "
	" movq %r9, 9*8(%rax) \n "
	" movq %r10, 10*8(%rax) \n "
	" movq %r11, 11*8(%rax) \n "
	" movq %r12, 12*8(%rax) \n "
	" movq %r13, 13*8(%rax) \n "
	" movq %r14, 14*8(%rax) \n "
	" movq %r15, 15*8(%rax) \n "
	" ret \n "
	" poison_regs64: \n "
	/* Build the 64-bit pattern in R8 from two 32-bit halves. */
	" movq $0x7f7f7f7f, %r8 \n "
	" shl $32, %r8 \n "
	" orq $0x7f7f7f7f, %r8 \n "
	/* Each later register is its predecessor plus one. */
	" movq %r8, %r9 \n "
	" incq %r9 \n "
	" movq %r9, %r10 \n "
	" incq %r10 \n "
	" movq %r10, %r11 \n "
	" incq %r11 \n "
	" movq %r11, %r12 \n "
	" incq %r12 \n "
	" movq %r12, %r13 \n "
	" incq %r13 \n "
	" movq %r13, %r14 \n "
	" incq %r14 \n "
	" movq %r14, %r15 \n "
	" incq %r15 \n "
	" ret \n "
	" .code32 \n "
	" .popsection \n "
);
/*
 * get_regs64 and poison_regs64 are the 64-bit routines defined in the
 * asm block of this file; this file never calls them directly, only via
 * call64_from_32().
 */
extern void get_regs64 ( void ) ;
extern void poison_regs64 ( void ) ;
/*
 * Not defined in this file. NOTE(review): presumably provided by
 * thunks_32.S (named in the build line of the file header) and presumably
 * runs `function` in 64-bit mode before returning - confirm there.
 */
extern unsigned long call64_from_32 ( void ( * function ) ( void ) ) ;
void print_regs64 ( void )
{
if ( ! kernel_is_64bit )
return ;
printf ( " ax:%016llx bx:%016llx cx:%016llx dx:%016llx \n " , regs64 . rax , regs64 . rbx , regs64 . rcx , regs64 . rdx ) ;
printf ( " si:%016llx di:%016llx bp:%016llx sp:%016llx \n " , regs64 . rsi , regs64 . rdi , regs64 . rbp , regs64 . rsp ) ;
printf ( " 8:%016llx 9:%016llx 10:%016llx 11:%016llx \n " , regs64 . r8 , regs64 . r9 , regs64 . r10 , regs64 . r11 ) ;
printf ( " 12:%016llx 13:%016llx 14:%016llx 15:%016llx \n " , regs64 . r12 , regs64 . r13 , regs64 . r14 , regs64 . r15 ) ;
}
int check_regs64 ( void )
{
int err = 0 ;
int num = 8 ;
uint64_t * r64 = & regs64 . r8 ;
x86/entry/64/compat: Preserve r8-r11 in int $0x80
32-bit user code that uses int $80 doesn't care about r8-r11. There is,
however, some 64-bit user code that intentionally uses int $0x80 to invoke
32-bit system calls. From what I've seen, basically all such code assumes
that r8-r15 are all preserved, but the kernel clobbers r8-r11. Since I
doubt that there's any code that depends on int $0x80 zeroing r8-r11,
change the kernel to preserve them.
I suspect that very little user code is broken by the old clobber, since
r8-r11 are only rarely allocated by gcc, and they're clobbered by function
calls, so they only way we'd see a problem is if the same function that
invokes int $0x80 also spills something important to one of these
registers.
The current behavior seems to date back to the historical commit
"[PATCH] x86-64 merge for 2.6.4". Before that, all regs were
preserved. I can't find any explanation of why this change was made.
Update the test_syscall_vdso_32 testcase as well to verify the new
behavior, and it strengthens the test to make sure that the kernel doesn't
accidentally permute r8..r15.
Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Link: https://lkml.kernel.org/r/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org
2018-04-17 17:36:36 +03:00
uint64_t expected = 0x7f7f7f7f7f7f7f7fULL ;
2015-09-16 21:23:29 +03:00
if ( ! kernel_is_64bit )
return 0 ;
do {
x86/entry/64/compat: Preserve r8-r11 in int $0x80
32-bit user code that uses int $80 doesn't care about r8-r11. There is,
however, some 64-bit user code that intentionally uses int $0x80 to invoke
32-bit system calls. From what I've seen, basically all such code assumes
that r8-r15 are all preserved, but the kernel clobbers r8-r11. Since I
doubt that there's any code that depends on int $0x80 zeroing r8-r11,
change the kernel to preserve them.
I suspect that very little user code is broken by the old clobber, since
r8-r11 are only rarely allocated by gcc, and they're clobbered by function
calls, so they only way we'd see a problem is if the same function that
invokes int $0x80 also spills something important to one of these
registers.
The current behavior seems to date back to the historical commit
"[PATCH] x86-64 merge for 2.6.4". Before that, all regs were
preserved. I can't find any explanation of why this change was made.
Update the test_syscall_vdso_32 testcase as well to verify the new
behavior, and it strengthens the test to make sure that the kernel doesn't
accidentally permute r8..r15.
Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Link: https://lkml.kernel.org/r/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org
2018-04-17 17:36:36 +03:00
if ( * r64 = = expected + + )
2015-09-16 21:23:29 +03:00
continue ; /* register did not change */
if ( syscall_addr ! = ( long ) & int80 ) {
/*
* Non - INT80 syscall entrypoints are allowed to clobber R8 + regs :
* either clear them to 0 , or for R11 , load EFLAGS .
*/
if ( * r64 = = 0 )
continue ;
if ( num = = 11 ) {
printf ( " [NOTE] \t R11 has changed:%016llx - assuming clobbered by SYSRET insn \n " , * r64 ) ;
continue ;
}
} else {
x86/entry/64/compat: Preserve r8-r11 in int $0x80
32-bit user code that uses int $80 doesn't care about r8-r11. There is,
however, some 64-bit user code that intentionally uses int $0x80 to invoke
32-bit system calls. From what I've seen, basically all such code assumes
that r8-r15 are all preserved, but the kernel clobbers r8-r11. Since I
doubt that there's any code that depends on int $0x80 zeroing r8-r11,
change the kernel to preserve them.
I suspect that very little user code is broken by the old clobber, since
r8-r11 are only rarely allocated by gcc, and they're clobbered by function
calls, so they only way we'd see a problem is if the same function that
invokes int $0x80 also spills something important to one of these
registers.
The current behavior seems to date back to the historical commit
"[PATCH] x86-64 merge for 2.6.4". Before that, all regs were
preserved. I can't find any explanation of why this change was made.
Update the test_syscall_vdso_32 testcase as well to verify the new
behavior, and it strengthens the test to make sure that the kernel doesn't
accidentally permute r8..r15.
Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Link: https://lkml.kernel.org/r/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org
2018-04-17 17:36:36 +03:00
/*
* INT80 syscall entrypoint can be used by
2015-09-16 21:23:29 +03:00
* 64 - bit programs too , unlike SYSCALL / SYSENTER .
* Therefore it must preserve R12 +
* ( they are callee - saved registers in 64 - bit C ABI ) .
*
x86/entry/64/compat: Preserve r8-r11 in int $0x80
32-bit user code that uses int $80 doesn't care about r8-r11. There is,
however, some 64-bit user code that intentionally uses int $0x80 to invoke
32-bit system calls. From what I've seen, basically all such code assumes
that r8-r15 are all preserved, but the kernel clobbers r8-r11. Since I
doubt that there's any code that depends on int $0x80 zeroing r8-r11,
change the kernel to preserve them.
I suspect that very little user code is broken by the old clobber, since
r8-r11 are only rarely allocated by gcc, and they're clobbered by function
calls, so they only way we'd see a problem is if the same function that
invokes int $0x80 also spills something important to one of these
registers.
The current behavior seems to date back to the historical commit
"[PATCH] x86-64 merge for 2.6.4". Before that, all regs were
preserved. I can't find any explanation of why this change was made.
Update the test_syscall_vdso_32 testcase as well to verify the new
behavior, and it strengthens the test to make sure that the kernel doesn't
accidentally permute r8..r15.
Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Link: https://lkml.kernel.org/r/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org
2018-04-17 17:36:36 +03:00
* Starting in Linux 4.17 ( and any kernel that
* backports the change ) , R8 . .11 are preserved .
* Historically ( and probably unintentionally ) , they
* were clobbered or zeroed .
2015-09-16 21:23:29 +03:00
*/
}
printf ( " [FAIL] \t R%d has changed:%016llx \n " , num , * r64 ) ;
err + + ;
} while ( r64 + + , + + num < 16 ) ;
if ( ! err )
printf ( " [OK] \t R8..R15 did not leak kernel data \n " ) ;
return err ;
}
/*
 * Arguments of the pselect call made by run_syscall(). They are globals
 * because the inline asm there refers to them by symbol name; prep_args()
 * fills them in before every call.
 */
int nfds ;
fd_set rfds ;
fd_set wfds ;
fd_set efds ;
struct timespec timeout ;
sigset_t sigmask ;
/* Sixth pselect argument: pointer to the signal mask plus its size in bytes. */
struct {
sigset_t * sp ;
int sz ;
} sigmask_desc ;
/*
 * Fill the global pselect arguments with fixed values before each
 * syscall attempt made by run_syscall().
 */
void prep_args()
{
	nfds = 42;

	/* One descriptor per set: fd 0 for reading, fd 1 for writing, fd 2 for exceptions. */
	FD_ZERO(&rfds);
	FD_SET(0, &rfds);
	FD_ZERO(&wfds);
	FD_SET(1, &wfds);
	FD_ZERO(&efds);
	FD_SET(2, &efds);

	/* 123 ns timeout. */
	timeout.tv_nsec = 123;
	timeout.tv_sec = 0;

	/* Signal mask containing SIGINT, SIGUSR2 and SIGRTMAX. */
	sigemptyset(&sigmask);
	sigaddset(&sigmask, SIGINT);
	sigaddset(&sigmask, SIGUSR2);
	sigaddset(&sigmask, SIGRTMAX);

	sigmask_desc.sz = 8; /* bytes */
	sigmask_desc.sp = &sigmask;
}
/*
 * Print an EFLAGS-style value in hex followed by a label for each bit.
 *
 * @name: text printed in front of the value.
 * @r:    the flags value to decode.
 *
 * The label table holds two strings per bit (bit clear, bit set) for
 * bits 0..21. Bits are printed from bit 21 down to bit 0; the bit 0
 * labels carry the newline. If anything above bit 21 is set, a note is
 * printed before the labels.
 */
static void print_flags(const char *name, unsigned long r)
{
	static const char *bitarray[] = {
		" \n ", " c \n ",	/* Carry Flag */
		" 0 ", " 1 ",		/* Bit 1 - always on */
		" ", " p ",		/* Parity Flag */
		" 0 ", " 3? ",
		" ", " a ",		/* Auxiliary carry Flag */
		" 0 ", " 5? ",
		" ", " z ",		/* Zero Flag */
		" ", " s ",		/* Sign Flag */
		" ", " t ",		/* Trap Flag */
		" ", " i ",		/* Interrupt Flag */
		" ", " d ",		/* Direction Flag */
		" ", " o ",		/* Overflow Flag */
		" 0 ", " 1 ",		/* I/O Privilege Level (2 bits) */
		" 0 ", " 1 ",		/* I/O Privilege Level (2 bits) */
		" ", " n ",		/* Nested Task */
		" 0 ", " 15? ",
		" ", " r ",		/* Resume Flag */
		" ", " v ",		/* Virtual Mode */
		" ", " ac ",		/* Alignment Check/Access Control */
		" ", " vif ",		/* Virtual Interrupt Flag */
		" ", " vip ",		/* Virtual Interrupt Pending */
		" ", " id ",		/* CPUID detection */
		NULL
	};
	int bit;

	printf(" %s=%016lx ", name, r);
	if ((r >> 22) != 0)
		printf(" (extra bits are set) ");

	for (bit = 21; bit >= 0; bit--) {
		/* Entry 2*bit is the "clear" label, 2*bit + 1 the "set" label. */
		const char *label = bitarray[2 * bit + ((r >> bit) & 1)];

		if (label[0])
			fputs(label, stdout);
	}
}
/*
 * Issue one pselect (syscall number 308) through the entry point stored in
 * syscall_addr with all six argument registers loaded, then check what the
 * kernel handed back.
 *
 * Flag differences are only reported as [WARN]. A changed argument
 * register is reported as [FAIL].
 *
 * Returns 1 if an argument register was changed by the syscall, otherwise
 * the result of check_regs64().
 */
int run_syscall ( void )
{
/* flags: EFLAGS as read right after the syscall; bad_arg: 0, or 1..6 for the first changed argument */
long flags , bad_arg ;
prep_args ( ) ;
/* Give R8..R15 known values so check_regs64() can tell whether they changed. */
if ( kernel_is_64bit )
call64_from_32 ( poison_regs64 ) ;
/*print_regs64();*/
asm ( " \n "
/* Try 6-arg syscall: pselect. It should return quickly */
" push %%ebp \n "
" mov $308, %%eax \n " /* PSELECT */
" mov nfds, %%ebx \n " /* ebx arg1 */
" mov $rfds, %%ecx \n " /* ecx arg2 */
" mov $wfds, %%edx \n " /* edx arg3 */
" mov $efds, %%esi \n " /* esi arg4 */
" mov $timeout, %%edi \n " /* edi arg5 */
" mov $sigmask_desc, %%ebp \n " /* %ebp arg6 */
" push $0x200ed7 \n " /* set almost all flags */
" popf \n " /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
" call *syscall_addr \n "
/* Check that registers are not clobbered */
" pushf \n "
" pop %%eax \n "
/* The popf above set DF (bit 10 of 0x200ed7); clear it again before returning to C. */
" cld \n "
/*
 * Each "mov $N, %%ebx" sits between a cmp and its jne: mov leaves the
 * flags alone, so the jne still tests the cmp while ebx already holds
 * the number of the argument being checked.
 */
" cmp nfds, %%ebx \n " /* ebx arg1 */
" mov $1, %%ebx \n "
" jne 1f \n "
" cmp $rfds, %%ecx \n " /* ecx arg2 */
" mov $2, %%ebx \n "
" jne 1f \n "
" cmp $wfds, %%edx \n " /* edx arg3 */
" mov $3, %%ebx \n "
" jne 1f \n "
" cmp $efds, %%esi \n " /* esi arg4 */
" mov $4, %%ebx \n "
" jne 1f \n "
" cmp $timeout, %%edi \n " /* edi arg5 */
" mov $5, %%ebx \n "
" jne 1f \n "
" cmpl $sigmask_desc, %%ebp \n " /* %ebp arg6 */
" mov $6, %%ebx \n "
" jne 1f \n "
" mov $0, %%ebx \n "
" 1: \n "
" pop %%ebp \n "
: " =a " ( flags ) , " =b " ( bad_arg )
:
: " cx " , " dx " , " si " , " di "
) ;
if ( kernel_is_64bit ) {
/* Pre-fill the snapshot with 0x77 bytes before get_regs64 overwrites it. */
memset ( & regs64 , 0x77 , sizeof ( regs64 ) ) ;
call64_from_32 ( get_regs64 ) ;
/*print_regs64();*/
}
/*
* On paravirt kernels , flags are not preserved across syscalls .
* Thus , we do not consider it a bug if some are changed .
* We just show ones which do .
*/
if ( ( 0x200ed7 ^ flags ) ! = 0 ) {
print_flags ( " [WARN] \t Flags before " , 0x200ed7 ) ;
print_flags ( " [WARN] \t Flags after " , flags ) ;
print_flags ( " [WARN] \t Flags change " , ( 0x200ed7 ^ flags ) ) ;
}
if ( bad_arg ) {
printf ( " [FAIL] \t arg#%ld clobbered \n " , bad_arg ) ;
return 1 ;
}
printf ( " [OK] \t Arguments are preserved across syscall \n " ) ;
return check_regs64 ( ) ;
}
/*
 * Run the syscall test through both entry points: first through the
 * address already stored in syscall_addr (skipped when that address is 0),
 * then through the int80 stub. syscall_addr is restored before returning.
 *
 * Returns the sum of the run_syscall() results.
 */
int run_syscall_twice()
{
	long saved_addr = syscall_addr;
	int failures = 0;

	if (saved_addr) {
		printf(" [RUN] \t Executing 6-argument 32-bit syscall via VDSO \n ");
		failures += run_syscall();
	}

	/* Redirect the call to the int80 stub for the second run. */
	syscall_addr = (long)&int80;
	printf(" [RUN] \t Executing 6-argument 32-bit syscall via INT 80 \n ");
	failures += run_syscall();

	syscall_addr = saved_addr;
	return failures;
}
/*
 * Fork so that the rest of the test runs in a child traced by its parent.
 *
 * Child:  requests tracing with PTRACE_TRACEME, stops itself with SIGSTOP
 *         and then returns to the caller. If PTRACE_TRACEME fails, the
 *         child exits with status 0.
 * Parent: never returns. It resumes the child with PTRACE_SYSCALL after
 *         every stop and exits with the child's exit status, or with the
 *         number of the signal that killed it. It exits with 255 if
 *         waitpid() fails or reports an unexpected state, and with 1 if
 *         fork() fails.
 */
void ptrace_me()
{
	pid_t pid;

	/* Flush stdio buffers so pending output is not duplicated by fork(). */
	fflush(NULL);
	pid = fork();
	if (pid < 0)
		exit(1);
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		raise(SIGSTOP);
		return;
	}

	/* parent */
	printf(" [RUN] \t Running tests under ptrace \n ");
	while (1) {
		int status;

		pid = waitpid(-1, &status, __WALL);
		/*
		 * Check the result first: status is only written by a
		 * successful waitpid(), so it must not be examined otherwise.
		 */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}
/*
 * Test driver. Reads CS to decide whether the R8..R15 checks apply, looks
 * up the syscall entry point from the auxiliary vector, and runs the
 * syscall tests once directly and once more after ptrace_me().
 *
 * The parent side of ptrace_me() never returns, so the second round runs
 * only in the traced child. The return value is the accumulated failure count.
 */
int main(int argc, char **argv, char **envp)
{
	int failures;
	int code_seg;

	asm ( " \n "
		" movl %%cs, %%eax \n "
		: " =a " ( code_seg )
	);

	/* CS == 0x23 is what this test takes to mean a 64-bit kernel. */
	kernel_is_64bit = (code_seg == 0x23);
	if (!kernel_is_64bit)
		printf(" [NOTE] \t Not a 64-bit kernel, won't test R8..R15 leaks \n ");

	/* This only works for non-static builds:
	 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
	 */
	syscall_addr = get_syscall(envp);

	failures = run_syscall_twice();
	ptrace_me();
	failures += run_syscall_twice();

	return failures;
}
# endif