#ifndef _ASM_X86_SYSTEM_H_
#define _ASM_X86_SYSTEM_H_

#include <asm/asm.h>
#include <asm/segment.h>
#include <asm/cpufeature.h>
#include <asm/cmpxchg.h>
#include <asm/nops.h>

#include <linux/kernel.h>
#include <linux/irqflags.h>

/* entries in ARCH_DLINFO: */
#ifdef CONFIG_IA32_EMULATION
# define AT_VECTOR_SIZE_ARCH 2
#else
# define AT_VECTOR_SIZE_ARCH 1
#endif

#ifdef CONFIG_X86_32

struct task_struct; /* one of the stranger aspects of C forward declarations */
struct task_struct *__switch_to(struct task_struct *prev,
				struct task_struct *next);

/*
 * Saving eflags is important. It not only switches IOPL between tasks,
 * it also protects other tasks from NT leaking through sysenter etc.
 */
#define switch_to(prev, next, last)					\
do {									\
	/*								\
	 * Context-switching clobbers all registers, so we clobber	\
	 * them explicitly, via unused output variables.		\
	 * (EAX and EBP are not listed because EBP is saved/restored	\
	 * explicitly for wchan access and EAX is the return value of	\
	 * __switch_to())						\
	 */								\
	unsigned long ebx, ecx, edx, esi, edi;				\
									\
	asm volatile("pushfl\n\t"		/* save    flags */	\
		     "pushl %%ebp\n\t"		/* save    EBP   */	\
		     "movl %%esp,%[prev_sp]\n\t"  /* save    ESP   */	\
		     "movl %[next_sp],%%esp\n\t"  /* restore ESP   */	\
		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
		     "jmp __switch_to\n"	/* regparm call  */	\
		     "1:\t"						\
		     "popl %%ebp\n\t"		/* restore EBP   */	\
		     "popfl\n"			/* restore flags */	\
									\
		     /* output parameters */				\
		     : [prev_sp] "=m" (prev->thread.sp),		\
		       [prev_ip] "=m" (prev->thread.ip),		\
		       "=a" (last),					\
									\
		       /* clobbered output registers: */		\
		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
		       "=S" (esi), "=D" (edi)				\
									\
		       /* input parameters: */				\
		     : [next_sp]  "m" (next->thread.sp),		\
		       [next_ip]  "m" (next->thread.ip),		\
									\
		       /* regparm parameters for __switch_to(): */	\
		       [prev]     "a" (prev),				\
		       [next]     "d" (next));				\
} while (0)

/*
 * disable hlt during certain critical i/o operations
 */
#define HAVE_DISABLE_HLT
#else

#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"

/* frame pointer must be last for get_wchan */
#define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"

#define __EXTRA_CLOBBER  \
	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
	  "r12", "r13", "r14", "r15"

/* Save and restore flags, so that a leaking NT flag is cleared across the switch */
#define switch_to(prev, next, last) \
	asm volatile(SAVE_CONTEXT					  \
	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
	     "call __switch_to\n\t"					  \
	     ".globl thread_return\n"					  \
	     "thread_return:\n\t"					  \
	     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			  \
	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
	     LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
	     "movq %%rax,%%rdi\n\t"					  \
	     "jc   ret_from_fork\n\t"					  \
	     RESTORE_CONTEXT						  \
	     : "=a" (last)						  \
	     : [next] "S" (next), [prev] "D" (prev),			  \
	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
	       [tif_fork] "i" (TIF_FORK),				  \
	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))  \
	     : "memory", "cc" __EXTRA_CLOBBER)
#endif
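
/*
 * Illustrative usage sketch (not part of this header): the scheduler's
 * context_switch() path invokes switch_to() roughly as follows.  The
 * third argument receives the task we switched away *from*, as seen on
 * the new task's stack, which is why "prev" appears twice:
 *
 *	switch_to(prev, next, prev);
 *	barrier();
 *	// from here on, "prev" names the task that ran before us
 */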

#ifdef __KERNEL__
#define _set_base(addr, base) do { unsigned long __pr; \
__asm__ __volatile__ ("movw %%dx,%1\n\t" \
	"rorl $16,%%edx\n\t" \
	"movb %%dl,%2\n\t" \
	"movb %%dh,%3" \
	: "=&d" (__pr) \
	: "m" (*((addr)+2)), \
	  "m" (*((addr)+4)), \
	  "m" (*((addr)+7)), \
	  "0" (base) \
	); } while (0)

#define _set_limit(addr, limit) do { unsigned long __lr; \
__asm__ __volatile__ ("movw %%dx,%1\n\t" \
	"rorl $16,%%edx\n\t" \
	"movb %2,%%dh\n\t" \
	"andb $0xf0,%%dh\n\t" \
	"orb %%dh,%%dl\n\t" \
	"movb %%dl,%2" \
	: "=&d" (__lr) \
	: "m" (*(addr)), \
	  "m" (*((addr)+6)), \
	  "0" (limit) \
	); } while (0)

#define set_base(ldt, base) _set_base(((char *)&(ldt)), (base))
#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)), ((limit)-1))
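
/*
 * The byte offsets used above follow the layout of an 8-byte segment
 * descriptor: limit[15:0] in bytes 0-1, base[15:0] in bytes 2-3,
 * base[23:16] in byte 4, the access byte in byte 5, limit[19:16] plus
 * the flag nibble in byte 6, and base[31:24] in byte 7.  _set_limit()
 * masks byte 6 with 0xf0 so the flag nibble is preserved while the
 * high limit bits are merged in.
 */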

extern void native_load_gs_index(unsigned);

/*
 * Load a segment. Fall back on loading the zero
 * segment if something goes wrong..
 */
#define loadsegment(seg, value)			\
	asm volatile("\n"			\
		     "1:\t"			\
		     "movl %k0,%%" #seg "\n"	\
		     "2:\n"			\
		     ".section .fixup,\"ax\"\n"	\
		     "3:\t"			\
		     "movl %k1, %%" #seg "\n\t"	\
		     "jmp 2b\n"			\
		     ".previous\n"		\
		     _ASM_EXTABLE(1b, 3b)	\
		     : : "r" (value), "r" (0) : "memory")

/*
 * Save a segment register away.
 *
 * The output constraint must be "=r", not "=rm": a 16-bit segment move
 * is only zero-extended to 32 bits when the destination is a register.
 * With a memory destination only 16 bits are written and the upper bits
 * are left as random data, which caused bootup crashes on 64-bit when
 * callers such as __switch_to() later used the full 32-bit value.
 */
#define savesegment(seg, value)				\
	asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")

static inline unsigned long get_limit(unsigned long segment)
{
	unsigned long __limit;
	asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
	return __limit + 1;
}

static inline void native_clts(void)
{
	asm volatile("clts");
}

/*
 * Volatile isn't enough to prevent the compiler from reordering the
 * read/write functions for the control registers and messing everything up.
 * A memory clobber would solve the problem, but would prevent reordering of
 * all loads/stores around it, which can hurt performance.  The solution is to
 * use a variable and mimic reads and writes to it to enforce serialization.
 */
static unsigned long __force_order;
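
/*
 * How this works: each native_read_crN() below lists __force_order as an
 * output ("=m") and each native_write_crN() lists it as an input ("m"),
 * so the compiler sees an artificial dependency between any two
 * control-register accessors and cannot reorder them against each other,
 * while unrelated loads and stores remain free to move.  One pattern
 * that relies on this ordering is the TLB-flush idiom used by the
 * tlbflush helpers elsewhere in the tree (sketch only):
 *
 *	write_cr3(read_cr3());	// reload CR3 to flush non-global TLB entries
 */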

static inline unsigned long native_read_cr0(void)
{
	unsigned long val;
	asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
	return val;
}

static inline void native_write_cr0(unsigned long val)
{
	asm volatile("mov %0,%%cr0" : : "r" (val), "m" (__force_order));
}

static inline unsigned long native_read_cr2(void)
{
	unsigned long val;
	asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
	return val;
}

static inline void native_write_cr2(unsigned long val)
{
	asm volatile("mov %0,%%cr2" : : "r" (val), "m" (__force_order));
}

static inline unsigned long native_read_cr3(void)
{
	unsigned long val;
	asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
	return val;
}

static inline void native_write_cr3(unsigned long val)
{
	asm volatile("mov %0,%%cr3" : : "r" (val), "m" (__force_order));
}

static inline unsigned long native_read_cr4(void)
{
	unsigned long val;
	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
	return val;
}

static inline unsigned long native_read_cr4_safe(void)
{
	unsigned long val;
	/*
	 * This could fault if %cr4 does not exist.  On x86_64, CR4 always
	 * exists, so it will never fault.
	 */
#ifdef CONFIG_X86_32
	asm volatile("1: mov %%cr4, %0\n"
		     "2:\n"
		     _ASM_EXTABLE(1b, 2b)
		     : "=r" (val), "=m" (__force_order) : "0" (0));
#else
	val = native_read_cr4();
#endif
	return val;
}

static inline void native_write_cr4(unsigned long val)
{
	asm volatile("mov %0,%%cr4" : : "r" (val), "m" (__force_order));
}

#ifdef CONFIG_X86_64
static inline unsigned long native_read_cr8(void)
{
	unsigned long cr8;
	asm volatile("movq %%cr8,%0" : "=r" (cr8));
	return cr8;
}

static inline void native_write_cr8(unsigned long val)
{
	asm volatile("movq %0,%%cr8" : : "r" (val) : "memory");
}
#endif

static inline void native_wbinvd(void)
{
	asm volatile("wbinvd" : : : "memory");
}

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define read_cr0()	(native_read_cr0())
#define write_cr0(x)	(native_write_cr0(x))
#define read_cr2()	(native_read_cr2())
#define write_cr2(x)	(native_write_cr2(x))
#define read_cr3()	(native_read_cr3())
#define write_cr3(x)	(native_write_cr3(x))
#define read_cr4()	(native_read_cr4())
#define read_cr4_safe()	(native_read_cr4_safe())
#define write_cr4(x)	(native_write_cr4(x))
#define wbinvd()	(native_wbinvd())
#ifdef CONFIG_X86_64
#define read_cr8()	(native_read_cr8())
#define write_cr8(x)	(native_write_cr8(x))
#define load_gs_index	native_load_gs_index
#endif

/* Clear the 'TS' bit */
#define clts()		(native_clts())

#endif /* CONFIG_PARAVIRT */

#define stts() write_cr0(read_cr0() | X86_CR0_TS)
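
/*
 * clts()/stts() support lazy FPU context switching: stts() sets CR0.TS
 * so the next FPU/SSE instruction raises a device-not-available (#NM)
 * fault, and the trap handler restores the task's FPU state after
 * clearing TS again.  Rough sketch (handler and helper names are
 * illustrative, not defined here):
 *
 *	stts();				// switched away, FPU state saved
 *	...
 *	// later, in the #NM trap handler:
 *	clts();				// allow FPU use again
 *	restore_fpu_state(current);	// hypothetical helper
 */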

#endif /* __KERNEL__ */

static inline void clflush(volatile void *__p)
{
	asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
}
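
/*
 * clflush operates on a single cache line.  To flush a whole buffer,
 * walk it in cache-line-sized steps and fence afterwards, roughly as
 * the clflush_cache_range() helper does (sketch only; the real helper
 * also fences before the loop):
 *
 *	void flush_range(void *vaddr, unsigned int size)
 *	{
 *		char *p;
 *
 *		for (p = vaddr; p < (char *)vaddr + size;
 *		     p += boot_cpu_data.x86_clflush_size)
 *			clflush(p);
 *		mb();	// order the flushes against later accesses
 *	}
 */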

#define nop() asm volatile ("nop")

void disable_hlt(void);
void enable_hlt(void);

void cpu_idle_wait(void);

extern unsigned long arch_align_stack(unsigned long sp);
extern void free_init_pages(char *what, unsigned long begin, unsigned long end);

void default_idle(void);

/*
 * Force strict CPU ordering.
 * And yes, this is required on UP too when we're
 * talking to devices.
 */
#ifdef CONFIG_X86_32
/*
 * Some non-Intel clones support out of order store.  wmb() ceases to be a
 * nop for these.
 */
#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
#else
#define mb()	asm volatile("mfence" ::: "memory")
#define rmb()	asm volatile("lfence" ::: "memory")
#define wmb()	asm volatile("sfence" ::: "memory")
#endif

/**
 * read_barrier_depends - Flush all pending reads that subsequent reads
 * depend on.
 *
 * No data-dependent reads from memory-like regions are ever reordered
 * over this barrier.  All reads preceding this primitive are guaranteed
 * to access memory (but not necessarily other CPUs' caches) before any
 * reads following this primitive that depend on the data returned by
 * any of the preceding reads.  This primitive is much lighter weight than
 * rmb() on most CPUs, and is never heavier weight than is
 * rmb().
 *
 * These ordering constraints are respected by both the local CPU
 * and the compiler.
 *
 * Ordering is not guaranteed by anything other than these primitives,
 * not even by data dependencies.  See the documentation for
 * memory_barrier() for examples and URLs to more information.
 *
 * For example, the following code would force ordering (the initial
 * value of "a" is zero, "b" is one, and "p" is "&a"):
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	b = 2;
 *	memory_barrier();
 *	p = &b;				q = p;
 *					read_barrier_depends();
 *					d = *q;
 * </programlisting>
 *
 * because the read of "*q" depends on the read of "p" and these
 * two reads are separated by a read_barrier_depends().  However,
 * the following code, with the same initial values for "a" and "b":
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	a = 2;
 *	memory_barrier();
 *	b = 3;				y = b;
 *					read_barrier_depends();
 *					x = a;
 * </programlisting>
 *
 * does not enforce ordering, since there is no data dependency between
 * the read of "a" and the read of "b".  Therefore, on some CPUs, such
 * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
 * in cases like this where there are no data dependencies.
 **/

#define read_barrier_depends()	do { } while (0)

#ifdef CONFIG_SMP
#define smp_mb()	mb()
#ifdef CONFIG_X86_PPRO_FENCE
# define smp_rmb()	rmb()
#else
# define smp_rmb()	barrier()
#endif
#ifdef CONFIG_X86_OOSTORE
# define smp_wmb()	wmb()
#else
# define smp_wmb()	barrier()
#endif
#define smp_read_barrier_depends()	read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else
#define smp_mb()	barrier()
#define smp_rmb()	barrier()
#define smp_wmb()	barrier()
#define smp_read_barrier_depends()	do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif
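
/*
 * Typical pairing of the smp_*() barriers (sketch only; "data", "flag"
 * and the producer/consumer split are illustrative, not part of this
 * header):
 *
 *	// producer			// consumer
 *	data = 42;			while (!flag)
 *	smp_wmb();				cpu_relax();
 *	flag = 1;			smp_rmb();
 *					BUG_ON(data != 42);
 *
 * The smp_wmb() orders the data store before the flag store; the paired
 * smp_rmb() orders the flag load before the data load.
 */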

/*
 * Stop RDTSC speculation.  This is needed when you need to use RDTSC
 * (or get_cycles or vread that possibly accesses the TSC) in a defined
 * code region.
 *
 * (Could use a three-way alternative() for this if there were one.)
 */
static inline void rdtsc_barrier(void)
{
	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
}
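
/*
 * Usage sketch (illustrative): bracket a TSC read so that neither
 * earlier nor later instructions are speculated across it, as the
 * vsyscall TSC read path does:
 *
 *	rdtsc_barrier();
 *	cycles = get_cycles();	// RDTSC-based helper
 *	rdtsc_barrier();
 */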

#endif /* _ASM_X86_SYSTEM_H_ */