2005-04-16 15:20:36 -07:00
# ifndef __ASM_SYSTEM_H
# define __ASM_SYSTEM_H
# include <linux/kernel.h>
# include <asm/segment.h>
2007-05-08 00:35:02 -07:00
# include <asm/cmpxchg.h>
2005-04-16 15:20:36 -07:00
# ifdef __KERNEL__
2007-12-18 18:05:58 +01:00
/* Number of entries in ARCH_DLINFO: one more when IA32 emulation is built in. */
#if defined(CONFIG_IA32_EMULATION)
# define AT_VECTOR_SIZE_ARCH 2
#else
# define AT_VECTOR_SIZE_ARCH 1
#endif
2005-04-16 15:20:36 -07:00
/*
 * Asm template fragments that move one register to or from a stack slot.
 * The slot address is (14 - slot) * 8 bytes above %rsp; both arguments are
 * stringified into the template, and "%%" is the escaped '%' of an
 * extended-asm template.
 */
#define __SAVE(r, slot) \
	"movq %%" #r ",(14-" #slot ")*8(%%rsp)\n\t"
#define __RESTORE(r, slot) \
	"movq (14-" #slot ")*8(%%rsp),%%" #r "\n\t"
/* frame pointer must be last for get_wchan */
2006-09-26 10:52:41 +02:00
/*
 * Asm template fragments bracketing a context switch.
 * SAVE_CONTEXT pushes the flags and %rbp, then copies %rsi into %rbp.
 * RESTORE_CONTEXT copies %rbp back into %rsi, then pops %rbp and the flags.
 */
#define SAVE_CONTEXT \
	"pushf ; " \
	"pushq %%rbp ; " \
	"movq %%rsi,%%rbp\n\t"
#define RESTORE_CONTEXT \
	"movq %%rbp,%%rsi ; " \
	"popq %%rbp ; " \
	"popf\t"
2005-04-16 15:20:36 -07:00
/*
 * Additional clobber entries for the switch_to() asm statement.
 * The list starts with a comma so that it can be appended directly after
 * the last entry of an existing clobber list.
 */
#define __EXTRA_CLOBBER \
	, " rcx " , " rbx " , " rdx " \
	, " r8 " , " r9 " , " r10 " , " r11 " \
	, " r12 " , " r13 " , " r14 " , " r15 "
2006-09-26 10:52:41 +02:00
/* Save and restore the flags so that a leaked NT flag is cleared */
2005-04-16 15:20:36 -07:00
/*
 * switch_to(prev, next, last) - inline-asm task switch.
 *
 * Operands: next is passed in %rsi ("S"), prev in %rdi ("D"); whatever is
 * left in %rax when the asm finishes is stored into last ("=a").
 *
 * Sequence performed by the template:
 *   1. SAVE_CONTEXT.
 *   2. Store %rsp at offsetof(struct task_struct, thread.sp) inside prev,
 *      then load %rsp from the same offset inside next.
 *   3. call __switch_to.
 *   4. At the global label thread_return: load the pcurrent field of the
 *      x8664_pda (addressed through %gs) into %rsi, then load the
 *      task_struct 'stack' field (operand named thread_info) into %r8.
 *   5. LOCK_PREFIX btr: test and clear bit TIF_FORK in the thread_info
 *      flags word; the previous bit value ends up in the carry flag.
 *   6. Copy %rax to %rdi and jump to ret_from_fork if the carry flag is
 *      set; otherwise fall through to RESTORE_CONTEXT.
 *
 * Clobbers: memory, condition codes, and every register listed in
 * __EXTRA_CLOBBER.
 */
# define switch_to(prev,next,last) \
asm volatile ( SAVE_CONTEXT \
" movq %%rsp,%P[threadrsp](%[prev]) \n \t " /* save RSP */ \
" movq %P[threadrsp](%[next]),%%rsp \n \t " /* restore RSP */ \
" call __switch_to \n \t " \
" .globl thread_return \n " \
" thread_return: \n \t " \
" movq %%gs:%P[pda_pcurrent],%%rsi \n \t " \
" movq %P[thread_info](%%rsi),%%r8 \n \t " \
2006-06-26 13:56:16 +02:00
LOCK_PREFIX " btr %[tif_fork],%P[ti_flags](%%r8) \n \t " \
2005-04-16 15:20:36 -07:00
" movq %%rax,%%rdi \n \t " \
" jc ret_from_fork \n \t " \
RESTORE_CONTEXT \
: " =a " ( last ) \
: [ next ] " S " ( next ) , [ prev ] " D " ( prev ) , \
2008-01-30 13:31:02 +01:00
[ threadrsp ] " i " ( offsetof ( struct task_struct , thread . sp ) ) , \
2005-04-16 15:20:36 -07:00
[ ti_flags ] " i " ( offsetof ( struct thread_info , flags ) ) , \
[ tif_fork ] " i " ( TIF_FORK ) , \
2007-05-09 02:35:17 -07:00
[ thread_info ] " i " ( offsetof ( struct task_struct , stack ) ) , \
2005-04-16 15:20:36 -07:00
[ pda_pcurrent ] " i " ( offsetof ( struct x8664_pda , pcurrent ) ) \
: " memory " , " cc " __EXTRA_CLOBBER )
/*
 * Declared here; the definition is not part of this header.
 * NOTE(review): presumably the argument is the selector to load into %gs -
 * confirm against the definition.
 */
extern void load_gs_index(unsigned int);
/*
 * clts() executes the clts instruction, which clears the 'TS' bit;
 * stts(), defined later in this header, sets that bit again.
 */
# define clts() __asm__ __volatile__ ("clts")
/* Return the current value of control register CR0. */
static inline unsigned long read_cr0 ( void )
{
unsigned long cr0 ;
/* Copy CR0 into a general-purpose register chosen by the compiler. */
asm volatile ( " movq %%cr0,%0 " : " =r " ( cr0 ) ) ;
return cr0 ;
2007-07-22 11:12:29 +02:00
}
2005-04-16 15:20:36 -07:00
/*
 * Load val into control register CR0.
 * NOTE(review): unlike write_cr3(), write_cr4() and write_cr8() this asm
 * statement carries no "memory" clobber - confirm that this is intended.
 */
static inline void write_cr0 ( unsigned long val )
{
asm volatile ( " movq %0,%%cr0 " : : " r " ( val ) ) ;
2007-07-22 11:12:29 +02:00
}
/* Return the current value of control register CR2. */
static inline unsigned long read_cr2 ( void )
{
unsigned long cr2 ;
2007-10-17 18:04:33 +02:00
/* Copy CR2 into a general-purpose register chosen by the compiler. */
asm volatile ( " movq %%cr2,%0 " : " =r " ( cr2 ) ) ;
2007-07-22 11:12:29 +02:00
return cr2 ;
}
/* Load val into control register CR2. */
static inline void write_cr2(unsigned long val)
{
	asm volatile(" movq %0,%%cr2 "
		     : /* no outputs */
		     : " r " (val));
}
2005-04-16 15:20:36 -07:00
/* Return the current value of control register CR3. */
static inline unsigned long read_cr3 ( void )
{
unsigned long cr3 ;
2007-10-17 18:04:33 +02:00
/* Copy CR3 into a general-purpose register chosen by the compiler. */
asm volatile ( " movq %%cr3,%0 " : " =r " ( cr3 ) ) ;
2005-04-16 15:20:36 -07:00
return cr3 ;
2007-07-22 11:12:29 +02:00
}
2005-04-16 15:20:36 -07:00
[PATCH] x86-64: Remove duplicated code for reading control registers
On Tue, Mar 13, 2007 at 05:33:09AM -0700, Randy.Dunlap wrote:
> On Tue, 13 Mar 2007, Glauber de Oliveira Costa wrote:
>
> > Tiny cleanup:
> >
> > In x86_64, the same functions for reading cr3 and writing cr{3,4} are
> > defined in tlbflush.h and system.h, with just a name change.
> > The only difference is the clobbering of memory, which seems a safe, and
> > even needed change for the write_cr4. This patch removes the duplicate.
> > write_cr3() is moved to system.h for consistency.
>
> missing patch.....
>
thanks. Attached now
--
Glauber de Oliveira Costa
Red Hat Inc.
"Free as in Freedom"
Signed-off-by: Andi Kleen <ak@suse.de>
2007-05-02 19:27:06 +02:00
/*
 * Load val into control register CR3.
 * The memory clobber keeps the compiler from caching memory values in
 * registers across the register write.
 */
static inline void write_cr3(unsigned long val)
{
	asm volatile(" movq %0,%%cr3 "
		     : /* no outputs */
		     : " r " (val)
		     : " memory ");
}
2005-04-16 15:20:36 -07:00
/* Return the current value of control register CR4. */
static inline unsigned long read_cr4 ( void )
{
unsigned long cr4 ;
2007-10-17 18:04:33 +02:00
/* Copy CR4 into a general-purpose register chosen by the compiler. */
asm volatile ( " movq %%cr4,%0 " : " =r " ( cr4 ) ) ;
2005-04-16 15:20:36 -07:00
return cr4 ;
2007-07-22 11:12:29 +02:00
}
2005-04-16 15:20:36 -07:00
/*
 * Load val into control register CR4.
 * The asm statement carries a "memory" clobber, so the compiler will not
 * keep memory values cached in registers across the register write.
 */
static inline void write_cr4 ( unsigned long val )
{
[PATCH] x86-64: Remove duplicated code for reading control registers
On Tue, Mar 13, 2007 at 05:33:09AM -0700, Randy.Dunlap wrote:
> On Tue, 13 Mar 2007, Glauber de Oliveira Costa wrote:
>
> > Tiny cleanup:
> >
> > In x86_64, the same functions for reading cr3 and writing cr{3,4} are
> > defined in tlbflush.h and system.h, whith just a name change.
> > The only difference is the clobbering of memory, which seems a safe, and
> > even needed change for the write_cr4. This patch removes the duplicate.
> > write_cr3() is moved to system.h for consistency.
>
> missing patch.....
>
thanks. Attached now
--
Glauber de Oliveira Costa
Red Hat Inc.
"Free as in Freedom"
Signed-off-by: Andi Kleen <ak@suse.de>
2007-05-02 19:27:06 +02:00
asm volatile ( " movq %0,%%cr4 " : : " r " ( val ) : " memory " ) ;
2007-07-22 11:12:29 +02:00
}
/* Return the current value of control register CR8. */
static inline unsigned long read_cr8 ( void )
{
unsigned long cr8 ;
2007-10-17 18:04:33 +02:00
/* Copy CR8 into a general-purpose register chosen by the compiler. */
asm volatile ( " movq %%cr8,%0 " : " =r " ( cr8 ) ) ;
2007-07-22 11:12:29 +02:00
return cr8 ;
}
/*
 * Load val into control register CR8.
 * The memory clobber keeps the compiler from caching memory values in
 * registers across the register write.
 */
static inline void write_cr8(unsigned long val)
{
	asm volatile(" movq %0,%%cr8 "
		     : /* no outputs */
		     : " r " (val)
		     : " memory ");
}
2005-04-16 15:20:36 -07:00
/* Set bit 3 (value 8) of CR0 with a read-modify-write of the register. */
#define stts() write_cr0(read_cr0() | 8)
/*
 * wbinvd(): execute the wbinvd instruction (write back and invalidate
 * caches). The "memory" clobber also makes the statement a compiler barrier.
 */
# define wbinvd() \
2007-07-21 04:37:17 -07:00
__asm__ __volatile__ ( " wbinvd " : : : " memory " )
2005-04-16 15:20:36 -07:00
# endif /* __KERNEL__ */
/*
 * SMP barriers. With CONFIG_SMP, smp_mb() expands to mb() while smp_rmb()
 * and smp_wmb() expand to barrier(); without CONFIG_SMP all three expand to
 * barrier(). smp_read_barrier_depends() is an empty statement either way.
 */
# ifdef CONFIG_SMP
# define smp_mb() mb()
x86: optimise barriers
According to latest memory ordering specification documents from Intel
and AMD, both manufacturers are committed to in-order loads from
cacheable memory for the x86 architecture. Hence, smp_rmb() may be a
simple barrier.
Also according to those documents, and according to existing practice in
Linux (eg. spin_unlock doesn't enforce ordering), stores to cacheable
memory are visible in program order too. Special string stores are safe
-- their constituent stores may be out of order, but they must complete
in order WRT surrounding stores. Nontemporal stores to WB memory can go
out of order, and so they should be fenced explicitly to make them
appear in-order WRT other stores. Hence, smp_wmb() may be a simple
barrier.
http://developer.intel.com/products/processor/manuals/318147.pdf
http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf
In userspace microbenchmarks on a core2 system, fence instructions range
anywhere from around 15 cycles to 50, which may not be totally
insignificant in performance critical paths (code size will go down
too).
However the primary motivation for this is to have the canonical barrier
implementation for x86 architecture.
smp_rmb on buggy pentium pros remains a locked op, which is apparently
required.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-13 03:07:38 +02:00
# define smp_rmb() barrier()
# define smp_wmb() barrier()
2005-04-16 15:20:36 -07:00
# define smp_read_barrier_depends() do {} while(0)
# else
# define smp_mb() barrier()
# define smp_rmb() barrier()
# define smp_wmb() barrier()
# define smp_read_barrier_depends() do {} while(0)
# endif
/*
 * Force strict CPU ordering.
 * And yes, this is required on UP too when we're talking
 * to devices.
 *
 * mb() issues mfence, rmb() issues lfence and wmb() issues sfence; each
 * statement also carries a "memory" clobber, making it a compiler barrier.
 */
#define mb()	__asm__ __volatile__("mfence" : : : "memory")
#define rmb()	__asm__ __volatile__("lfence" : : : "memory")
#define wmb()	__asm__ __volatile__("sfence" : : : "memory")
2007-10-13 03:06:55 +02:00
2005-04-16 15:20:36 -07:00
/* Expands to an empty statement; usable wherever a single statement is. */
#define read_barrier_depends()	do { } while (0)
2006-02-04 23:28:05 -08:00
/* Store value into var through xchg(), discarding the value xchg() returns. */
#define set_mb(var, value) \
	do { \
		(void)xchg(&(var), (value)); \
	} while (0)
2005-04-16 15:20:36 -07:00
/*
 * Compile-time type probe: compares the address of x with the address of a
 * local unsigned long so the compiler can diagnose a pointer-type mismatch
 * when x is not an unsigned long. The comparison result is discarded.
 */
#define warn_if_not_ulong(x) \
	do { \
		unsigned long ul_probe; \
		(void)(&(x) == &ul_probe); \
	} while (0)
2006-07-03 00:24:45 -07:00
# include <linux/irqflags.h>
2006-01-17 07:03:47 +01:00
2005-04-16 15:20:36 -07:00
# endif