/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame, this is
 * only done for syscall tracing, signals or fork/exec et.al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 * at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all register saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 * backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 * There are unfortunately lots of special cases where some registers
 * not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 * Gives a full stack frame.
 * - ENTRY/END Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 * frame that is otherwise undefined after a SYSCALL
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE	   0x40000000

	.code64
	.section .entry.text, "ax"
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
/*
 * With dynamic ftrace the mcount call sites are patched at runtime,
 * so the default mcount is a bare return.
 */
ENTRY(mcount)
	retq
END(mcount)

ENTRY(ftrace_caller)
	/* Bail out immediately if function tracing has been stopped. */
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	MCOUNT_SAVE_FRAME

	/* rdi = call-site return address (offset into MCOUNT_SAVE_FRAME layout) */
	movq 0x38(%rsp), %rdi
	/* rsi = parent return address, from the caller's frame */
	movq 8(%rbp), %rsi
	/* back rdi up to the address of the mcount call itself */
	subq $MCOUNT_INSN_SIZE, %rdi

GLOBAL(ftrace_call)
	call ftrace_stub		/* patched at runtime to the tracer */

	MCOUNT_RESTORE_FRAME

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
	jmp ftrace_stub			/* patched at runtime to ftrace_graph_caller */
#endif

GLOBAL(ftrace_stub)
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	/* Bail out immediately if function tracing has been stopped. */
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	/* Is a function tracer registered? */
	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* Is a function-graph tracer registered? */
	cmpq $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif

GLOBAL(ftrace_stub)
	retq

trace:
	MCOUNT_SAVE_FRAME

	/* same argument setup as ftrace_caller above */
	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call   *ftrace_trace_function

	MCOUNT_RESTORE_FRAME

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	/* Bail out immediately if function tracing has been stopped. */
	cmpl $0, function_trace_stop
	jne ftrace_stub

	MCOUNT_SAVE_FRAME

	leaq 8(%rbp), %rdi		/* rdi = &parent return address */
	movq 0x38(%rsp), %rsi		/* rsi = self return address */
	movq (%rbp), %rdx		/* rdx = parent frame pointer (stack frame test) */
	subq $MCOUNT_INSN_SIZE, %rsi	/* point at the mcount call itself */

	call	prepare_ftrace_return

	MCOUNT_RESTORE_FRAME

	retq
END(ftrace_graph_caller)

GLOBAL(return_to_handler)
	subq  $24, %rsp

	/* Save the return values */
	movq %rax, (%rsp)
	movq %rdx, 8(%rsp)
	movq %rbp, %rdi		/* pass frame pointer for the stack frame test */

	call ftrace_return_to_handler

	/* handler returns the original return address in %rax */
	movq %rax, %rdi
	movq 8(%rsp), %rdx
	movq (%rsp), %rax
	addq $24, %rsp
	jmp *%rdi		/* resume at the real caller */
#endif
#ifndef CONFIG_PREEMPT
/* Without preemption, returning to kernel just restores registers. */
#define retint_kernel retint_restore_args
#endif
#ifdef CONFIG_PARAVIRT
/* Return to user space: restore user GS, then SYSRET. */
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
/* Trace an IRET return for lockdep: only report IRQS_ON if IF is set. */
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? (bit 9 = IF) */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm
/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with an pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
/*
 * Reconstruct the hardware interrupt frame (RSP/SS/CS/EFLAGS) in pt_regs,
 * which SYSCALL entry leaves undefined. RCX is set to -1 to mark "no
 * syscall return address available". \tmp is a scratch register.
 */
	.macro FIXUP_TOP_OF_STACK tmp offset=0
	movq PER_CPU_VAR(old_rsp),\tmp
	movq \tmp,RSP+\offset(%rsp)
	movq $__USER_DS,SS+\offset(%rsp)
	movq $__USER_CS,CS+\offset(%rsp)
	movq $-1,RCX+\offset(%rsp)
	movq R11+\offset(%rsp),\tmp	/* get eflags (SYSCALL saved them in R11) */
	movq \tmp,EFLAGS+\offset(%rsp)
	.endm
/*
 * Undo FIXUP_TOP_OF_STACK: propagate any ptregs manipulation back into the
 * state SYSRET consumes (per-cpu old_rsp and R11-as-eflags).
 */
	.macro RESTORE_TOP_OF_STACK tmp offset=0
	movq RSP+\offset(%rsp),\tmp
	movq \tmp,PER_CPU_VAR(old_rsp)
	movq EFLAGS+\offset(%rsp),\tmp
	movq \tmp,R11+\offset(%rsp)
	.endm
/*
 * Build a fake interrupt frame plus orig_rax on the stack, so that code
 * entered via \child_rip sees a normal kernel-mode frame.
 */
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq_cfi $__KERNEL_DS /* ss */
	/*CFI_REL_OFFSET	ss,0*/
	pushq_cfi %rax /* rsp */
	CFI_REL_OFFSET	rsp,0
	pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
	/*CFI_REL_OFFSET	rflags,0*/
	pushq_cfi $__KERNEL_CS /* cs */
	/*CFI_REL_OFFSET	cs,0*/
	pushq_cfi \child_rip /* rip */
	CFI_REL_OFFSET	rip,0
	pushq_cfi %rax /* orig rax */
	.endm
/* Drop the six quadwords pushed by FAKE_STACK_FRAME. */
	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro EMPTY_FRAME start=1 offset=0
	.if \start
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,8+\offset
	.else
	CFI_DEF_CFA_OFFSET 8+\offset
	.endif
	.endm
/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro INTR_FRAME start=1 offset=0
	EMPTY_FRAME \start, SS+8+\offset-RIP
	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
	CFI_REL_OFFSET rsp, RSP+\offset-RIP
	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
	CFI_REL_OFFSET rip, RIP+\offset-RIP
	.endm
/*
 * initial frame state for exceptions with error code (and interrupts
 * with vector already pushed)
 */
	.macro XCPT_FRAME start=1 offset=0
	INTR_FRAME \start, RIP+\offset-ORIG_RAX
	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
	.endm
/*
 * frame that enables calling into C.
 */
	.macro PARTIAL_FRAME start=1 offset=0
	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
	.endm
/*
 * frame that enables passing a complete pt_regs to a C function.
 */
	.macro DEFAULT_FRAME start=1 offset=0
	PARTIAL_FRAME \start, R11+\offset-R15
	CFI_REL_OFFSET rbx, RBX+\offset
	CFI_REL_OFFSET rbp, RBP+\offset
	CFI_REL_OFFSET r12, R12+\offset
	CFI_REL_OFFSET r13, R13+\offset
	CFI_REL_OFFSET r14, R14+\offset
	CFI_REL_OFFSET r15, R15+\offset
	.endm
/* save partial stack frame */
	.pushsection .kprobes.text, "ax"
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
ENTRY( s a v e _ a r g s )
XCPT_ F R A M E
cld
2011-01-06 15:22:47 +01:00
/ *
* start f r o m r b p i n p t _ r e g s a n d j u m p o v e r
* return a d d r e s s .
* /
movq_ c f i r d i , R D I + 8 - R B P
movq_ c f i r s i , R S I + 8 - R B P
movq_ c f i r d x , R D X + 8 - R B P
movq_ c f i r c x , R C X + 8 - R B P
movq_ c f i r a x , R A X + 8 - R B P
movq_ c f i r8 , R 8 + 8 - R B P
movq_ c f i r9 , R 9 + 8 - R B P
movq_ c f i r10 , R 1 0 + 8 - R B P
movq_ c f i r11 , R 1 1 + 8 - R B P
leaq - R B P + 8 ( % r s p ) ,% r d i / * a r g 1 f o r h a n d l e r * /
2008-11-21 15:20:47 +01:00
movq_ c f i r b p , 8 / * p u s h % r b p * /
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
leaq 8 ( % r s p ) , % r b p / * m o v % r s p , % e b p * /
testl $ 3 , C S ( % r d i )
je 1 f
SWAPGS
/ *
2009-01-19 00:38:58 +09:00
* irq_ c o u n t i s u s e d t o c h e c k i f a C P U i s a l r e a d y o n a n i n t e r r u p t s t a c k
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
* or n o t . W h i l e t h i s i s e s s e n t i a l l y r e d u n d a n t w i t h p r e e m p t _ c o u n t i t i s
* a l i t t l e c h e a p e r t o u s e a s e p a r a t e c o u n t e r i n t h e P D A ( s h o r t o f
* moving i r q _ e n t e r i n t o a s s e m b l y , w h i c h w o u l d b e t o o m u c h w o r k )
* /
2009-01-19 00:38:58 +09:00
1 : incl P E R _ C P U _ V A R ( i r q _ c o u n t )
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
jne 2 f
2008-11-21 15:20:47 +01:00
popq_ c f i % r a x / * m o v e r e t u r n a d d r e s s . . . * /
2009-01-19 00:38:58 +09:00
mov P E R _ C P U _ V A R ( i r q _ s t a c k _ p t r ) ,% r s p
2008-11-20 14:40:11 +01:00
EMPTY_ F R A M E 0
2009-01-30 10:50:54 -06:00
pushq_ c f i % r b p / * b a c k l i n k f o r u n w i n d e r * /
2008-11-21 15:20:47 +01:00
pushq_ c f i % r a x / * . . . t o t h e n e w s t a c k * /
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
/ *
* We e n t e r e d a n i n t e r r u p t c o n t e x t - i r q s a r e o f f :
* /
2 : TRACE_ I R Q S _ O F F
ret
CFI_ E N D P R O C
END( s a v e _ a r g s )
2010-11-18 19:16:55 +09:00
.popsection
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
2008-11-21 16:41:55 +01:00
ENTRY( s a v e _ r e s t )
PARTIAL_ F R A M E 1 R E S T _ S K I P + 8
movq 5 * 8 + 1 6 ( % r s p ) , % r11 / * s a v e r e t u r n a d d r e s s * /
movq_ c f i r b x , R B X + 1 6
movq_ c f i r b p , R B P + 1 6
movq_ c f i r12 , R 1 2 + 1 6
movq_ c f i r13 , R 1 3 + 1 6
movq_ c f i r14 , R 1 4 + 1 6
movq_ c f i r15 , R 1 5 + 1 6
movq % r11 , 8 ( % r s p ) / * r e t u r n a d d r e s s * /
FIXUP_ T O P _ O F _ S T A C K % r11 , 1 6
ret
CFI_ E N D P R O C
END( s a v e _ r e s t )
2008-11-21 16:43:18 +01:00
/* save complete stack frame */
2009-03-12 10:38:55 +00:00
.pushsection .kprobes .text , " ax"
2008-11-21 16:43:18 +01:00
ENTRY( s a v e _ p a r a n o i d )
XCPT_ F R A M E 1 R D I + 8
cld
movq_ c f i r d i , R D I + 8
movq_ c f i r s i , R S I + 8
movq_ c f i r d x , R D X + 8
movq_ c f i r c x , R C X + 8
movq_ c f i r a x , R A X + 8
movq_ c f i r8 , R 8 + 8
movq_ c f i r9 , R 9 + 8
movq_ c f i r10 , R 1 0 + 8
movq_ c f i r11 , R 1 1 + 8
movq_ c f i r b x , R B X + 8
movq_ c f i r b p , R B P + 8
movq_ c f i r12 , R 1 2 + 8
movq_ c f i r13 , R 1 3 + 8
movq_ c f i r14 , R 1 4 + 8
movq_ c f i r15 , R 1 5 + 8
movl $ 1 ,% e b x
movl $ M S R _ G S _ B A S E ,% e c x
rdmsr
testl % e d x ,% e d x
js 1 f / * n e g a t i v e - > i n k e r n e l * /
SWAPGS
xorl % e b x ,% e b x
1 : ret
CFI_ E N D P R O C
END( s a v e _ p a r a n o i d )
2009-03-12 10:38:55 +00:00
.popsection
2008-11-21 16:43:18 +01:00
2005-04-16 15:20:36 -07:00
/ *
2008-11-27 14:41:21 +01:00
* A n e w l y f o r k e d p r o c e s s d i r e c t l y c o n t e x t s w i t c h e s i n t o t h i s a d d r e s s .
*
* rdi : prev t a s k w e s w i t c h e d f r o m
2008-11-16 15:29:00 +01:00
* /
2005-04-16 15:20:36 -07:00
ENTRY( r e t _ f r o m _ f o r k )
2008-11-20 14:40:11 +01:00
DEFAULT_ F R A M E
2008-11-27 14:41:21 +01:00
2009-01-10 23:00:22 -05:00
LOCK ; btr $TIF_FORK,TI_flags(%r8)
2010-09-02 14:07:16 +01:00
pushq_ c f i k e r n e l _ e f l a g s ( % r i p )
popfq_ c f i # r e s e t k e r n e l e f l a g s
2008-11-27 14:41:21 +01:00
call s c h e d u l e _ t a i l # r d i : ' p r e v ' t a s k p a r a m e t e r
2005-04-16 15:20:36 -07:00
GET_ T H R E A D _ I N F O ( % r c x )
2008-11-27 14:41:21 +01:00
2005-04-16 15:20:36 -07:00
RESTORE_ R E S T
2008-11-27 14:41:21 +01:00
testl $ 3 , C S - A R G O F F S E T ( % r s p ) # f r o m k e r n e l _ t h r e a d ?
2005-04-16 15:20:36 -07:00
je i n t _ r e t _ f r o m _ s y s _ c a l l
2008-11-27 14:41:21 +01:00
testl $ _ T I F _ I A 3 2 , T I _ f l a g s ( % r c x ) # 32 - b i t c o m p a t t a s k n e e d s I R E T
2005-04-16 15:20:36 -07:00
jnz i n t _ r e t _ f r o m _ s y s _ c a l l
2008-11-27 14:41:21 +01:00
2008-11-21 16:41:55 +01:00
RESTORE_ T O P _ O F _ S T A C K % r d i , - A R G O F F S E T
2008-11-27 14:41:21 +01:00
jmp r e t _ f r o m _ s y s _ c a l l # g o t o t h e S Y S R E T f a s t p a t h
2005-04-16 15:20:36 -07:00
CFI_ E N D P R O C
2006-06-26 13:56:55 +02:00
END( r e t _ f r o m _ f o r k )
2005-04-16 15:20:36 -07:00
/ *
2011-03-17 16:24:16 -03:00
* System c a l l e n t r y . U p t o 6 a r g u m e n t s i n r e g i s t e r s a r e s u p p o r t e d .
2005-04-16 15:20:36 -07:00
*
* SYSCALL d o e s n o t s a v e a n y t h i n g o n t h e s t a c k a n d d o e s n o t c h a n g e t h e
* stack p o i n t e r .
* /
2008-11-16 15:29:00 +01:00
2005-04-16 15:20:36 -07:00
/ *
2008-11-16 15:29:00 +01:00
* Register s e t u p :
2005-04-16 15:20:36 -07:00
* rax s y s t e m c a l l n u m b e r
* rdi a r g 0
2008-11-16 15:29:00 +01:00
* rcx r e t u r n a d d r e s s f o r s y s c a l l / s y s r e t , C a r g 3
2005-04-16 15:20:36 -07:00
* rsi a r g 1
2008-11-16 15:29:00 +01:00
* rdx a r g 2
2005-04-16 15:20:36 -07:00
* r1 0 a r g 3 ( - - > m o v e d t o r c x f o r C )
* r8 a r g 4
* r9 a r g 5
* r1 1 e f l a g s f o r s y s c a l l / s y s r e t , t e m p o r a r y f o r C
2008-11-16 15:29:00 +01:00
* r1 2 - r15 ,r b p ,r b x s a v e d b y C c o d e , n o t t o u c h e d .
*
2005-04-16 15:20:36 -07:00
* Interrupts a r e o f f o n e n t r y .
* Only c a l l e d f r o m u s e r s p a c e .
*
* XXX i f w e h a d a f r e e s c r a t c h r e g i s t e r w e c o u l d s a v e t h e R S P i n t o t h e s t a c k f r a m e
* and r e p o r t i t p r o p e r l y i n p s . U n f o r t u n a t e l y w e h a v e n ' t .
2006-04-07 19:50:00 +02:00
*
* When u s e r c a n c h a n g e t h e f r a m e s a l w a y s f o r c e I R E T . T h a t i s b e c a u s e
* it d e a l s w i t h u n c a n o n i c a l a d d r e s s e s b e t t e r . S Y S R E T h a s t r o u b l e
* with t h e m d u e t o b u g s i n b o t h A M D a n d I n t e l C P U s .
2008-11-16 15:29:00 +01:00
* /
2005-04-16 15:20:36 -07:00
ENTRY( s y s t e m _ c a l l )
2005-09-12 18:49:24 +02:00
CFI_ S T A R T P R O C s i m p l e
2006-09-26 10:52:41 +02:00
CFI_ S I G N A L _ F R A M E
2009-01-19 00:38:58 +09:00
CFI_ D E F _ C F A r s p ,K E R N E L _ S T A C K _ O F F S E T
2005-09-12 18:49:24 +02:00
CFI_ R E G I S T E R r i p ,r c x
/*CFI_REGISTER rflags,r11*/
2008-01-30 13:32:08 +01:00
SWAPGS_ U N S A F E _ S T A C K
/ *
* A h y p e r v i s o r i m p l e m e n t a t i o n m i g h t w a n t t o u s e a l a b e l
* after t h e s w a p g s , s o t h a t i t c a n d o t h e s w a p g s
* for t h e g u e s t a n d j u m p h e r e o n s y s c a l l .
* /
ENTRY( s y s t e m _ c a l l _ a f t e r _ s w a p g s )
2009-01-19 00:38:58 +09:00
movq % r s p ,P E R _ C P U _ V A R ( o l d _ r s p )
2009-01-19 00:38:58 +09:00
movq P E R _ C P U _ V A R ( k e r n e l _ s t a c k ) ,% r s p
2006-07-03 00:24:45 -07:00
/ *
* No n e e d t o f o l l o w t h i s i r q s o f f / o n s e c t i o n - i t ' s s t r a i g h t
* and s h o r t :
* /
2008-01-30 13:32:08 +01:00
ENABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2005-04-16 15:20:36 -07:00
SAVE_ A R G S 8 ,1
2008-11-16 15:29:00 +01:00
movq % r a x ,O R I G _ R A X - A R G O F F S E T ( % r s p )
2005-09-12 18:49:24 +02:00
movq % r c x ,R I P - A R G O F F S E T ( % r s p )
CFI_ R E L _ O F F S E T r i p ,R I P - A R G O F F S E T
2005-04-16 15:20:36 -07:00
GET_ T H R E A D _ I N F O ( % r c x )
2008-07-09 02:38:07 -07:00
testl $ _ T I F _ W O R K _ S Y S C A L L _ E N T R Y ,T I _ f l a g s ( % r c x )
2005-04-16 15:20:36 -07:00
jnz t r a c e s y s
2008-06-23 15:37:04 -07:00
system_call_fastpath :
2005-04-16 15:20:36 -07:00
cmpq $ _ _ N R _ s y s c a l l _ m a x ,% r a x
ja b a d s y s
movq % r10 ,% r c x
call * s y s _ c a l l _ t a b l e ( ,% r a x ,8 ) # X X X : r i p r e l a t i v e
movq % r a x ,R A X - A R G O F F S E T ( % r s p )
/ *
* Syscall r e t u r n p a t h e n d i n g w i t h S Y S R E T ( f a s t p a t h )
2008-11-16 15:29:00 +01:00
* Has i n c o m p l e t e s t a c k f r a m e a n d u n d e f i n e d t o p o f s t a c k .
* /
2005-04-16 15:20:36 -07:00
ret_from_sys_call :
2005-04-16 15:25:02 -07:00
movl $ _ T I F _ A L L W O R K _ M A S K ,% e d i
2005-04-16 15:20:36 -07:00
/* edi: flagmask */
2008-11-16 15:29:00 +01:00
sysret_check :
2007-10-11 22:11:12 +02:00
LOCKDEP_ S Y S _ E X I T
2005-04-16 15:20:36 -07:00
GET_ T H R E A D _ I N F O ( % r c x )
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O F F
2008-06-24 11:19:35 -03:00
movl T I _ f l a g s ( % r c x ) ,% e d x
2005-04-16 15:20:36 -07:00
andl % e d i ,% e d x
2008-11-16 15:29:00 +01:00
jnz s y s r e t _ c a r e f u l
2006-12-07 02:14:02 +01:00
CFI_ R E M E M B E R _ S T A T E
2006-07-03 00:24:45 -07:00
/ *
* sysretq w i l l r e - e n a b l e i n t e r r u p t s :
* /
TRACE_ I R Q S _ O N
2005-04-16 15:20:36 -07:00
movq R I P - A R G O F F S E T ( % r s p ) ,% r c x
2005-09-12 18:49:24 +02:00
CFI_ R E G I S T E R r i p ,r c x
2005-04-16 15:20:36 -07:00
RESTORE_ A R G S 0 ,- A R G _ S K I P ,1
2005-09-12 18:49:24 +02:00
/*CFI_REGISTER rflags,r11*/
2009-01-19 00:38:58 +09:00
movq P E R _ C P U _ V A R ( o l d _ r s p ) , % r s p
2008-06-25 00:19:28 -04:00
USERGS_ S Y S R E T 6 4
2005-04-16 15:20:36 -07:00
2006-12-07 02:14:02 +01:00
CFI_ R E S T O R E _ S T A T E
2005-04-16 15:20:36 -07:00
/* Handle reschedules */
2008-11-16 15:29:00 +01:00
/* edx: work, edi: workmask */
2005-04-16 15:20:36 -07:00
sysret_careful :
bt $ T I F _ N E E D _ R E S C H E D ,% e d x
jnc s y s r e t _ s i g n a l
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O N
2008-01-30 13:32:08 +01:00
ENABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2010-09-02 14:07:16 +01:00
pushq_ c f i % r d i
2005-04-16 15:20:36 -07:00
call s c h e d u l e
2010-09-02 14:07:16 +01:00
popq_ c f i % r d i
2005-04-16 15:20:36 -07:00
jmp s y s r e t _ c h e c k
2008-11-16 15:29:00 +01:00
/* Handle a signal */
2005-04-16 15:20:36 -07:00
sysret_signal :
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O N
2008-01-30 13:32:08 +01:00
ENABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2008-06-23 15:37:04 -07:00
# ifdef C O N F I G _ A U D I T S Y S C A L L
bt $ T I F _ S Y S C A L L _ A U D I T ,% e d x
jc s y s r e t _ a u d i t
# endif
2009-09-22 16:46:34 -07:00
/ *
* We h a v e a s i g n a l , o r e x i t t r a c i n g o r s i n g l e - s t e p .
* These a l l w i n d u p w i t h t h e i r e t r e t u r n p a t h a n y w a y ,
* so j u s t j o i n t h a t p a t h r i g h t n o w .
* /
FIXUP_ T O P _ O F _ S T A C K % r11 , - A R G O F F S E T
jmp i n t _ c h e c k _ s y s c a l l _ e x i t _ w o r k
2008-11-16 15:29:00 +01:00
2005-09-12 18:49:24 +02:00
badsys :
movq $ - E N O S Y S ,R A X - A R G O F F S E T ( % r s p )
jmp r e t _ f r o m _ s y s _ c a l l
2008-06-23 15:37:04 -07:00
# ifdef C O N F I G _ A U D I T S Y S C A L L
/ *
* Fast p a t h f o r s y s c a l l a u d i t w i t h o u t f u l l s y s c a l l t r a c e .
* We j u s t c a l l a u d i t _ s y s c a l l _ e n t r y ( ) d i r e c t l y , a n d t h e n
* jump b a c k t o t h e n o r m a l f a s t p a t h .
* /
auditsys :
movq % r10 ,% r9 / * 6 t h a r g : 4 t h s y s c a l l a r g * /
movq % r d x ,% r8 / * 5 t h a r g : 3 r d s y s c a l l a r g * /
movq % r s i ,% r c x / * 4 t h a r g : 2 n d s y s c a l l a r g * /
movq % r d i ,% r d x / * 3 r d a r g : 1 s t s y s c a l l a r g * /
movq % r a x ,% r s i / * 2 n d a r g : s y s c a l l n u m b e r * /
movl $ A U D I T _ A R C H _ X 8 6 _ 6 4 ,% e d i / * 1 s t a r g : a u d i t a r c h * /
call a u d i t _ s y s c a l l _ e n t r y
LOAD_ A R G S 0 / * r e l o a d c a l l - c l o b b e r e d r e g i s t e r s * /
jmp s y s t e m _ c a l l _ f a s t p a t h
/ *
* Return f a s t p a t h f o r s y s c a l l a u d i t . C a l l a u d i t _ s y s c a l l _ e x i t ( )
* directly a n d t h e n j u m p b a c k t o t h e f a s t p a t h w i t h T I F _ S Y S C A L L _ A U D I T
* masked o f f .
* /
sysret_audit :
2010-07-21 17:44:12 -07:00
movq R A X - A R G O F F S E T ( % r s p ) ,% r s i / * s e c o n d a r g , s y s c a l l r e t u r n v a l u e * /
cmpq $ 0 ,% r s i / * i s i t < 0 ? * /
2008-06-23 15:37:04 -07:00
setl % a l / * 1 i f s o , 0 i f n o t * /
movzbl % a l ,% e d i / * z e r o - e x t e n d t h a t i n t o % e d i * /
inc % e d i / * f i r s t a r g , 0 - > 1 ( A U D I T S C _ S U C C E S S ) , 1 - > 2 ( A U D I T S C _ F A I L U R E ) * /
call a u d i t _ s y s c a l l _ e x i t
movl $ ( _ T I F _ A L L W O R K _ M A S K & ~ _ T I F _ S Y S C A L L _ A U D I T ) ,% e d i
jmp s y s r e t _ c h e c k
# endif / * C O N F I G _ A U D I T S Y S C A L L * /
2005-04-16 15:20:36 -07:00
/* Do syscall tracing */
2008-11-16 15:29:00 +01:00
tracesys :
2008-06-23 15:37:04 -07:00
# ifdef C O N F I G _ A U D I T S Y S C A L L
testl $ ( _ T I F _ W O R K _ S Y S C A L L _ E N T R Y & ~ _ T I F _ S Y S C A L L _ A U D I T ) ,T I _ f l a g s ( % r c x )
jz a u d i t s y s
# endif
2005-04-16 15:20:36 -07:00
SAVE_ R E S T
2008-03-16 21:59:11 -07:00
movq $ - E N O S Y S ,R A X ( % r s p ) / * p t r a c e c a n c h a n g e t h i s f o r a b a d s y s c a l l * /
2005-04-16 15:20:36 -07:00
FIXUP_ T O P _ O F _ S T A C K % r d i
movq % r s p ,% r d i
call s y s c a l l _ t r a c e _ e n t e r
2008-07-09 02:38:07 -07:00
/ *
* Reload a r g r e g i s t e r s f r o m s t a c k i n c a s e p t r a c e c h a n g e d t h e m .
* We d o n ' t r e l o a d % r a x b e c a u s e s y s c a l l _ t r a c e _ e n t e r ( ) r e t u r n e d
* the v a l u e i t w a n t s u s t o u s e i n t h e t a b l e l o o k u p .
* /
LOAD_ A R G S A R G O F F S E T , 1
2005-04-16 15:20:36 -07:00
RESTORE_ R E S T
cmpq $ _ _ N R _ s y s c a l l _ m a x ,% r a x
2008-03-16 21:59:11 -07:00
ja i n t _ r e t _ f r o m _ s y s _ c a l l / * R A X ( % r s p ) s e t t o - E N O S Y S a b o v e * /
2005-04-16 15:20:36 -07:00
movq % r10 ,% r c x / * f i x u p f o r C * /
call * s y s _ c a l l _ t a b l e ( ,% r a x ,8 )
2008-03-16 21:59:11 -07:00
movq % r a x ,R A X - A R G O F F S E T ( % r s p )
2006-04-07 19:50:00 +02:00
/* Use IRET because user could have changed frame */
2008-11-16 15:29:00 +01:00
/ *
2005-04-16 15:20:36 -07:00
* Syscall r e t u r n p a t h e n d i n g w i t h I R E T .
* Has c o r r e c t t o p o f s t a c k , b u t p a r t i a l s t a c k f r a m e .
2006-12-07 02:14:02 +01:00
* /
2009-02-23 22:57:01 +03:00
GLOBAL( i n t _ r e t _ f r o m _ s y s _ c a l l )
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O F F
2005-04-16 15:20:36 -07:00
testl $ 3 ,C S - A R G O F F S E T ( % r s p )
je r e t i n t _ r e s t o r e _ a r g s
movl $ _ T I F _ A L L W O R K _ M A S K ,% e d i
/* edi: mask to check */
2009-02-23 22:57:01 +03:00
GLOBAL( i n t _ w i t h _ c h e c k )
2007-10-11 22:11:12 +02:00
LOCKDEP_ S Y S _ E X I T _ I R Q
2005-04-16 15:20:36 -07:00
GET_ T H R E A D _ I N F O ( % r c x )
2008-06-24 11:19:35 -03:00
movl T I _ f l a g s ( % r c x ) ,% e d x
2005-04-16 15:20:36 -07:00
andl % e d i ,% e d x
jnz i n t _ c a r e f u l
2008-06-24 11:19:35 -03:00
andl $ ~ T S _ C O M P A T ,T I _ s t a t u s ( % r c x )
2005-04-16 15:20:36 -07:00
jmp r e t i n t _ s w a p g s
/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful :
bt $ T I F _ N E E D _ R E S C H E D ,% e d x
jnc i n t _ v e r y _ c a r e f u l
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O N
2008-01-30 13:32:08 +01:00
ENABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2010-09-02 14:07:16 +01:00
pushq_ c f i % r d i
2005-04-16 15:20:36 -07:00
call s c h e d u l e
2010-09-02 14:07:16 +01:00
popq_ c f i % r d i
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O F F
2005-04-16 15:20:36 -07:00
jmp i n t _ w i t h _ c h e c k
/* handle signals and tracing -- both require a full stack frame */
int_very_careful :
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O N
2008-01-30 13:32:08 +01:00
ENABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2009-09-22 16:46:34 -07:00
int_check_syscall_exit_work :
2005-04-16 15:20:36 -07:00
SAVE_ R E S T
2008-11-16 15:29:00 +01:00
/* Check for syscall exit trace */
2008-07-09 02:38:07 -07:00
testl $ _ T I F _ W O R K _ S Y S C A L L _ E X I T ,% e d x
2005-04-16 15:20:36 -07:00
jz i n t _ s i g n a l
2010-09-02 14:07:16 +01:00
pushq_ c f i % r d i
2008-11-16 15:29:00 +01:00
leaq 8 ( % r s p ) ,% r d i # & p t r e g s - > a r g 1
2005-04-16 15:20:36 -07:00
call s y s c a l l _ t r a c e _ l e a v e
2010-09-02 14:07:16 +01:00
popq_ c f i % r d i
2008-07-09 02:38:07 -07:00
andl $ ~ ( _ T I F _ W O R K _ S Y S C A L L _ E X I T | _ T I F _ S Y S C A L L _ E M U ) ,% e d i
2005-04-16 15:20:36 -07:00
jmp i n t _ r e s t o r e _ r e s t
2008-11-16 15:29:00 +01:00
2005-04-16 15:20:36 -07:00
int_signal :
2008-01-25 21:08:29 +01:00
testl $ _ T I F _ D O _ N O T I F Y _ M A S K ,% e d x
2005-04-16 15:20:36 -07:00
jz 1 f
movq % r s p ,% r d i # & p t r e g s - > a r g 1
xorl % e s i ,% e s i # o l d s e t - > a r g 2
call d o _ n o t i f y _ r e s u m e
x86_64: fix delayed signals
On three of the several paths in entry_64.S that call
do_notify_resume() on the way back to user mode, we fail to properly
check again for newly-arrived work that requires another call to
do_notify_resume() before going to user mode. These paths set the
mask to check only _TIF_NEED_RESCHED, but this is wrong. The other
paths that lead to do_notify_resume() do this correctly already, and
entry_32.S does it correctly in all cases.
All paths back to user mode have to check all the _TIF_WORK_MASK
flags at the last possible stage, with interrupts disabled.
Otherwise, we miss any flags (TIF_SIGPENDING for example) that were
set any time after we entered do_notify_resume(). More work flags
can be set (or left set) synchronously inside do_notify_resume(), as
TIF_SIGPENDING can be, or asynchronously by interrupts or other CPUs
(which then send an asynchronous interrupt).
There are many different scenarios that could hit this bug, most of
them races. The simplest one to demonstrate does not require any
race: when one signal has done handler setup at the check before
returning from a syscall, and there is another signal pending that
should be handled. The second signal's handler should interrupt the
first signal handler before it actually starts (so the interrupted PC
is still at the handler's entry point). Instead, it runs away until
the next kernel entry (next syscall, tick, etc).
This test behaves correctly on 32-bit kernels, and fails on 64-bit
(either 32-bit or 64-bit test binary). With this fix, it works.
#define _GNU_SOURCE
#include <stdio.h>
#include <signal.h>
#include <string.h>
#include <sys/ucontext.h>
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif
static sig_atomic_t hit1, hit2;
static void
handler (int sig, siginfo_t *info, void *ctx)
{
ucontext_t *uc = ctx;
if ((void *) uc->uc_mcontext.gregs[REG_RIP] == &handler)
{
if (sig == SIGUSR1)
hit1 = 1;
else
hit2 = 1;
}
printf ("%s at %#lx\n", strsignal (sig),
uc->uc_mcontext.gregs[REG_RIP]);
}
int
main (void)
{
struct sigaction sa;
sigset_t set;
sigemptyset (&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = &handler;
if (sigaction (SIGUSR1, &sa, NULL)
|| sigaction (SIGUSR2, &sa, NULL))
return 2;
sigemptyset (&set);
sigaddset (&set, SIGUSR1);
sigaddset (&set, SIGUSR2);
if (sigprocmask (SIG_BLOCK, &set, NULL))
return 3;
printf ("main at %p, handler at %p\n", &main, &handler);
raise (SIGUSR1);
raise (SIGUSR2);
if (sigprocmask (SIG_UNBLOCK, &set, NULL))
return 4;
if (hit1 + hit2 == 1)
{
puts ("PASS");
return 0;
}
puts ("FAIL");
return 1;
}
Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:50:39 -07:00
1 : movl $ _ T I F _ W O R K _ M A S K ,% e d i
2005-04-16 15:20:36 -07:00
int_restore_rest :
RESTORE_ R E S T
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O F F
2005-04-16 15:20:36 -07:00
jmp i n t _ w i t h _ c h e c k
CFI_ E N D P R O C
2006-12-07 02:14:02 +01:00
END( s y s t e m _ c a l l )
2008-11-16 15:29:00 +01:00
/ *
2005-04-16 15:20:36 -07:00
* Certain s p e c i a l s y s t e m c a l l s t h a t n e e d t o s a v e a c o m p l e t e f u l l s t a c k f r a m e .
2008-11-16 15:29:00 +01:00
* /
2005-04-16 15:20:36 -07:00
.macro PTREGSCALL label,f u n c ,a r g
2008-11-21 16:41:55 +01:00
ENTRY( \ l a b e l )
PARTIAL_ F R A M E 1 8 / * o f f s e t 8 : r e t u r n a d d r e s s * /
subq $ R E S T _ S K I P , % r s p
CFI_ A D J U S T _ C F A _ O F F S E T R E S T _ S K I P
call s a v e _ r e s t
DEFAULT_ F R A M E 0 8 / * o f f s e t 8 : r e t u r n a d d r e s s * /
leaq 8 ( % r s p ) , \ a r g / * p t _ r e g s p o i n t e r * /
call \ f u n c
jmp p t r e g s c a l l _ c o m m o n
CFI_ E N D P R O C
2006-06-26 13:56:55 +02:00
END( \ l a b e l )
2005-04-16 15:20:36 -07:00
.endm
PTREGSCALL s t u b _ c l o n e , s y s _ c l o n e , % r8
PTREGSCALL s t u b _ f o r k , s y s _ f o r k , % r d i
PTREGSCALL s t u b _ v f o r k , s y s _ v f o r k , % r d i
PTREGSCALL s t u b _ s i g a l t s t a c k , s y s _ s i g a l t s t a c k , % r d x
PTREGSCALL s t u b _ i o p l , s y s _ i o p l , % r s i
ENTRY( p t r e g s c a l l _ c o m m o n )
2008-11-21 16:41:55 +01:00
DEFAULT_ F R A M E 1 8 / * o f f s e t 8 : r e t u r n a d d r e s s * /
RESTORE_ T O P _ O F _ S T A C K % r11 , 8
movq_ c f i _ r e s t o r e R 1 5 + 8 , r15
movq_ c f i _ r e s t o r e R 1 4 + 8 , r14
movq_ c f i _ r e s t o r e R 1 3 + 8 , r13
movq_ c f i _ r e s t o r e R 1 2 + 8 , r12
movq_ c f i _ r e s t o r e R B P + 8 , r b p
movq_ c f i _ r e s t o r e R B X + 8 , r b x
ret $ R E S T _ S K I P / * p o p e x t e n d e d r e g i s t e r s * /
2005-04-16 15:20:36 -07:00
CFI_ E N D P R O C
2006-06-26 13:56:55 +02:00
END( p t r e g s c a l l _ c o m m o n )
2008-11-16 15:29:00 +01:00
2005-04-16 15:20:36 -07:00
ENTRY( s t u b _ e x e c v e )
CFI_ S T A R T P R O C
2010-09-02 13:52:45 +01:00
addq $ 8 , % r s p
PARTIAL_ F R A M E 0
2005-04-16 15:20:36 -07:00
SAVE_ R E S T
FIXUP_ T O P _ O F _ S T A C K % r11
2008-02-26 12:55:57 +01:00
movq % r s p , % r c x
2005-04-16 15:20:36 -07:00
call s y s _ e x e c v e
RESTORE_ T O P _ O F _ S T A C K % r11
movq % r a x ,R A X ( % r s p )
RESTORE_ R E S T
jmp i n t _ r e t _ f r o m _ s y s _ c a l l
CFI_ E N D P R O C
2006-06-26 13:56:55 +02:00
END( s t u b _ e x e c v e )
2008-11-16 15:29:00 +01:00
2005-04-16 15:20:36 -07:00
/ *
* sigreturn i s s p e c i a l b e c a u s e i t n e e d s t o r e s t o r e a l l r e g i s t e r s o n r e t u r n .
* This c a n n o t b e d o n e w i t h S Y S R E T , s o u s e t h e I R E T r e t u r n p a t h i n s t e a d .
2008-11-16 15:29:00 +01:00
* /
2005-04-16 15:20:36 -07:00
ENTRY( s t u b _ r t _ s i g r e t u r n )
CFI_ S T A R T P R O C
2005-09-12 18:49:24 +02:00
addq $ 8 , % r s p
2010-09-02 13:52:45 +01:00
PARTIAL_ F R A M E 0
2005-04-16 15:20:36 -07:00
SAVE_ R E S T
movq % r s p ,% r d i
FIXUP_ T O P _ O F _ S T A C K % r11
call s y s _ r t _ s i g r e t u r n
movq % r a x ,R A X ( % r s p ) # f i x m e , t h i s c o u l d b e d o n e a t t h e h i g h e r l a y e r
RESTORE_ R E S T
jmp i n t _ r e t _ f r o m _ s y s _ c a l l
CFI_ E N D P R O C
2006-06-26 13:56:55 +02:00
END( s t u b _ r t _ s i g r e t u r n )
2005-04-16 15:20:36 -07:00
2008-11-11 13:51:52 -08:00
/ *
* Build t h e e n t r y s t u b s a n d p o i n t e r t a b l e w i t h s o m e a s s e m b l e r m a g i c .
* We p a c k 7 s t u b s i n t o a s i n g l e 3 2 - b y t e c h u n k , w h i c h w i l l f i t i n a
* single c a c h e l i n e o n a l l m o d e r n x86 i m p l e m e n t a t i o n s .
* /
.section .init .rodata , " a"
ENTRY( i n t e r r u p t )
2011-03-07 19:10:39 +01:00
.section .entry .text
2008-11-11 13:51:52 -08:00
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY( i r q _ e n t r i e s _ s t a r t )
INTR_ F R A M E
vector=FIRST_EXTERNAL_VECTOR
.rept ( NR_ V E C T O R S - F I R S T _ E X T E R N A L _ V E C T O R + 6 ) / 7
.balign 32
.rept 7
.if vector < NR_ V E C T O R S
2008-11-12 10:27:35 -08:00
.if vector < > FIRST_ E X T E R N A L _ V E C T O R
2008-11-11 13:51:52 -08:00
CFI_ A D J U S T _ C F A _ O F F S E T - 8
.endif
2010-09-02 14:07:16 +01:00
1 : pushq_ c f i $ ( ~ v e c t o r + 0 x80 ) / * N o t e : a l w a y s i n s i g n e d b y t e r a n g e * /
2008-11-12 10:27:35 -08:00
.if ( ( vector- F I R S T _ E X T E R N A L _ V E C T O R ) % 7 ) < > 6
2008-11-11 13:51:52 -08:00
jmp 2 f
.endif
.previous
.quad 1b
2011-03-07 19:10:39 +01:00
.section .entry .text
2008-11-11 13:51:52 -08:00
vector=vector + 1
.endif
.endr
2 : jmp c o m m o n _ i n t e r r u p t
.endr
CFI_ E N D P R O C
END( i r q _ e n t r i e s _ s t a r t )
.previous
END( i n t e r r u p t )
.previous
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
/ *
2005-04-16 15:20:36 -07:00
* Interrupt e n t r y / e x i t .
*
* Interrupt e n t r y p o i n t s s a v e o n l y c a l l e e c l o b b e r e d r e g i s t e r s i n f a s t p a t h .
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
*
* Entry r u n s w i t h i n t e r r u p t s o f f .
* /
2005-04-16 15:20:36 -07:00
2008-11-13 13:50:20 +01:00
/* 0(%rsp): ~(interrupt number) */
2005-04-16 15:20:36 -07:00
.macro interrupt func
2011-01-06 15:22:47 +01:00
/* reserve pt_regs for scratch regs and rbp */
subq $ O R I G _ R A X - R B P , % r s p
CFI_ A D J U S T _ C F A _ O F F S E T O R I G _ R A X - R B P
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An apic interrupt stub now look like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
call s a v e _ a r g s
2008-11-20 14:40:11 +01:00
PARTIAL_ F R A M E 0
2005-04-16 15:20:36 -07:00
call \ f u n c
.endm
2009-08-27 13:23:25 -04:00
/ *
* Interrupt e n t r y / e x i t s h o u l d b e p r o t e c t e d a g a i n s t k p r o b e s
* /
.pushsection .kprobes .text , " ax"
2008-11-13 13:50:20 +01:00
/ *
* The i n t e r r u p t s t u b s p u s h ( ~ v e c t o r + 0 x80 ) o n t o t h e s t a c k a n d
* then j u m p t o c o m m o n _ i n t e r r u p t .
* /
2008-11-11 13:51:52 -08:00
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt :
2005-09-12 18:49:24 +02:00
XCPT_ F R A M E
2008-11-13 13:50:20 +01:00
addq $ - 0 x80 ,( % r s p ) / * A d j u s t v e c t o r t o [ - 2 5 6 ,- 1 ] r a n g e * /
2005-04-16 15:20:36 -07:00
interrupt d o _ I R Q
2009-01-19 00:38:58 +09:00
/* 0(%rsp): old_rsp-ARGOFFSET */
2005-09-12 18:49:24 +02:00
ret_from_intr :
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O F F
2009-01-19 00:38:58 +09:00
decl P E R _ C P U _ V A R ( i r q _ c o u n t )
2006-06-26 13:57:35 +02:00
leaveq
2011-01-06 15:22:47 +01:00
2010-09-02 14:07:16 +01:00
CFI_ R E S T O R E r b p
2005-09-12 18:49:24 +02:00
CFI_ D E F _ C F A _ R E G I S T E R r s p
2006-06-26 13:57:35 +02:00
CFI_ A D J U S T _ C F A _ O F F S E T - 8
2011-01-06 15:22:47 +01:00
/* we did not save rbx, restore only from ARGOFFSET */
addq $ 8 , % r s p
CFI_ A D J U S T _ C F A _ O F F S E T - 8
2005-09-12 18:49:24 +02:00
exit_intr :
2005-04-16 15:20:36 -07:00
GET_ T H R E A D _ I N F O ( % r c x )
testl $ 3 ,C S - A R G O F F S E T ( % r s p )
je r e t i n t _ k e r n e l
2008-11-16 15:29:00 +01:00
2005-04-16 15:20:36 -07:00
/* Interrupt came from user space */
/ *
* Has a c o r r e c t t o p o f s t a c k , b u t a p a r t i a l s t a c k f r a m e
* % rcx : thread i n f o . I n t e r r u p t s o f f .
2008-11-16 15:29:00 +01:00
* /
2005-04-16 15:20:36 -07:00
retint_with_reschedule :
movl $ _ T I F _ W O R K _ M A S K ,% e d i
2005-09-12 18:49:24 +02:00
retint_check :
2007-10-11 22:11:12 +02:00
LOCKDEP_ S Y S _ E X I T _ I R Q
2008-06-24 11:19:35 -03:00
movl T I _ f l a g s ( % r c x ) ,% e d x
2005-04-16 15:20:36 -07:00
andl % e d i ,% e d x
2005-09-12 18:49:24 +02:00
CFI_ R E M E M B E R _ S T A T E
2005-04-16 15:20:36 -07:00
jnz r e t i n t _ c a r e f u l
2007-10-11 22:11:12 +02:00
retint_swapgs : /* return to user-space */
2006-07-03 00:24:45 -07:00
/ *
* The i r e t q c o u l d r e - e n a b l e i n t e r r u p t s :
* /
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ A N Y )
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ I R E T Q
2008-01-30 13:32:08 +01:00
SWAPGS
2006-07-03 00:24:45 -07:00
jmp r e s t o r e _ a r g s
2007-10-11 22:11:12 +02:00
retint_restore_args : /* return to kernel space */
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ A N Y )
2006-07-03 00:24:45 -07:00
/ *
* The i r e t q c o u l d r e - e n a b l e i n t e r r u p t s :
* /
TRACE_ I R Q S _ I R E T Q
restore_args :
2008-02-09 23:24:08 +01:00
RESTORE_ A R G S 0 ,8 ,0
2008-02-13 23:29:53 +02:00
irq_return :
2008-01-30 13:32:08 +01:00
INTERRUPT_ R E T U R N
2008-02-09 23:24:08 +01:00
.section _ _ ex_ t a b l e , " a "
.quad irq_ r e t u r n , b a d _ i r e t
.previous
# ifdef C O N F I G _ P A R A V I R T
2008-01-30 13:32:08 +01:00
ENTRY( n a t i v e _ i r e t )
2005-04-16 15:20:36 -07:00
iretq
.section _ _ ex_ t a b l e ," a "
2008-01-30 13:32:08 +01:00
.quad native_ i r e t , b a d _ i r e t
2005-04-16 15:20:36 -07:00
.previous
2008-02-09 23:24:08 +01:00
# endif
2005-04-16 15:20:36 -07:00
.section .fixup , " ax"
bad_iret :
2008-02-06 22:39:43 +01:00
/ *
* The i r e t t r a p s w h e n t h e % c s o r % s s b e i n g r e s t o r e d i s b o g u s .
* We' v e l o s t t h e o r i g i n a l t r a p v e c t o r a n d e r r o r c o d e .
* # GPF i s t h e m o s t l i k e l y o n e t o g e t f o r a n i n v a l i d s e l e c t o r .
* So p r e t e n d w e c o m p l e t e d t h e i r e t a n d t o o k t h e #G P F i n u s e r m o d e .
*
* We a r e n o w r u n n i n g w i t h t h e k e r n e l G S a f t e r e x c e p t i o n r e c o v e r y .
* But e r r o r _ e n t r y e x p e c t s u s t o h a v e u s e r G S t o m a t c h t h e u s e r % c s ,
* so s w a p b a c k .
* /
pushq $ 0
SWAPGS
jmp g e n e r a l _ p r o t e c t i o n
2008-01-30 13:32:08 +01:00
.previous
2005-09-12 18:49:24 +02:00
/* edi: workmask, edx: work */
2005-04-16 15:20:36 -07:00
retint_careful :
2005-09-12 18:49:24 +02:00
CFI_ R E S T O R E _ S T A T E
2005-04-16 15:20:36 -07:00
bt $ T I F _ N E E D _ R E S C H E D ,% e d x
jnc r e t i n t _ s i g n a l
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O N
2008-01-30 13:32:08 +01:00
ENABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2010-09-02 14:07:16 +01:00
pushq_ c f i % r d i
2005-04-16 15:20:36 -07:00
call s c h e d u l e
2010-09-02 14:07:16 +01:00
popq_ c f i % r d i
2005-04-16 15:20:36 -07:00
GET_ T H R E A D _ I N F O ( % r c x )
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O F F
2005-04-16 15:20:36 -07:00
jmp r e t i n t _ c h e c k
2008-11-16 15:29:00 +01:00
2005-04-16 15:20:36 -07:00
retint_signal :
2008-01-25 21:08:29 +01:00
testl $ _ T I F _ D O _ N O T I F Y _ M A S K ,% e d x
2005-05-16 21:53:19 -07:00
jz r e t i n t _ s w a p g s
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O N
2008-01-30 13:32:08 +01:00
ENABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2005-04-16 15:20:36 -07:00
SAVE_ R E S T
2008-11-16 15:29:00 +01:00
movq $ - 1 ,O R I G _ R A X ( % r s p )
2005-07-28 21:15:48 -07:00
xorl % e s i ,% e s i # o l d s e t
2005-04-16 15:20:36 -07:00
movq % r s p ,% r d i # & p t _ r e g s
call d o _ n o t i f y _ r e s u m e
RESTORE_ R E S T
2008-01-30 13:32:08 +01:00
DISABLE_ I N T E R R U P T S ( C L B R _ N O N E )
2006-07-03 00:24:45 -07:00
TRACE_ I R Q S _ O F F
2005-05-01 08:58:51 -07:00
GET_ T H R E A D _ I N F O ( % r c x )
x86_64: fix delayed signals
On three of the several paths in entry_64.S that call
do_notify_resume() on the way back to user mode, we fail to properly
check again for newly-arrived work that requires another call to
do_notify_resume() before going to user mode. These paths set the
mask to check only _TIF_NEED_RESCHED, but this is wrong. The other
paths that lead to do_notify_resume() do this correctly already, and
entry_32.S does it correctly in all cases.
All paths back to user mode have to check all the _TIF_WORK_MASK
flags at the last possible stage, with interrupts disabled.
Otherwise, we miss any flags (TIF_SIGPENDING for example) that were
set any time after we entered do_notify_resume(). More work flags
can be set (or left set) synchronously inside do_notify_resume(), as
TIF_SIGPENDING can be, or asynchronously by interrupts or other CPUs
(which then send an asynchronous interrupt).
There are many different scenarios that could hit this bug, most of
them races. The simplest one to demonstrate does not require any
race: when one signal has done handler setup at the check before
returning from a syscall, and there is another signal pending that
should be handled. The second signal's handler should interrupt the
first signal handler before it actually starts (so the interrupted PC
is still at the handler's entry point). Instead, it runs away until
the next kernel entry (next syscall, tick, etc).
This test behaves correctly on 32-bit kernels, and fails on 64-bit
(either 32-bit or 64-bit test binary). With this fix, it works.
#define _GNU_SOURCE
#include <stdio.h>
#include <signal.h>
#include <string.h>
#include <sys/ucontext.h>
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif
static sig_atomic_t hit1, hit2;
static void
handler (int sig, siginfo_t *info, void *ctx)
{
ucontext_t *uc = ctx;
if ((void *) uc->uc_mcontext.gregs[REG_RIP] == &handler)
{
if (sig == SIGUSR1)
hit1 = 1;
else
hit2 = 1;
}
printf ("%s at %#lx\n", strsignal (sig),
uc->uc_mcontext.gregs[REG_RIP]);
}
/*
 * Regression test for delayed signal delivery on x86-64.
 *
 * Installs handler() for SIGUSR1 and SIGUSR2, blocks both, raises both
 * so they are pending, then unblocks them.  On a correct kernel the
 * second signal's handler interrupts the first handler right at its
 * entry point, so exactly one of hit1/hit2 gets set (hit1 + hit2 == 1).
 *
 * Exit status: 0 = PASS, 1 = FAIL (the bug), 2/3/4 = setup failure in
 * sigaction()/sigprocmask().
 */
int
main (void)
{
  struct sigaction sa;
  sigset_t set;
  sigemptyset (&sa.sa_mask);    /* handlers block only the delivered signal */
  sa.sa_flags = SA_SIGINFO;     /* three-argument handler with ucontext */
  sa.sa_sigaction = &handler;
  if (sigaction (SIGUSR1, &sa, NULL)
      || sigaction (SIGUSR2, &sa, NULL))
    return 2;
  sigemptyset (&set);
  sigaddset (&set, SIGUSR1);
  sigaddset (&set, SIGUSR2);
  if (sigprocmask (SIG_BLOCK, &set, NULL))
    return 3;
  printf ("main at %p, handler at %p\n", &main, &handler);
  raise (SIGUSR1);              /* both become pending while blocked... */
  raise (SIGUSR2);
  if (sigprocmask (SIG_UNBLOCK, &set, NULL))  /* ...and are delivered here */
    return 4;
  if (hit1 + hit2 == 1)
    {
      puts ("PASS");
      return 0;
    }
  puts ("FAIL");
  return 1;
}
Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:50:39 -07:00
jmp r e t i n t _ w i t h _ r e s c h e d u l e
2005-04-16 15:20:36 -07:00
# ifdef C O N F I G _ P R E E M P T
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
2006-09-26 10:52:29 +02:00
ENTRY( r e t i n t _ k e r n e l )
2008-06-24 11:19:35 -03:00
cmpl $ 0 ,T I _ p r e e m p t _ c o u n t ( % r c x )
2005-04-16 15:20:36 -07:00
jnz r e t i n t _ r e s t o r e _ a r g s
2008-06-24 11:19:35 -03:00
bt $ T I F _ N E E D _ R E S C H E D ,T I _ f l a g s ( % r c x )
2005-04-16 15:20:36 -07:00
jnc r e t i n t _ r e s t o r e _ a r g s
bt $ 9 ,E F L A G S - A R G O F F S E T ( % r s p ) / * i n t e r r u p t s o f f ? * /
jnc r e t i n t _ r e s t o r e _ a r g s
call p r e e m p t _ s c h e d u l e _ i r q
jmp e x i t _ i n t r
2008-11-16 15:29:00 +01:00
# endif
2006-06-26 13:56:55 +02:00
2005-04-16 15:20:36 -07:00
CFI_ E N D P R O C
2006-06-26 13:56:55 +02:00
END( c o m m o n _ i n t e r r u p t )
2009-08-27 13:23:25 -04:00
/ *
* End o f k p r o b e s s e c t i o n
* /
.popsection
2008-11-16 15:29:00 +01:00
2005-04-16 15:20:36 -07:00
/ *
* APIC i n t e r r u p t s .
2008-11-16 15:29:00 +01:00
* /
2008-11-23 10:08:28 +01:00
/*
 * apicinterrupt num sym do_sym --
 * Emit the stub ENTRY(\sym) for an APIC interrupt vector: push the
 * complemented vector number $~(\num), save registers and dispatch via
 * the 'interrupt' helper to \do_sym, then leave through ret_from_intr.
 * NOTE(review): the lines below are blame-garbled (inter-character
 * spaces and interleaved commit dates); kept byte-identical.
 */
.macro apicinterrupt num s y m d o _ s y m
ENTRY( \ s y m )
2005-09-12 18:49:24 +02:00
INTR_ F R A M E
2010-09-02 14:07:16 +01:00
pushq_ c f i $ ~ ( \ n u m )
2008-11-23 10:08:28 +01:00
interrupt \ d o _ s y m
2005-04-16 15:20:36 -07:00
jmp r e t _ f r o m _ i n t r
CFI_ E N D P R O C
2008-11-23 10:08:28 +01:00
END( \ s y m )
.endm
2005-04-16 15:20:36 -07:00
2008-11-23 10:08:28 +01:00
# ifdef C O N F I G _ S M P
apicinterrupt I R Q _ M O V E _ C L E A N U P _ V E C T O R \
irq_ m o v e _ c l e a n u p _ i n t e r r u p t s m p _ i r q _ m o v e _ c l e a n u p _ i n t e r r u p t
x86: fix panic with interrupts off (needed for MCE)
For some time each panic() called with interrupts disabled
triggered the !irqs_disabled() WARN_ON in smp_call_function(),
producing ugly backtraces and confusing users.
This is a common situation with machine checks for example which
tend to call panic with interrupts disabled, but will also hit
in other situations e.g. panic during early boot. In fact it
means that panic cannot be called in many circumstances, which
would be bad.
This all started with the new fancy queued smp_call_function,
which is then used by the shutdown path to shut down the other
CPUs.
On closer examination it turned out that the fancy RCU
smp_call_function() does lots of things not suitable in a panic
situation anyways, like allocating memory and relying on complex
system state.
I originally tried to patch this over by checking for panic
there, but it was quite complicated and the original patch
was also not very popular. This also didn't fix some of the
underlying complexity problems.
The new code in post 2.6.29 tries to patch around this by
checking for oops_in_progress, but that is not enough to make
this fully safe and I don't think that's a real solution
because panic has to be reliable.
So instead use its own vector to reboot. This makes the reboot
code extremely straightforward, which is definitely a big plus
in a panic situation where it is important to avoid relying on
too much kernel state. The new simple code is also safe to be
called from an interrupts-off region because it is very, very simple.
There can be situations where it is important that panic
is reliable. For example on a fatal machine check the panic
is needed to get the system up again and running as quickly
as possible. So it's important that panic is reliable and
all function it calls simple.
This is why I came up with this simple vector scheme.
It's very hard to beat in simplicity. Vectors are not
particularly precious anymore since all big systems are
using per CPU vectors.
Another possibility would have been to use an NMI similar
to kdump, but there is still the problem that NMIs don't
work reliably on some systems due to BIOS issues. NMIs
would have been able to stop CPUs running with interrupts
off too. For the sake of universal reliability I opted for
using a non NMI vector for now.
I put the reboot vector into the highest priority bucket of
the APIC vectors and moved the 64bit UV_BAU message down
instead into the next lower priority.
[ Impact: bug fix, fixes an old regression ]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-05-27 21:56:52 +02:00
apicinterrupt R E B O O T _ V E C T O R \
reboot_ i n t e r r u p t s m p _ r e b o o t _ i n t e r r u p t
2008-11-23 10:08:28 +01:00
# endif
2005-04-16 15:20:36 -07:00
2009-01-20 04:36:04 +01:00
# ifdef C O N F I G _ X 8 6 _ U V
2008-11-27 00:02:10 +03:00
apicinterrupt U V _ B A U _ M E S S A G E \
2008-11-23 10:08:28 +01:00
uv_ b a u _ m e s s a g e _ i n t r1 u v _ b a u _ m e s s a g e _ i n t e r r u p t
2009-01-20 04:36:04 +01:00
# endif
2008-11-23 10:08:28 +01:00
apicinterrupt L O C A L _ T I M E R _ V E C T O R \
apic_ t i m e r _ i n t e r r u p t s m p _ a p i c _ t i m e r _ i n t e r r u p t
2009-10-14 09:22:57 -05:00
apicinterrupt X 8 6 _ P L A T F O R M _ I P I _ V E C T O R \
x8 6 _ p l a t f o r m _ i p i s m p _ x86 _ p l a t f o r m _ i p i
2005-11-05 17:25:53 +01:00
2008-11-16 15:29:00 +01:00
# ifdef C O N F I G _ S M P
2011-01-17 10:52:05 +08:00
.irp idx,0 ,1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,9 ,1 0 ,1 1 ,1 2 ,1 3 ,1 4 ,1 5 , \
1 6 , 1 7 , 1 8 , 1 9 , 2 0 , 2 1 , 2 2 , 2 3 , 2 4 , 2 5 , 2 6 , 2 7 , 2 8 , 2 9 , 3 0 , 3 1
.if NUM_INVALIDATE_TLB_VECTORS > \ idx
2010-10-19 14:52:26 +01:00
apicinterrupt ( I N V A L I D A T E _ T L B _ V E C T O R _ S T A R T ) + \ i d x \
invalidate_ i n t e r r u p t \ i d x s m p _ i n v a l i d a t e _ i n t e r r u p t
2011-01-17 10:52:05 +08:00
.endif
2010-10-19 14:52:26 +01:00
.endr
2005-04-16 15:20:36 -07:00
# endif
2008-11-23 10:08:28 +01:00
apicinterrupt T H R E S H O L D _ A P I C _ V E C T O R \
2009-04-28 23:32:56 +02:00
threshold_ i n t e r r u p t s m p _ t h r e s h o l d _ i n t e r r u p t
2008-11-23 10:08:28 +01:00
apicinterrupt T H E R M A L _ A P I C _ V E C T O R \
thermal_ i n t e r r u p t s m p _ t h e r m a l _ i n t e r r u p t
2008-06-02 08:56:14 -05:00
2009-05-27 21:56:54 +02:00
# ifdef C O N F I G _ X 8 6 _ M C E
apicinterrupt M C E _ S E L F _ V E C T O R \
mce_ s e l f _ i n t e r r u p t s m p _ m c e _ s e l f _ i n t e r r u p t
# endif
2008-11-23 10:08:28 +01:00
# ifdef C O N F I G _ S M P
apicinterrupt C A L L _ F U N C T I O N _ S I N G L E _ V E C T O R \
call_ f u n c t i o n _ s i n g l e _ i n t e r r u p t s m p _ c a l l _ f u n c t i o n _ s i n g l e _ i n t e r r u p t
apicinterrupt C A L L _ F U N C T I O N _ V E C T O R \
call_ f u n c t i o n _ i n t e r r u p t s m p _ c a l l _ f u n c t i o n _ i n t e r r u p t
apicinterrupt R E S C H E D U L E _ V E C T O R \
reschedule_ i n t e r r u p t s m p _ r e s c h e d u l e _ i n t e r r u p t
# endif
2005-04-16 15:20:36 -07:00
2008-11-23 10:08:28 +01:00
apicinterrupt E R R O R _ A P I C _ V E C T O R \
error_ i n t e r r u p t s m p _ e r r o r _ i n t e r r u p t
apicinterrupt S P U R I O U S _ A P I C _ V E C T O R \
spurious_ i n t e r r u p t s m p _ s p u r i o u s _ i n t e r r u p t
2008-11-16 15:29:00 +01:00
2010-10-14 14:01:34 +08:00
# ifdef C O N F I G _ I R Q _ W O R K
apicinterrupt I R Q _ W O R K _ V E C T O R \
irq_ w o r k _ i n t e r r u p t s m p _ i r q _ w o r k _ i n t e r r u p t
2008-12-03 10:39:53 +01:00
# endif
2005-04-16 15:20:36 -07:00
/ *
* Exception e n t r y p o i n t s .
2008-11-16 15:29:00 +01:00
* /
2008-11-23 10:08:28 +01:00
.macro zeroentry sym d o _ s y m
ENTRY( \ s y m )
2005-09-12 18:49:24 +02:00
INTR_ F R A M E
2008-06-25 00:19:31 -04:00
PARAVIRT_ A D J U S T _ E X C E P T I O N _ F R A M E
2008-11-21 15:20:47 +01:00
pushq_ c f i $ - 1 / * O R I G _ R A X : n o s y s c a l l t o r e s t a r t * /
2010-09-02 13:55:11 +01:00
subq $ O R I G _ R A X - R 1 5 , % r s p
CFI_ A D J U S T _ C F A _ O F F S E T O R I G _ R A X - R 1 5
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An APIC interrupt stub now looks like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was a 2x4-core Xeon at 2333MHz. Measured were the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histograms are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
call e r r o r _ e n t r y
2008-11-20 14:40:11 +01:00
DEFAULT_ F R A M E 0
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An APIC interrupt stub now looks like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was a 2x4-core Xeon at 2333MHz. Measured were the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histograms are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
movq % r s p ,% r d i / * p t _ r e g s p o i n t e r * /
xorl % e s i ,% e s i / * n o e r r o r c o d e * /
2008-11-23 10:08:28 +01:00
call \ d o _ s y m
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An APIC interrupt stub now looks like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was a 2x4-core Xeon at 2333MHz. Measured were the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histograms are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
jmp e r r o r _ e x i t / * % e b x : n o s w a p g s f l a g * /
2005-09-12 18:49:24 +02:00
CFI_ E N D P R O C
2008-11-23 10:08:28 +01:00
END( \ s y m )
.endm
2005-04-16 15:20:36 -07:00
2008-11-23 10:08:28 +01:00
/*
 * paranoidzeroentry sym do_sym --
 * Stub for a no-error-code exception on the "paranoid" path
 * (save_paranoid / paranoid_exit): push -1 as ORIG_RAX (no syscall to
 * restart), make stack room down to R15, save registers via
 * save_paranoid, then call \do_sym with pt_regs in %rdi and a zero
 * error code in %esi.  %ebx carries the no-swapgs flag into
 * paranoid_exit.
 * NOTE(review): the lines below are blame-garbled; kept byte-identical.
 */
.macro paranoidzeroentry sym d o _ s y m
2008-11-24 13:24:28 +01:00
ENTRY( \ s y m )
2008-11-21 16:44:28 +01:00
INTR_ F R A M E
PARAVIRT_ A D J U S T _ E X C E P T I O N _ F R A M E
2010-09-02 13:55:11 +01:00
pushq_ c f i $ - 1 / * O R I G _ R A X : n o s y s c a l l t o r e s t a r t * /
subq $ O R I G _ R A X - R 1 5 , % r s p
CFI_ A D J U S T _ C F A _ O F F S E T O R I G _ R A X - R 1 5
2008-11-21 16:44:28 +01:00
call s a v e _ p a r a n o i d
TRACE_ I R Q S _ O F F
movq % r s p ,% r d i / * p t _ r e g s p o i n t e r * /
xorl % e s i ,% e s i / * n o e r r o r c o d e * /
2008-11-23 10:08:28 +01:00
call \ d o _ s y m
2008-11-21 16:44:28 +01:00
jmp p a r a n o i d _ e x i t / * % e b x : n o s w a p g s f l a g * /
CFI_ E N D P R O C
2008-11-24 13:24:28 +01:00
END( \ s y m )
2008-11-23 10:08:28 +01:00
.endm
2008-11-21 16:44:28 +01:00
2010-07-31 12:48:22 -04:00
# define I N I T _ T S S _ I S T ( x ) P E R _ C P U _ V A R ( i n i t _ t s s ) + ( T S S _ i s t + ( ( x ) - 1 ) * 8 )
2008-11-23 10:08:28 +01:00
/*
 * paranoidzeroentry_ist sym do_sym ist --
 * Like paranoidzeroentry, but for an exception running on IST stack
 * \ist: around the \do_sym call it subtracts/re-adds EXCEPTION_STKSZ
 * on INIT_TSS_IST(\ist) -- presumably so a nested same-vector exception
 * lands on a fresh stack slot; TODO confirm against the IST design.
 * Calls \do_sym with pt_regs in %rdi, zero error code in %esi, and
 * exits via paranoid_exit (%ebx: no-swapgs flag).
 * NOTE(review): the lines below are blame-garbled; kept byte-identical.
 */
.macro paranoidzeroentry_ist sym d o _ s y m i s t
2008-11-24 13:24:28 +01:00
ENTRY( \ s y m )
2008-11-27 21:10:08 +03:00
INTR_ F R A M E
2008-11-21 16:44:28 +01:00
PARAVIRT_ A D J U S T _ E X C E P T I O N _ F R A M E
2010-09-02 13:55:11 +01:00
pushq_ c f i $ - 1 / * O R I G _ R A X : n o s y s c a l l t o r e s t a r t * /
subq $ O R I G _ R A X - R 1 5 , % r s p
CFI_ A D J U S T _ C F A _ O F F S E T O R I G _ R A X - R 1 5
2008-11-21 16:44:28 +01:00
call s a v e _ p a r a n o i d
TRACE_ I R Q S _ O F F
movq % r s p ,% r d i / * p t _ r e g s p o i n t e r * /
xorl % e s i ,% e s i / * n o e r r o r c o d e * /
2010-07-31 12:48:22 -04:00
subq $ E X C E P T I O N _ S T K S Z , I N I T _ T S S _ I S T ( \ i s t )
2008-11-23 10:08:28 +01:00
call \ d o _ s y m
2010-07-31 12:48:22 -04:00
addq $ E X C E P T I O N _ S T K S Z , I N I T _ T S S _ I S T ( \ i s t )
2008-11-21 16:44:28 +01:00
jmp p a r a n o i d _ e x i t / * % e b x : n o s w a p g s f l a g * /
CFI_ E N D P R O C
2008-11-24 13:24:28 +01:00
END( \ s y m )
2008-11-23 10:08:28 +01:00
.endm
2008-11-21 16:44:28 +01:00
2008-11-24 13:24:28 +01:00
.macro errorentry sym d o _ s y m
2008-11-23 10:08:28 +01:00
ENTRY( \ s y m )
2005-09-12 18:49:24 +02:00
XCPT_ F R A M E
2008-06-25 00:19:31 -04:00
PARAVIRT_ A D J U S T _ E X C E P T I O N _ F R A M E
2010-09-02 13:55:11 +01:00
subq $ O R I G _ R A X - R 1 5 , % r s p
CFI_ A D J U S T _ C F A _ O F F S E T O R I G _ R A X - R 1 5
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An APIC interrupt stub now looks like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was a 2x4-core Xeon at 2333MHz. Measured were the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histograms are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
call e r r o r _ e n t r y
2008-11-20 14:40:11 +01:00
DEFAULT_ F R A M E 0
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An APIC interrupt stub now looks like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was a 2x4-core Xeon at 2333MHz. Measured were the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histograms are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
movq % r s p ,% r d i / * p t _ r e g s p o i n t e r * /
movq O R I G _ R A X ( % r s p ) ,% r s i / * g e t e r r o r c o d e * /
movq $ - 1 ,O R I G _ R A X ( % r s p ) / * n o s y s c a l l t o r e s t a r t * /
2008-11-23 10:08:28 +01:00
call \ d o _ s y m
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.
The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:
<common_interrupt>:
(5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
...
An APIC interrupt stub now looks like this:
<thermal_interrupt>:
(5) pushq $0xffffffffffffff05 /* ~(vector) */
(4) sub $0x50,%rsp /* space for registers */
(5) callq ffffffff80211290 <save_args>
(5) callq ffffffff80212b8f <smp_thermal_interrupt>
(5) jmpq ffffffff80211f93 <ret_from_intr>
Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:
<overflow>:
(6) callq *0x1cad12(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(2) pushq $0xffffffffffffffff /* no syscall */
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(2) xor %esi,%esi /* no error code */
(5) callq ffffffff80213446 <do_overflow>
(5) jmpq ffffffff8030e460 <error_exit>
And one for an exception with errorcode like this:
<segment_not_present>:
(6) callq *0x1cab92(%rip) # ffffffff803dd448 <pv_irq_ops+0x38>
(4) sub $0x78,%rsp /* space for registers */
(5) callq ffffffff8030e3b0 <error_entry>
(3) mov %rsp,%rdi /* pt_regs pointer */
(5) mov 0x78(%rsp),%rsi /* load error code */
(9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */
(5) callq ffffffff80213209 <do_segment_not_present>
(5) jmpq ffffffff8030e460 <error_exit>
Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).
Anyhow, I tested this patch on top of a recent -tip. The machine
was a 2x4-core Xeon at 2333MHz. Measured were the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histograms are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-19 01:18:11 +01:00
jmp e r r o r _ e x i t / * % e b x : n o s w a p g s f l a g * /
2005-09-12 18:49:24 +02:00
CFI_ E N D P R O C
2008-11-23 10:08:28 +01:00
END( \ s y m )
.endm
2005-04-16 15:20:36 -07:00
/* error code is on the stack already */
2008-11-24 13:24:28 +01:00
/*
 * paranoiderrorentry sym do_sym --
 * Stub for an exception whose error code is already on the stack (see
 * comment above), taken on the "paranoid" save_paranoid/paranoid_exit
 * path: save registers, load the error code from ORIG_RAX into %rsi,
 * overwrite ORIG_RAX with -1 (no syscall to restart), and call \do_sym
 * with pt_regs in %rdi.  %ebx carries the no-swapgs flag into
 * paranoid_exit.
 * NOTE(review): the lines below are blame-garbled; kept byte-identical.
 */
.macro paranoiderrorentry sym d o _ s y m
2008-11-23 10:08:28 +01:00
ENTRY( \ s y m )
2008-11-21 16:44:28 +01:00
XCPT_ F R A M E
PARAVIRT_ A D J U S T _ E X C E P T I O N _ F R A M E
2010-09-02 13:55:11 +01:00
subq $ O R I G _ R A X - R 1 5 , % r s p
CFI_ A D J U S T _ C F A _ O F F S E T O R I G _ R A X - R 1 5
2008-11-21 16:43:18 +01:00
call s a v e _ p a r a n o i d
DEFAULT_ F R A M E 0
2008-09-26 14:03:03 +02:00
TRACE_ I R Q S _ O F F
2008-11-21 16:44:28 +01:00
movq % r s p ,% r d i / * p t _ r e g s p o i n t e r * /
movq O R I G _ R A X ( % r s p ) ,% r s i / * g e t e r r o r c o d e * /
movq $ - 1 ,O R I G _ R A X ( % r s p ) / * n o s y s c a l l t o r e s t a r t * /
2008-11-23 10:08:28 +01:00
call \ d o _ s y m
2008-11-21 16:44:28 +01:00
jmp p a r a n o i d _ e x i t / * % e b x : n o s w a p g s f l a g * /
CFI_ E N D P R O C
2008-11-23 10:08:28 +01:00
END( \ s y m )
.endm
/*
 * Concrete exception entry points, generated from the entry macros
 * defined earlier in this file: zeroentry for vectors without a
 * CPU-pushed error code, errorentry for vectors with one, and
 * paranoiderrorentry for error-code vectors needing paranoid
 * (unknown-GS) handling.
 */
zeroentry divide_error do_divide_error
zeroentry overflow do_overflow
zeroentry bounds do_bounds
zeroentry invalid_op do_invalid_op
zeroentry device_not_available do_device_not_available
paranoiderrorentry double_fault do_double_fault
zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
errorentry invalid_TSS do_invalid_TSS
errorentry segment_not_present do_segment_not_present
zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error
2006-07-03 00:24:45 -07:00
2008-11-27 21:10:08 +03:00
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushfq_cfi			/* save flags so IF is restored on exit */
	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
	SWAPGS				/* run the %gs load on the user GS base */
gs_change:
	movl %edi,%gs			/* may fault: __ex_table sends us to bad_gs */
2:	mfence				/* workaround */
	SWAPGS				/* back to the kernel GS base */
	popfq_cfi
	ret
	CFI_ENDPROC
END(native_load_gs_index)
2008-11-16 15:29:00 +01:00
2008-11-27 21:10:08 +03:00
	/*
	 * Exception fixup for the %gs load above: if "movl %edi,%gs" at
	 * gs_change faults, execution resumes at bad_gs, which loads the
	 * null selector instead and jumps back past the faulting insn.
	 */
	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs		/* load the null selector instead */
	jmp  2b			/* resume after the faulting instruction */
	.previous
2008-11-16 15:29:00 +01:00
2009-12-09 12:34:40 -05:00
/*
 * kernel_thread_helper: first code executed by a new kernel thread.
 * The thread function pointer arrives in %rsi; when it returns, its
 * %eax result is passed to do_exit() as the exit code.
 */
ENTRY(kernel_thread_helper)
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	call *%rsi		# run the thread function
	# exit
	mov %eax, %edi		# thread function result -> arg1 of do_exit
	call do_exit
	ud2			# padding for call trace
	CFI_ENDPROC
END(kernel_thread_helper)
2005-04-16 15:20:36 -07:00
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(const char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(const char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx			/* arg4: the fake pt_regs frame */
	call sys_execve
	movq %rax, RAX(%rsp)		/* stash the return value in the frame */
	RESTORE_REST
	testq %rax,%rax			/* zero => execve succeeded */
	je int_ret_from_sys_call	/* success: leave via the IRET path */
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
END(kernel_execve)
2005-04-16 15:20:36 -07:00
2006-08-02 22:37:28 +02:00
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	pushq_cfi %rbp
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl PER_CPU_VAR(irq_count)
	/* switch to the per-cpu irq stack only if the incl result was
	 * zero, i.e. we were not already on it (ZF from incl above) */
	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
	push %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq				/* restores %rsp/%rbp (back off irq stack) */
	CFI_RESTORE rbp
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
	decl PER_CPU_VAR(irq_count)
	ret
	CFI_ENDPROC
END(call_softirq)
2007-06-23 02:29:25 +02:00
2008-07-08 15:06:49 -07:00
#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback

/*
 * A note on the "critical region" in our callback handler.
 * We want to avoid stacking callback handlers due to events occurring
 * during handling of the last event. To do this, we keep events disabled
 * until we've done all processing. HOWEVER, we must enable events before
 * popping the stack frame (can't be done atomically) and so it would still
 * be possible to get enough handler activations to overflow the stack.
 * Although unlikely, bugs of that kind are hard to track down, so we'd
 * like to avoid the possibility.
 * So, on entry to the handler we detect whether we interrupted an
 * existing activation in its critical region -- if so, we pop the current
 * activation and restart the handler using the previous one.
 */
ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
	CFI_STARTPROC
	/*
	 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
	 * see the correct pointer to the pt_regs
	 */
	movq %rdi, %rsp            # we don't return, adjust the stack frame
	CFI_ENDPROC
	DEFAULT_FRAME
11:	incl PER_CPU_VAR(irq_count)
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	/* switch to the irq stack unless already on it (ZF from incl) */
	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
	pushq %rbp			# backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp			/* back to the original stack */
	CFI_DEF_CFA_REGISTER rsp
	decl PER_CPU_VAR(irq_count)
	jmp  error_exit
	CFI_ENDPROC
x86, binutils, xen: Fix another wrong size directive
The latest binutils (2.21.0.20110302/Ubuntu) breaks the build
yet another time, under CONFIG_XEN=y due to a .size directive that
refers to a slightly differently named (hence, to the now very
strict and unforgiving assembler, non-existent) symbol.
[ mingo:
This unnecessary build breakage caused by new binutils
version 2.21 gets escalated back several kernel releases spanning
several years of Linux history, affecting over 130,000 upstream
kernel commits (!), on CONFIG_XEN=y 64-bit kernels (i.e. essentially
affecting all major Linux distro kernel configs).
Git annotate tells us that this slight debug symbol code mismatch
bug has been introduced in 2008 in commit 3d75e1b8:
3d75e1b8 (Jeremy Fitzhardinge 2008-07-08 15:06:49 -0700 1231) ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
The 'bug' is just a slight asymmetry in ENTRY()/END()
debug-symbols sequences, with lots of assembly code between the
ENTRY() and the END():
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
...
END(do_hypervisor_callback)
Human reviewers almost never catch such small mismatches, and binutils
never even warned about it either.
This new binutils version thus breaks the Xen build on all upstream kernels
since v2.6.27, out of the blue.
This makes a straightforward Git bisection of all 64-bit Xen-enabled kernels
impossible on such binutils, for a bisection window of over a hundred
thousand historic commits. (!)
This is a major fail on the side of binutils and binutils needs to turn
this show-stopper build failure into a warning ASAP. ]
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Jan Beulich <jbeulich@novell.com>
Cc: H.J. Lu <hjl.tools@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kees Cook <kees.cook@canonical.com>
LKML-Reference: <1299877178-26063-1-git-send-email-heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-03-11 21:59:38 +01:00
END(xen_do_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we do not need to fixup as Xen has already reloaded all segment
 * registers that could be reloaded and zeroed the others.
 * Category 2 we fixup by killing the current process. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by comparing each saved segment register
 * with its current contents: any discrepancy means we in category 1.
 */
ENTRY(xen_failsafe_callback)
	INTR_FRAME 1 (6*8)
	/*CFI_REL_OFFSET gs,GS*/
	/*CFI_REL_OFFSET fs,FS*/
	/*CFI_REL_OFFSET es,ES*/
	/*CFI_REL_OFFSET ds,DS*/
	CFI_REL_OFFSET r11,8
	CFI_REL_OFFSET rcx,0
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)		/* saved %ds still current? */
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp			/* drop the hypervisor-provided frame */
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0	/* RIP */
	pushq_cfi %r11
	pushq_cfi %rcx
	jmp general_protection		/* report the bad IRET as a #GP */
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0			/* fake error code */
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)
2010-05-14 12:40:51 +01:00
/* Xen HVM guests: event-channel upcalls arrive as an APIC vector. */
apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
	xen_hvm_callback_vector xen_evtchn_do_upcall

#endif /* CONFIG_XEN */
2008-11-24 13:24:28 +01:00
/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

paranoidzeroentry_ist debug do_debug DEBUG_STACK
paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
paranoiderrorentry stack_segment do_stack_segment
#ifdef CONFIG_XEN
/* Xen variants: same C handlers, non-paranoid entry macros */
zeroentry xen_debug do_debug
zeroentry xen_int3 do_int3
errorentry xen_stack_segment do_stack_segment
#endif
errorentry general_protection do_general_protection
errorentry page_fault do_page_fault
#ifdef CONFIG_KVM_GUEST
errorentry async_page_fault do_async_page_fault
#endif
#ifdef CONFIG_X86_MCE
/* handler dispatched indirectly through machine_check_vector */
paranoidzeroentry machine_check *machine_check_vector(%rip)
#endif
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
/* ebx: no swapgs flag */
ENTRY(paranoid_exit)
	DEFAULT_FRAME
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)		/* returning to user mode? */
	jnz paranoid_userspace
paranoid_swapgs:
	TRACE_IRQS_IRETQ 0
	SWAPGS_UNSAFE_STACK
	RESTORE_ALL 8
	jmp irq_return
paranoid_restore:
	TRACE_IRQS_IRETQ 0
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx	/* any work pending? */
	jz paranoid_swapgs		/* no -> swapgs and leave */
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp paranoid_userspace		/* re-check work flags */
paranoid_schedule:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	jmp paranoid_userspace
	CFI_ENDPROC
END(paranoid_exit)
/*
 * Exception entry point. This expects an error code/orig_rax on the stack.
 * returns in "no swapgs flag" in %ebx.
 */
ENTRY(error_entry)
	XCPT_FRAME
	CFI_ADJUST_CFA_OFFSET 15*8
	/* oldrax contains error code */
	cld
	/* save the full register set into pt_regs (+8: our return addr) */
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi r8,  R8+8
	movq_cfi r9,  R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	xorl %ebx,%ebx			/* default: swapgs was needed */
	testl $3,CS+8(%rsp)		/* faulted in kernel mode? */
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	ret

/*
 * There are two places in the kernel that can potentially fault with
 * usergs. Handle them here. The exception handlers after iret run with
 * kernel gs again, so don't set the user space flag. B stepping K8s
 * sometimes report a truncated RIP for IRET exceptions returning to
 * compat mode. Check for these here too.
 */
error_kernelspace:
	incl %ebx			/* assume no swapgs needed */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP+8(%rsp)		/* fault on the IRET itself? */
	je error_swapgs
	movl %ecx,%eax			/* zero extend */
	cmpq %rax,RIP+8(%rsp)		/* truncated-RIP match (B-step K8)? */
	je bstep_iret
	cmpq $gs_change,RIP+8(%rsp)	/* fault during the %gs reload? */
	je error_swapgs
	jmp error_sti

bstep_iret:
	/* Fix truncated RIP */
	movq %rcx,RIP+8(%rsp)
	jmp error_swapgs
	CFI_ENDPROC
END(error_entry)
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
ENTRY(error_exit)
	DEFAULT_FRAME
	movl %ebx,%eax			/* preserve flag across RESTORE_REST */
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel		/* no swapgs needed => kernel return path */
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful		/* pending work before user return */
	jmp retint_swapgs
	CFI_ENDPROC
END(error_exit)
/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
	subq $ORIG_RAX-R15, %rsp	/* room for the full register frame */
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	call save_paranoid
	DEFAULT_FRAME 0
	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	movq %rsp,%rdi
	movq $-1,%rsi
	call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
	/* paranoidexit; without TRACE_IRQS_OFF */
	/* ebx: no swapgs flag */
	DISABLE_INTERRUPTS(CLBR_NONE)
	testl %ebx,%ebx			/* swapgs needed? */
	jnz nmi_restore
	testl $3,CS(%rsp)		/* returning to user mode? */
	jnz nmi_userspace
nmi_swapgs:
	SWAPGS_UNSAFE_STACK
nmi_restore:
	RESTORE_ALL 8
	jmp irq_return
nmi_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx	/* any work pending? */
	jz nmi_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz nmi_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	jmp nmi_userspace		/* re-check work flags */
nmi_schedule:
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	jmp nmi_userspace
	CFI_ENDPROC
#else
	jmp paranoid_exit
	CFI_ENDPROC
#endif
END(nmi)
/* Minimal stub: report -ENOSYS and return straight via sysret. */
ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
END(ignore_sysret)

/*
 * End of kprobes section
 */
	.popsection