2007-07-19 12:49:22 +04:00
# include < l i n u x / l i n k a g e . h >
# include < l i n u x / l g u e s t . h >
2007-10-22 05:03:36 +04:00
# include < a s m / l g u e s t _ h c a l l . h >
2007-07-19 12:49:22 +04:00
# include < a s m / a s m - o f f s e t s . h >
# include < a s m / t h r e a d _ i n f o . h >
2007-07-20 16:12:56 +04:00
# include < a s m / p r o c e s s o r - f l a g s . h >
2007-07-19 12:49:22 +04:00
2009-07-31 02:03:45 +04:00
/*G:020
 * Our story starts with the kernel booting into startup_32 in
 * arch/x86/kernel/head_32.S.  It expects a boot header, which is created by
 * the bootloader (the Launcher in our case).
 *
 * The startup_32 function does very little: it clears the uninitialized global
 * C variables which we expect to be zero (ie. BSS) and then copies the boot
 * header and kernel command line somewhere safe.  Finally it checks the
 * 'hardware_subarch' field.  This was introduced in 2.6.24 for lguest and Xen:
 * if it's set to '1' (lguest's assigned number), then it calls us here.
 *
 * WARNING: be very careful here!  We're running at addresses equal to physical
 * addresses (around 0), not above PAGE_OFFSET as most code expects
 * (eg. 0xC0000000).  Jumps are relative, so they're OK, but we can't touch any
 * data without remembering to subtract __PAGE_OFFSET!
 *
 * The .section line puts this code in .init.text so it will be discarded after
 * boot.
 */
2007-07-19 12:49:22 +04:00
.section .init.text, "ax", @progbits
ENTRY(lguest_entry)
	/*
	 * Announce ourselves to the Host with the "initialization" hypercall;
	 * this is also how we learn where it placed our page tables.
	 *
	 *   %eax = hypercall number (LHCALL_LGUEST_INIT)
	 *   %ebx = lguest_data, with __PAGE_OFFSET subtracted because we are
	 *          still executing at physical addresses
	 */
	movl	$LHCALL_LGUEST_INIT, %eax
	movl	$lguest_data - __PAGE_OFFSET, %ebx
	int	$LGUEST_TRAP_ENTRY

	/* C code needs a stack: start at the top of init_thread_union. */
	movl	$(init_thread_union+THREAD_SIZE), %esp

	/*
	 * The jump is encoded relative to where we are, and we are running
	 * __PAGE_OFFSET too low, so add it back to land in lguest_init.
	 */
	jmp	lguest_init+__PAGE_OFFSET
2007-07-19 12:49:22 +04:00
2009-07-31 02:03:45 +04:00
/*G:055
 * We create a macro which puts the assembler code between lgstart_ and lgend_
 * markers.  These templates are put in the .text section: they can't be
 * discarded after boot as we may need to patch modules, too.
 */
2007-09-25 08:24:44 +04:00
.text
/*
 * LGUEST_PATCH(name, insns...) emits the instructions "insns" bracketed by
 * the labels lgstart_<name> and lgend_<name>, and exports both labels with
 * .globl so the template's extent is visible outside this file.
 */
#define LGUEST_PATCH(name, insns...)			\
	.globl lgstart_##name; .globl lgend_##name;	\
	lgstart_##name:	insns; lgend_##name:

/* "cli" template: store 0 into lguest_data.irq_enabled. */
LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
/* "pushf" template: load lguest_data.irq_enabled into %eax. */
LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
2009-07-31 02:03:45 +04:00
/*G:033
 * But using those wrappers is inefficient (we'll see why that doesn't matter
 * for save_fl and irq_disable later).  If we write our routines carefully in
 * assembler, we can avoid clobbering any registers and avoid jumping through
 * the wrapper functions.
 *
 * I skipped over our first piece of assembler, but this one is worth studying
 * in a bit more detail so I'll describe in easy stages.  First, the routine to
 * enable interrupts:
 */
ENTRY(lg_irq_enable)
	/*
	 * The reverse of irq_disable, this sets lguest_data.irq_enabled to
	 * X86_EFLAGS_IF (ie. "Interrupts enabled").
	 */
	movl	$X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
	/*
	 * But now we need to check if the Host wants to know: there might have
	 * been interrupts waiting to be delivered, in which case it will have
	 * set lguest_data.irq_pending to X86_EFLAGS_IF.  If it's not zero, we
	 * jump to send_interrupts, otherwise we're done.
	 *
	 * This has to be a compare against zero.  "testl $0, mem" would AND
	 * the field with 0, which always yields 0 and always sets ZF, so the
	 * jnz below could never be taken and pending interrupts would be
	 * silently left undelivered.
	 */
	cmpl	$0, lguest_data+LGUEST_DATA_irq_pending
	jnz	send_interrupts
	/*
	 * One cool thing about x86 is that you can do many things without using
	 * a register.  In this case, the normal path hasn't needed to save or
	 * restore any registers at all!
	 */
	ret
send_interrupts:
	/*
	 * OK, now we need a register: eax is used for the hypercall number,
	 * which is LHCALL_SEND_INTERRUPTS.
	 *
	 * We used not to bother with this pending detection at all, which was
	 * much simpler.  Sooner or later the Host would realize it had to
	 * send us an interrupt.  But that turns out to make performance 7
	 * times worse on a simple tcp benchmark.  So now we do this the hard
	 * way.
	 */
	pushl	%eax
	movl	$LHCALL_SEND_INTERRUPTS, %eax
	/*
	 * Trap into the Host.  This is the same software interrupt that
	 * lguest_entry uses for its hypercall, so every hypercall in this
	 * file goes through one mechanism instead of this path relying on a
	 * hand-assembled "vmcall" opcode.
	 */
	int	$LGUEST_TRAP_ENTRY
	/* Put eax back the way we found it. */
	popl	%eax
	ret
2009-07-31 02:03:45 +04:00
/*
 * Last of the three: the "popf", or "restore flags", routine.  The caller
 * hands us the flags in %eax (in practice, either X86_EFLAGS_IF or 0).
 * X86_EFLAGS_IF means interrupts are being switched back on; 0 means they
 * stay off.
 */
ENTRY(lg_restore_fl)
	/* In C this would read "lguest_data.irq_enabled = flags;" */
	movl	%eax, lguest_data+LGUEST_DATA_irq_enabled
	/*
	 * The Host only needs to hear from us when the new flags enable
	 * interrupts *and* lguest_data.irq_pending is set, so that it can
	 * deliver whatever is outstanding.  In that case both values are
	 * X86_EFLAGS_IF (ie. 512), and "testl" ANDs them together for us: a
	 * non-zero result means both are set, and we go to send_interrupts.
	 */
	testl	lguest_data+LGUEST_DATA_irq_pending, %eax
	jnz	send_interrupts
	/* As before, the common path got by without any extra registers. */
	ret
2009-07-31 02:03:45 +04:00
/*:*/
2007-07-19 12:49:22 +04:00
/*
 * These mark the EIP range where the Host should never deliver interrupts.
 * The labels themselves are defined inside lguest_iret; they are made global
 * here so that they are visible outside this file.
 */
.global lguest_noirq_start
.global lguest_noirq_end
2009-07-31 02:03:45 +04:00
/*M:004
 * When the Host reflects a trap or injects an interrupt into the Guest, it
 * sets the eflags interrupt bit on the stack based on lguest_data.irq_enabled,
 * so the Guest iret logic does the right thing when restoring it.  However,
 * when the Host sets the Guest up for direct traps, such as system calls, the
 * processor is the one to push eflags onto the stack, and the interrupt bit
 * will be 1 (in reality, interrupts are always enabled in the Guest).
 *
 * This turns out to be harmless: the only trap which should happen under Linux
 * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc
 * regions), which has to be reflected through the Host anyway.  If another
 * trap *does* go off when interrupts are disabled, the Guest will panic, and
 * we'll never get to this iret!
:*/
2007-07-26 21:41:05 +04:00
2009-07-31 02:03:45 +04:00
/*G:045
 * There is one final paravirt_op that the Guest implements, and glancing at it
 * you can see why I left it to last.  It's *cool*!  It's in *assembler*!
 *
 * The "iret" instruction is used to return from an interrupt or trap.  The
 * stack looks like this:
 *   old address
 *   old code segment & privilege level
 *   old processor flags ("eflags")
 *
 * The "iret" instruction pops those values off the stack and restores them all
 * at once.  The only problem is that eflags includes the Interrupt Flag which
 * the Guest can't change: the CPU will simply ignore it when we do an "iret".
 * So we have to copy eflags from the stack to lguest_data.irq_enabled before
 * we do the "iret".
 *
 * There are two problems with this: firstly, we need to use a register to do
 * the copy and secondly, the whole thing needs to be atomic.  The first
 * problem is easy to solve: push %eax on the stack so we can use it, and then
 * restore it at the end just before the real "iret".
 *
 * The second is harder: copying eflags to lguest_data.irq_enabled will turn
 * interrupts on before we're finished, so we could be interrupted before we
 * return to userspace or wherever.  Our solution to this is to surround the
 * code with lguest_noirq_start: and lguest_noirq_end: labels.  We tell the
 * Host that it is *never* to interrupt us there, even if interrupts seem to be
 * enabled.
 */
2007-07-19 12:49:22 +04:00
ENTRY(lguest_iret)
	/* Borrow %eax as scratch; it is popped again just before the iret. */
	pushl	%eax
	/*
	 * With %eax pushed, the frame is: 0(%esp) saved %eax, 4(%esp) old
	 * address, 8(%esp) old code segment, 12(%esp) old eflags.
	 */
	movl	12(%esp), %eax
lguest_noirq_start:
	/*
	 * The store below carries an explicit %ss: override.  An ordinary
	 * data access would go through "ds", but by now that has been
	 * reloaded for whatever we are returning to (userspace, say) and
	 * cannot be trusted.  The stack segment is still valid, so we address
	 * lguest_data through it instead.
	 */
	movl	%eax, %ss:lguest_data+LGUEST_DATA_irq_enabled
	popl	%eax
	iret
lguest_noirq_end: