2009-06-02 21:17:37 +00:00
# include < a s m / p r o c e s s o r . h >
2005-09-26 16:04:21 +10:00
# include < a s m / p p c _ a s m . h >
2005-10-10 22:20:10 +10:00
# include < a s m / r e g . h >
2009-06-02 21:17:37 +00:00
# include < a s m / a s m - o f f s e t s . h >
# include < a s m / c p u t a b l e . h >
# include < a s m / t h r e a d _ i n f o . h >
# include < a s m / p a g e . h >
2010-11-18 15:06:17 +00:00
# include < a s m / p t r a c e . h >
2016-01-13 23:33:46 -05:00
# include < a s m / e x p o r t . h >
2009-06-02 21:17:37 +00:00
2013-09-10 20:21:10 +10:00
/*
 * load_vr_state: reload the VMX register file, VSCR included, from memory.
 *
 * In:       r3 = base address of the saved vector state
 * Clobbers: r4 (index/scratch), v0 and whatever REST_32VRS restores, VSCR
 *
 * VSCR can only be written from a vector register, so its image is pulled
 * through v0 first; v0 is then overwritten by the register restore below.
 * The caller must already have VMX enabled in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR		/* r4 = offset of the VSCR image */
	lvx	v0,r4,r3		/* v0 = saved VSCR image */
	mtvscr	v0
	REST_32VRS(0,r4,r3)		/* vector registers, r4 as scratch */
	blr
EXPORT_SYMBOL(load_vr_state)
2013-09-10 20:21:10 +10:00
/*
 * store_vr_state: write the VMX register file, VSCR included, to memory.
 *
 * In:       r3 = base address of the save area (lvx/stvx ignore the low
 *                four address bits, so it is expected to be 16-byte aligned)
 * Clobbers: r4 (index/scratch)
 *
 * The caller must already have VMX enabled in the MSR.
 *
 * VSCR can only be read through a vector register, and v0 is used for that.
 * v0 is reloaded from the save area afterwards so that the live register
 * file still matches what was just stored; otherwise a caller that keeps
 * using the registers after the save would see VSCR where v0 used to be.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)		/* vector registers, r4 as scratch */
	mfvscr	v0			/* v0 = VSCR (overwrites live v0) */
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3		/* store the VSCR image */
	/*
	 * Put the original v0 back. Relies on SAVE_32VRS(0, ...) having
	 * stored v0 at offset 0 of the save area (RA = 0 means "no base").
	 */
	lvx	v0, 0, r3
	blr
EXPORT_SYMBOL(store_vr_state)
2013-09-10 20:21:10 +10:00
2009-06-02 21:17:37 +00:00
/*
 * load_up_altivec: turn AltiVec/VMX on and load the current task's saved
 * vector state (VSCR and the vector registers) from its thread_struct.
 *
 * What the code below does, in order:
 *   - sets MSR_VEC in the live MSR so the vector unit can be used here;
 *   - forces VRSAVE to all 1s if it is currently zero;
 *   - sets MSR_VEC in the MSR value that takes effect after return
 *     (r9 on 32-bit; r12 and the saved _MSR(r1) slot on 64-bit);
 *   - bumps the thread's load_vec byte counter and sets used_vr to 1;
 *   - reloads VSCR and the vector registers from THREAD_VRSTATE.
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* r5 = current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* AltiVec usable from here on */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	.Lvrsave_nonzero	/* already non-zero: leave it alone */
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
.Lvrsave_nonzero:

	/* Locate THREAD (r5) and enable VMX in the MSR used after return. */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* r5 = &current->thread */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif

	/* load_vec is a byte counter; wrapping around is intentional. */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)

	/* Record that this thread has used the vector unit. */
	li	r4,1
	stw	r4,THREAD_USED_VR(r5)

	/* Reload VSCR (through v0), then the vector registers themselves. */
	addi	r6,r5,THREAD_VRSTATE	/* r6 = base of the saved state */
	li	r10,VRSTATE_VSCR
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)		/* r4 is scratch again here */

	/* Back to the caller, which restores registers and returns. */
	blr
/*
 * save_altivec(tsk)
 * Save the vector registers and VSCR of a task.
 *
 * In:       r3 = task pointer (converted to its THREAD below)
 * Clobbers: r3, r4, r5, r7
 *
 * The destination is the pointer held in THREAD_VRSAVEAREA when that is
 * non-zero, otherwise the THREAD_VRSTATE area embedded in the thread.
 *
 * VSCR has to be read through a vector register (v0). v0 is reloaded from
 * the save area before returning so the live registers still match what
 * was stored; without that, code that keeps using the vector unit after the
 * save would find VSCR in v0.
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	/*
	 * NOTE(review): r5 is not read again in this routine; this load looks
	 * like a leftover. Kept until it is confirmed no caller relies on it.
	 */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	.Lsave_area_ready	/* explicit save area supplied */
	addi	r7,r3,THREAD_VRSTATE	/* fall back to thread's own area */
.Lsave_area_ready:
	SAVE_32VRS(0,r4,r7)		/* vector registers, r4 as scratch */
	mfvscr	v0			/* v0 = VSCR (overwrites live v0) */
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7		/* store the VSCR image */
	/*
	 * Put the original v0 back. Relies on SAVE_32VRS(0, ...) having
	 * stored v0 at offset 0 of the save area (RA = 0 means "no base").
	 */
	lvx	v0,0,r7
	blr
#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused,unused,tsk)
 *
 * Make VSX usable for the current task. The FP and AltiVec loaders are
 * reused for the register contents: each is called only if its MSR bit
 * (MSR_FP / MSR_VEC) is not yet set in r12. Afterwards the thread is
 * flagged as having used VSX, MSR_VSX is set in r12 and in the saved
 * _MSR(r1) slot, and control leaves through fast_exception_return.
 */
_GLOBAL(load_up_vsx)
	/* Load FP and VMX registers if that has not been done yet. */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* called only when MSR_FP is clear */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* called only when MSR_VEC is clear */

	/* Enable use of VSX after return. */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)

	/* Record that this thread has used VSX. */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* r4 = &current->thread */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)

	b	fast_exception_return

#endif /* CONFIG_VSX */
2005-09-26 16:04:21 +10:00
/*
 * The routines below are written in assembler so that the use of
 * floating-point registers can be controlled exactly. They must be
 * called with preemption disabled.
 *
 * fpzero / fpone / fphalf are the FP constants 0, 1.0 and 0.5, and
 * LDCONST(fr, name) loads one of them into FP register fr:
 *   - 32-bit: single-precision words in .data, addressed via r11
 *             (so LDCONST clobbers r11 there);
 *   - 64-bit: double-precision TOC entries, addressed via r2.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * fpenable: internal helper that enables floating point and sets FPSCR
 * to 0. Don't call it from C; it doesn't use the normal calling convention.
 *
 * On return:
 *   r1   = new 64-byte stack frame
 *            8(r1)  saved fr31
 *           16(r1)  saved fr1
 *           24(r1)  saved fr0
 *           32(r1) and up are left for the caller's own FP saves
 *   r10  = MSR value from before MSR_FP was set
 *   fr31 = FPSCR value from before it was cleared
 *   fr1  = the fpzero constant
 * r11 is clobbered.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10			/* keep the old MSR for later */
	ori	r11,r10,MSR_FP
	mtmsr	r11			/* FP usable from here on */
	isync

	/* Save the FP registers every routine in this group overwrites. */
	stfd	fr31,8(r1)
	stfd	fr1,16(r1)
	stfd	fr0,24(r1)

	mffs	fr31			/* fr31 = old FPSCR */
	LDCONST(fr1, fpzero)
	MTFSF_L(fr1)			/* FPSCR = 0 */
	blr
/*
 * fpdisable: common exit path for the emulation routines in this file.
 * It is branched to (not called) and undoes the state set up by fpenable:
 *   fr31 = FPSCR value to restore
 *   r10  = MSR value to restore
 *   r12  = return address of the routine that is finishing
 *   r1   = the 64-byte frame holding the saved fr0 / fr1 / fr31
 */
fpdisable:
	MTFSF_L(fr31)			/* FPSCR back first: fr31 is reloaded next */
	lfd	fr0,24(r1)
	lfd	fr1,16(r1)
	lfd	fr31,8(r1)
	mtmsr	r10			/* original MSR */
	isync
	mtlr	r12			/* return to the routine's caller */
	addi	r1,r1,64		/* drop the frame */
	blr
/*
 * vaddfp: vector add, floating point.
 * Four single-precision elements: (r3)[i] = (r4)[i] + (r5)[i].
 * r3 -> destination, r4 and r5 -> sources.
 * r12 carries the return address across fpenable/fpdisable.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r6,0			/* r6 = byte offset of the element */
	li	r0,4
	mtctr	r0			/* four elements */
.Lvaddfp_next:
	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	.Lvaddfp_next
	b	fpdisable
/*
 * vsubfp: vector subtract, floating point.
 * Four single-precision elements: (r3)[i] = (r4)[i] - (r5)[i].
 * r3 -> destination, r4 and r5 -> sources.
 * r12 carries the return address across fpenable/fpdisable.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r6,0			/* r6 = byte offset of the element */
	li	r0,4
	mtctr	r0			/* four elements */
.Lvsubfp_next:
	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	.Lvsubfp_next
	b	fpdisable
/*
 * vmaddfp: vector multiply and add, floating point.
 * Four single-precision elements: (r3)[i] = (r4)[i] * (r6)[i] + (r5)[i].
 * r3 -> destination, r4, r5 and r6 -> sources.
 * fr2 is used on top of what fpenable saves, so it is kept in the
 * 32(r1) slot of fpenable's frame for the duration of the loop.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r7,0			/* r7 = byte offset of the element */
	li	r0,4
	mtctr	r0			/* four elements */
.Lvmaddfp_next:
	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1		/* fr0 * fr2 + fr1 */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	.Lvmaddfp_next
	lfd	fr2,32(r1)
	b	fpdisable
/*
 * vnmsubfp: vector negative multiply and subtract, floating point.
 * Four single-precision elements:
 *   (r3)[i] = -((r4)[i] * (r6)[i] - (r5)[i]).
 * r3 -> destination, r4, r5 and r6 -> sources.
 * fr2 is used on top of what fpenable saves, so it is kept in the
 * 32(r1) slot of fpenable's frame for the duration of the loop.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r7,0			/* r7 = byte offset of the element */
	li	r0,4
	mtctr	r0			/* four elements */
.Lvnmsubfp_next:
	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1		/* -(fr0 * fr2 - fr1) */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	.Lvnmsubfp_next
	lfd	fr2,32(r1)
	b	fpdisable
/*
 * vrefp: vector reciprocal estimate. We just compute 1.0 / x.
 * Four single-precision elements: (r3)[i] = 1.0 / (r4)[i].
 * r3 -> destination, r4 -> source.
 * fr1 holds the constant 1.0 for the whole loop.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	LDCONST(fr1, fpone)		/* fr1 = 1.0 */
	li	r6,0			/* r6 = byte offset of the element */
	li	r0,4
	mtctr	r0			/* four elements */
.Lvrefp_next:
	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0		/* 1.0 / x */
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	.Lvrefp_next
	b	fpdisable
/*
 * vrsqrtefp: vector reciprocal square-root estimate, floating point.
 * The frsqrte instruction supplies the initial estimate, which is then
 * refined by 2 iterations of Newton-Raphson to get sufficient accuracy:
 *   r = r + 0.5 * r * (1 - s * r * r)
 * Four single-precision elements are processed.
 * r3 -> destination, r4 -> source.
 *
 * fr2..fr5 are used on top of what fpenable saves; they are kept in the
 * 32..56(r1) slots of fpenable's frame. fr4 = 1.0 and fr5 = 0.5 throughout.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	LDCONST(fr4, fpone)		/* fr4 = 1.0 */
	LDCONST(fr5, fphalf)		/* fr5 = 0.5 */
	li	r6,0			/* r6 = byte offset of the element */
	li	r0,4
	mtctr	r0			/* four elements */
.Lvrsqrtefp_next:
	lfsx	fr0,r4,r6		/* s */
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	/* first Newton-Raphson step */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	/* second Newton-Raphson step */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	.Lvrsqrtefp_next
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable