2005-09-26 16:04:21 +10:00
# include < a s m / p p c _ a s m . h >
2005-10-10 22:20:10 +10:00
# include < a s m / r e g . h >
2005-09-26 16:04:21 +10:00
/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
/*
 * Floating-point constants 0.0, 1.0 and 0.5, plus LDCONST(fr, name),
 * which loads one of them into floating-point register fr.
 *
 * CONFIG_PPC32: the constants are single-precision words in .data and
 * LDCONST builds the address with lis/@ha + @l, using r11 as scratch.
 * Otherwise: the constants are double-precision TOC entries and LDCONST
 * loads them relative to r2.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else
	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 *
 * The vector routines in this file reach it with bl after copying LR
 * into r12.  State left behind for the caller and for fpdisable:
 *   r1   -> new 64-byte stack frame
 *   r10  =  MSR value on entry
 *   r11  =  scratch (MSR | MSR_FP; also used by the 32-bit LDCONST)
 *   fr31 =  FPSCR value on entry
 *   fr1  =  0.0
 * Frame slots used here: 8(r1) = fr31, 16(r1) = fr1, 24(r1) = fr0.
 * The callers in this file use 32(r1)..56(r1) to save fr2-fr5.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11		/* FP must be enabled before any FP access */
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31		/* keep the incoming FPSCR in fr31 */
	MTFSF_L(fr1)		/* FPSCR <- fr1 (0.0); macro defined outside this file */
	blr
/*
 * Common exit path for the vector routines in this file; they reach it
 * with a plain branch (b), so the final blr returns to their caller.
 *
 * Expects the state set up by fpenable and the routine's own prologue:
 *   r12  =  return address
 *   r10  =  MSR value to restore
 *   fr31 =  FPSCR value to restore
 *   r1   -> the 64-byte frame holding fr31/fr1/fr0 at 8/16/24(r1)
 */
fpdisable:
	mtlr	r12
	MTFSF_L(fr31)		/* FPSCR <- value saved in fr31 */
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10		/* put the saved MSR back, after the last FP access */
	isync
	addi	r1,r1,64	/* release the 64-byte frame */
	blr
/*
 * Vector add, floating point.
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * For each of four single-precision elements: dst[i] = (r4)[i] + (r5)[i].
 */
_GLOBAL(vaddfp)
	mflr	r12		/* fpdisable returns through r12 */
	bl	fpenable
	li	r0,4
	mtctr	r0		/* loop count: 4 elements */
	li	r6,0		/* r6 = byte offset of the current element */
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
/*
 * Vector subtract, floating point.
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * For each of four single-precision elements: dst[i] = (r4)[i] - (r5)[i].
 */
_GLOBAL(vsubfp)
	mflr	r12		/* fpdisable returns through r12 */
	bl	fpenable
	li	r0,4
	mtctr	r0		/* loop count: 4 elements */
	li	r6,0		/* r6 = byte offset of the current element */
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
/*
 * Vector multiply and add, floating point.
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * For each of four single-precision elements: dst[i] = a[i] * c[i] + b[i].
 * fr2 is used in addition to fr0/fr1, so it is saved at 32(r1) in the
 * frame created by fpenable and reloaded before leaving.
 */
_GLOBAL(vmaddfp)
	mflr	r12		/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0		/* loop count: 4 elements */
	li	r7,0		/* r7 = byte offset of the current element */
1:	lfsx	fr0,r4,r7	/* a */
	lfsx	fr1,r5,r7	/* b */
	lfsx	fr2,r6,r7	/* c */
	fmadds	fr0,fr0,fr2,fr1	/* a * c + b */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable
/*
 * Vector negative multiply and subtract, floating point.
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * For each of four single-precision elements:
 * dst[i] = -(a[i] * c[i] - b[i]).
 * fr2 is used in addition to fr0/fr1, so it is saved at 32(r1) in the
 * frame created by fpenable and reloaded before leaving.
 */
_GLOBAL(vnmsubfp)
	mflr	r12		/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0		/* loop count: 4 elements */
	li	r7,0		/* r7 = byte offset of the current element */
1:	lfsx	fr0,r4,r7	/* a */
	lfsx	fr1,r5,r7	/* b */
	lfsx	fr2,r6,r7	/* c */
	fnmsubs	fr0,fr0,fr2,fr1	/* -(a * c - b) */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable
/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 * Four single-precision elements are processed; fr1 holds 1.0 for the
 * whole loop.
 */
_GLOBAL(vrefp)
	mflr	r12		/* fpdisable returns through r12 */
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0		/* loop count: 4 elements */
	li	r6,0		/* r6 = byte offset of the current element */
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0	/* 1.0 / x */
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 *
 * Register use inside the loop (s = source element, r = estimate):
 *   fr0 = s, fr1 = r, fr2/fr3 = temporaries, fr4 = 1.0, fr5 = 0.5.
 * fr2-fr5 are saved at 32..56(r1) in the frame created by fpenable and
 * reloaded before leaving.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12		/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0		/* loop count: 4 elements */
	li	r6,0		/* r6 = byte offset of the current element */
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	/* first Newton-Raphson step */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	/* second Newton-Raphson step */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable