2005-04-16 15:20:36 -07:00
/* IEEE754 floating point arithmetic
* double precision square root
*/
/*
* MIPS floating point support
* Copyright ( C ) 1994 - 2000 Algorithmics Ltd .
*
* # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
*
* This program is free software ; you can distribute it and / or modify it
* under the terms of the GNU General Public License ( Version 2 ) as
* published by the Free Software Foundation .
*
* This program is distributed in the hope it will be useful , but WITHOUT
* ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License
* for more details .
*
* You should have received a copy of the GNU General Public License along
* with this program ; if not , write to the Free Software Foundation , Inc . ,
* 59 Temple Place - Suite 330 , Boston MA 02111 - 1307 , USA .
*
* # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
*/
# include "ieee754dp.h"
static const unsigned table [ ] = {
0 , 1204 , 3062 , 5746 , 9193 , 13348 , 18162 , 23592 ,
29598 , 36145 , 43202 , 50740 , 58733 , 67158 , 75992 ,
85215 , 83599 , 71378 , 60428 , 50647 , 41945 , 34246 ,
27478 , 21581 , 16499 , 12183 , 8588 , 5674 , 3403 ,
1742 , 661 , 130
} ;
ieee754dp ieee754dp_sqrt ( ieee754dp x )
{
2005-04-28 13:39:10 +00:00
struct _ieee754_csr oldcsr ;
2005-04-16 15:20:36 -07:00
ieee754dp y , z , t ;
unsigned scalx , yh ;
COMPXDP ;
EXPLODEXDP ;
CLEARCX ;
FLUSHXDP ;
/* x == INF or NAN? */
switch ( xc ) {
case IEEE754_CLASS_QNAN :
/* sqrt(Nan) = Nan */
return ieee754dp_nanxcpt ( x , " sqrt " ) ;
case IEEE754_CLASS_SNAN :
SETCX ( IEEE754_INVALID_OPERATION ) ;
return ieee754dp_nanxcpt ( ieee754dp_indef ( ) , " sqrt " ) ;
case IEEE754_CLASS_ZERO :
/* sqrt(0) = 0 */
return x ;
case IEEE754_CLASS_INF :
if ( xs ) {
/* sqrt(-Inf) = Nan */
SETCX ( IEEE754_INVALID_OPERATION ) ;
return ieee754dp_nanxcpt ( ieee754dp_indef ( ) , " sqrt " ) ;
}
/* sqrt(+Inf) = Inf */
return x ;
case IEEE754_CLASS_DNORM :
DPDNORMX ;
/* fall through */
case IEEE754_CLASS_NORM :
if ( xs ) {
/* sqrt(-x) = Nan */
SETCX ( IEEE754_INVALID_OPERATION ) ;
return ieee754dp_nanxcpt ( ieee754dp_indef ( ) , " sqrt " ) ;
}
break ;
}
/* save old csr; switch off INX enable & flag; set RN rounding */
oldcsr = ieee754_csr ;
ieee754_csr . mx & = ~ IEEE754_INEXACT ;
ieee754_csr . sx & = ~ IEEE754_INEXACT ;
ieee754_csr . rm = IEEE754_RN ;
/* adjust exponent to prevent overflow */
scalx = 0 ;
if ( xe > 512 ) { /* x > 2**-512? */
xe - = 512 ; /* x = x / 2**512 */
scalx + = 256 ;
} else if ( xe < - 512 ) { /* x < 2**-512? */
xe + = 512 ; /* x = x * 2**512 */
scalx - = 256 ;
}
y = x = builddp ( 0 , xe + DP_EBIAS , xm & ~ DP_HIDDEN_BIT ) ;
/* magic initial approximation to almost 8 sig. bits */
yh = y . bits > > 32 ;
yh = ( yh > > 1 ) + 0x1ff80000 ;
yh = yh - table [ ( yh > > 15 ) & 31 ] ;
y . bits = ( ( u64 ) yh < < 32 ) | ( y . bits & 0xffffffff ) ;
/* Heron's rule once with correction to improve to ~18 sig. bits */
/* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */
t = ieee754dp_div ( x , y ) ;
y = ieee754dp_add ( y , t ) ;
y . bits - = 0x0010000600000000LL ;
y . bits & = 0xffffffff00000000LL ;
/* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */
/* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */
z = t = ieee754dp_mul ( y , y ) ;
t . parts . bexp + = 0x001 ;
t = ieee754dp_add ( t , z ) ;
z = ieee754dp_mul ( ieee754dp_sub ( x , z ) , y ) ;
/* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */
t = ieee754dp_div ( z , ieee754dp_add ( t , x ) ) ;
t . parts . bexp + = 0x001 ;
y = ieee754dp_add ( y , t ) ;
/* twiddle last bit to force y correctly rounded */
/* set RZ, clear INEX flag */
ieee754_csr . rm = IEEE754_RZ ;
ieee754_csr . sx & = ~ IEEE754_INEXACT ;
/* t=x/y; ...chopped quotient, possibly inexact */
t = ieee754dp_div ( x , y ) ;
if ( ieee754_csr . sx & IEEE754_INEXACT | | t . bits ! = y . bits ) {
if ( ! ( ieee754_csr . sx & IEEE754_INEXACT ) )
/* t = t-ulp */
t . bits - = 1 ;
/* add inexact to result status */
oldcsr . cx | = IEEE754_INEXACT ;
oldcsr . sx | = IEEE754_INEXACT ;
switch ( oldcsr . rm ) {
case IEEE754_RP :
y . bits + = 1 ;
/* drop through */
case IEEE754_RN :
t . bits + = 1 ;
break ;
}
/* y=y+t; ...chopped sum */
y = ieee754dp_add ( y , t ) ;
/* adjust scalx for correctly rounded sqrt(x) */
scalx - = 1 ;
}
/* py[n0]=py[n0]+scalx; ...scale back y */
y . parts . bexp + = scalx ;
/* restore rounding mode, possibly set inexact */
ieee754_csr = oldcsr ;
return y ;
}