2019-05-31 11:09:55 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2005-04-17 02:20:36 +04:00
/* IEEE754 floating point arithmetic
* double precision square root
*/
/*
* MIPS floating point support
* Copyright ( C ) 1994 - 2000 Algorithmics Ltd .
*/
# include "ieee754dp.h"
2017-11-02 14:13:59 +03:00
static const unsigned int table [ ] = {
2005-04-17 02:20:36 +04:00
0 , 1204 , 3062 , 5746 , 9193 , 13348 , 18162 , 23592 ,
29598 , 36145 , 43202 , 50740 , 58733 , 67158 , 75992 ,
85215 , 83599 , 71378 , 60428 , 50647 , 41945 , 34246 ,
27478 , 21581 , 16499 , 12183 , 8588 , 5674 , 3403 ,
1742 , 661 , 130
} ;
2014-04-16 03:31:11 +04:00
union ieee754dp ieee754dp_sqrt ( union ieee754dp x )
2005-04-17 02:20:36 +04:00
{
2005-04-28 17:39:10 +04:00
struct _ieee754_csr oldcsr ;
2014-04-16 03:31:11 +04:00
union ieee754dp y , z , t ;
2017-11-02 14:13:59 +03:00
unsigned int scalx , yh ;
2005-04-17 02:20:36 +04:00
COMPXDP ;
EXPLODEXDP ;
2014-04-19 02:36:32 +04:00
ieee754_clearcx ( ) ;
2005-04-17 02:20:36 +04:00
FLUSHXDP ;
/* x == INF or NAN? */
switch ( xc ) {
2015-04-04 01:25:34 +03:00
case IEEE754_CLASS_SNAN :
return ieee754dp_nanxcpt ( x ) ;
2005-04-17 02:20:36 +04:00
case IEEE754_CLASS_QNAN :
/* sqrt(Nan) = Nan */
2015-04-04 01:25:30 +03:00
return x ;
2014-04-26 03:49:14 +04:00
2005-04-17 02:20:36 +04:00
case IEEE754_CLASS_ZERO :
/* sqrt(0) = 0 */
return x ;
2014-04-26 03:49:14 +04:00
2005-04-17 02:20:36 +04:00
case IEEE754_CLASS_INF :
if ( xs ) {
/* sqrt(-Inf) = Nan */
2014-04-19 02:36:32 +04:00
ieee754_setcx ( IEEE754_INVALID_OPERATION ) ;
2015-04-04 01:25:30 +03:00
return ieee754dp_indef ( ) ;
2005-04-17 02:20:36 +04:00
}
/* sqrt(+Inf) = Inf */
return x ;
2014-04-26 03:49:14 +04:00
2005-04-17 02:20:36 +04:00
case IEEE754_CLASS_DNORM :
DPDNORMX ;
2020-05-04 11:51:29 +03:00
fallthrough ;
2005-04-17 02:20:36 +04:00
case IEEE754_CLASS_NORM :
if ( xs ) {
/* sqrt(-x) = Nan */
2014-04-19 02:36:32 +04:00
ieee754_setcx ( IEEE754_INVALID_OPERATION ) ;
2015-04-04 01:25:30 +03:00
return ieee754dp_indef ( ) ;
2005-04-17 02:20:36 +04:00
}
break ;
}
/* save old csr; switch off INX enable & flag; set RN rounding */
oldcsr = ieee754_csr ;
ieee754_csr . mx & = ~ IEEE754_INEXACT ;
ieee754_csr . sx & = ~ IEEE754_INEXACT ;
2014-04-30 13:21:55 +04:00
ieee754_csr . rm = FPU_CSR_RN ;
2005-04-17 02:20:36 +04:00
/* adjust exponent to prevent overflow */
scalx = 0 ;
if ( xe > 512 ) { /* x > 2**-512? */
xe - = 512 ; /* x = x / 2**512 */
scalx + = 256 ;
2013-01-22 15:59:30 +04:00
} else if ( xe < - 512 ) { /* x < 2**-512? */
2005-04-17 02:20:36 +04:00
xe + = 512 ; /* x = x * 2**512 */
scalx - = 256 ;
}
2017-11-02 14:14:04 +03:00
x = builddp ( 0 , xe + DP_EBIAS , xm & ~ DP_HIDDEN_BIT ) ;
y = x ;
2005-04-17 02:20:36 +04:00
/* magic initial approximation to almost 8 sig. bits */
yh = y . bits > > 32 ;
yh = ( yh > > 1 ) + 0x1ff80000 ;
yh = yh - table [ ( yh > > 15 ) & 31 ] ;
y . bits = ( ( u64 ) yh < < 32 ) | ( y . bits & 0xffffffff ) ;
/* Heron's rule once with correction to improve to ~18 sig. bits */
/* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */
t = ieee754dp_div ( x , y ) ;
y = ieee754dp_add ( y , t ) ;
y . bits - = 0x0010000600000000LL ;
y . bits & = 0xffffffff00000000LL ;
/* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */
2013-01-22 15:59:30 +04:00
/* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */
2017-11-02 14:14:04 +03:00
t = ieee754dp_mul ( y , y ) ;
z = t ;
2014-04-25 17:48:40 +04:00
t . bexp + = 0x001 ;
2005-04-17 02:20:36 +04:00
t = ieee754dp_add ( t , z ) ;
z = ieee754dp_mul ( ieee754dp_sub ( x , z ) , y ) ;
2013-01-22 15:59:30 +04:00
/* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */
2005-04-17 02:20:36 +04:00
t = ieee754dp_div ( z , ieee754dp_add ( t , x ) ) ;
2014-04-25 17:48:40 +04:00
t . bexp + = 0x001 ;
2005-04-17 02:20:36 +04:00
y = ieee754dp_add ( y , t ) ;
/* twiddle last bit to force y correctly rounded */
/* set RZ, clear INEX flag */
2014-04-30 13:21:55 +04:00
ieee754_csr . rm = FPU_CSR_RZ ;
2005-04-17 02:20:36 +04:00
ieee754_csr . sx & = ~ IEEE754_INEXACT ;
/* t=x/y; ...chopped quotient, possibly inexact */
t = ieee754dp_div ( x , y ) ;
if ( ieee754_csr . sx & IEEE754_INEXACT | | t . bits ! = y . bits ) {
if ( ! ( ieee754_csr . sx & IEEE754_INEXACT ) )
/* t = t-ulp */
t . bits - = 1 ;
/* add inexact to result status */
oldcsr . cx | = IEEE754_INEXACT ;
oldcsr . sx | = IEEE754_INEXACT ;
switch ( oldcsr . rm ) {
2014-04-30 13:21:55 +04:00
case FPU_CSR_RU :
2005-04-17 02:20:36 +04:00
y . bits + = 1 ;
2020-05-04 11:51:29 +03:00
fallthrough ;
2014-04-30 13:21:55 +04:00
case FPU_CSR_RN :
2005-04-17 02:20:36 +04:00
t . bits + = 1 ;
break ;
}
/* y=y+t; ...chopped sum */
y = ieee754dp_add ( y , t ) ;
/* adjust scalx for correctly rounded sqrt(x) */
scalx - = 1 ;
}
/* py[n0]=py[n0]+scalx; ...scale back y */
2014-04-25 17:48:40 +04:00
y . bexp + = scalx ;
2005-04-17 02:20:36 +04:00
/* restore rounding mode, possibly set inexact */
ieee754_csr = oldcsr ;
return y ;
}