2005-04-17 02:20:36 +04:00
/*
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
This C source file is part of the SoftFloat IEC / IEEE Floating - point
Arithmetic Package , Release 2.
Written by John R . Hauser . This work was made possible in part by the
International Computer Science Institute , located at Suite 600 , 1947 Center
Street , Berkeley , California 94704. Funding was partially provided by the
National Science Foundation under grant MIP - 9311980. The original version
of this code was written as part of a project to build a fixed - point vector
processor in collaboration with the University of California at Berkeley ,
overseen by Profs . Nelson Morgan and John Wawrzynek . More information
2010-10-16 21:36:23 +04:00
is available through the web page
http : //www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
2005-04-17 02:20:36 +04:00
THIS SOFTWARE IS DISTRIBUTED AS IS , FOR FREE . Although reasonable effort
has been made to avoid it , THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR . USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES , COSTS , OR OTHER PROBLEMS ARISING FROM ITS USE .
Derivative works are acceptable , even for commercial purposes , so long as
( 1 ) they include prominent notice that the work is derivative , and ( 2 ) they
include prominent notice akin to these three paragraphs for those parts of
this code that are retained .
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
*/
2005-06-24 00:56:46 +04:00
# include <asm/div64.h>
2005-04-17 02:20:36 +04:00
# include "fpa11.h"
//#include "milieu.h"
//#include "softfloat.h"
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Primitive arithmetic functions , including multi - word arithmetic , and
division and square root approximations . ( Can be specialized to target if
desired . )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
# include "softfloat-macros"
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Functions and definitions to determine : ( 1 ) whether tininess for underflow
is detected before or after rounding by default , ( 2 ) what ( if anything )
happens when exceptions are raised , ( 3 ) how signaling NaNs are distinguished
from quiet NaNs , ( 4 ) the default generated quiet NaNs , and ( 5 ) how NaNs
are propagated from function inputs to output . These details are target -
specific .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
# include "softfloat-specialize"
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Takes a 64 - bit fixed - point value ` absZ ' with binary point between bits 6
and 7 , and returns the properly rounded 32 - bit integer corresponding to the
input . If ` zSign ' is nonzero , the input is negated before being converted
to an integer . Bit 63 of ` absZ ' must be zero . Ordinarily , the fixed - point
input is simply rounded to an integer , with the inexact exception raised if
the input cannot be represented exactly as an integer . If the fixed - point
input is too large , however , the invalid exception is raised and the largest
positive or negative integer is returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static int32 roundAndPackInt32 ( struct roundingData * roundData , flag zSign , bits64 absZ )
2005-04-17 02:20:36 +04:00
{
int8 roundingMode ;
flag roundNearestEven ;
int8 roundIncrement , roundBits ;
int32 z ;
2005-08-03 22:49:17 +04:00
roundingMode = roundData - > mode ;
2005-04-17 02:20:36 +04:00
roundNearestEven = ( roundingMode = = float_round_nearest_even ) ;
roundIncrement = 0x40 ;
if ( ! roundNearestEven ) {
if ( roundingMode = = float_round_to_zero ) {
roundIncrement = 0 ;
}
else {
roundIncrement = 0x7F ;
if ( zSign ) {
if ( roundingMode = = float_round_up ) roundIncrement = 0 ;
}
else {
if ( roundingMode = = float_round_down ) roundIncrement = 0 ;
}
}
}
roundBits = absZ & 0x7F ;
absZ = ( absZ + roundIncrement ) > > 7 ;
absZ & = ~ ( ( ( roundBits ^ 0x40 ) = = 0 ) & roundNearestEven ) ;
z = absZ ;
if ( zSign ) z = - z ;
if ( ( absZ > > 32 ) | | ( z & & ( ( z < 0 ) ^ zSign ) ) ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return zSign ? 0x80000000 : 0x7FFFFFFF ;
}
2005-08-03 22:49:17 +04:00
if ( roundBits ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
return z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the fraction bits of the single - precision floating - point value ` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE bits32 extractFloat32Frac ( float32 a )
{
return a & 0x007FFFFF ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the exponent bits of the single - precision floating - point value ` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE int16 extractFloat32Exp ( float32 a )
{
return ( a > > 23 ) & 0xFF ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the sign bit of the single - precision floating - point value ` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
#if 0 /* in softfloat.h */
INLINE flag extractFloat32Sign ( float32 a )
{
return a > > 31 ;
}
# endif
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Normalizes the subnormal single - precision floating - point value represented
by the denormalized significand ` aSig ' . The normalized exponent and
significand are stored at the locations pointed to by ` zExpPtr ' and
` zSigPtr ' , respectively .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static void
normalizeFloat32Subnormal ( bits32 aSig , int16 * zExpPtr , bits32 * zSigPtr )
{
int8 shiftCount ;
shiftCount = countLeadingZeros32 ( aSig ) - 8 ;
* zSigPtr = aSig < < shiftCount ;
* zExpPtr = 1 - shiftCount ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Packs the sign ` zSign ' , exponent ` zExp ' , and significand ` zSig ' into a
single - precision floating - point value , returning the result . After being
shifted into the proper positions , the three fields are simply added
together to form the result . This means that any integer portion of ` zSig '
will be added into the exponent . Since a properly normalized significand
will have an integer portion equal to 1 , the ` zExp ' input should be 1 less
than the desired result exponent whenever ` zSig ' is a complete , normalized
significand .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE float32 packFloat32 ( flag zSign , int16 zExp , bits32 zSig )
{
#if 0
float32 f ;
__asm__ ( " @ packFloat32 \n \
mov % 0 , % 1 , asl # 31 \ n \
orr % 0 , % 2 , asl # 23 \ n \
orr % 0 , % 3 "
: /* no outputs */
: " g " ( f ) , " g " ( zSign ) , " g " ( zExp ) , " g " ( zSig )
: " cc " ) ;
return f ;
# else
return ( ( ( bits32 ) zSign ) < < 31 ) + ( ( ( bits32 ) zExp ) < < 23 ) + zSig ;
# endif
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Takes an abstract floating - point value having sign ` zSign ' , exponent ` zExp ' ,
and significand ` zSig ' , and returns the proper single - precision floating -
point value corresponding to the abstract input . Ordinarily , the abstract
value is simply rounded and packed into the single - precision format , with
the inexact exception raised if the abstract input cannot be represented
exactly . If the abstract value is too large , however , the overflow and
inexact exceptions are raised and an infinity or maximal finite value is
returned . If the abstract value is too small , the input value is rounded to
a subnormal number , and the underflow and inexact exceptions are raised if
the abstract input cannot be represented exactly as a subnormal single -
precision floating - point number .
The input significand ` zSig ' has its binary point between bits 30
and 29 , which is 7 bits to the left of the usual location . This shifted
significand must be normalized or smaller . If ` zSig ' is not normalized ,
` zExp ' must be 0 ; in that case , the result returned is a subnormal number ,
and it must not require rounding . In the usual case that ` zSig ' is
normalized , ` zExp ' must be 1 less than the ` ` true ' ' floating - point exponent .
The handling of underflow and overflow follows the IEC / IEEE Standard for
Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static float32 roundAndPackFloat32 ( struct roundingData * roundData , flag zSign , int16 zExp , bits32 zSig )
2005-04-17 02:20:36 +04:00
{
int8 roundingMode ;
flag roundNearestEven ;
int8 roundIncrement , roundBits ;
flag isTiny ;
2005-08-03 22:49:17 +04:00
roundingMode = roundData - > mode ;
2005-04-17 02:20:36 +04:00
roundNearestEven = ( roundingMode = = float_round_nearest_even ) ;
roundIncrement = 0x40 ;
if ( ! roundNearestEven ) {
if ( roundingMode = = float_round_to_zero ) {
roundIncrement = 0 ;
}
else {
roundIncrement = 0x7F ;
if ( zSign ) {
if ( roundingMode = = float_round_up ) roundIncrement = 0 ;
}
else {
if ( roundingMode = = float_round_down ) roundIncrement = 0 ;
}
}
}
roundBits = zSig & 0x7F ;
if ( 0xFD < = ( bits16 ) zExp ) {
if ( ( 0xFD < zExp )
| | ( ( zExp = = 0xFD )
& & ( ( sbits32 ) ( zSig + roundIncrement ) < 0 ) )
) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_overflow | float_flag_inexact ;
2005-04-17 02:20:36 +04:00
return packFloat32 ( zSign , 0xFF , 0 ) - ( roundIncrement = = 0 ) ;
}
if ( zExp < 0 ) {
isTiny =
( float_detect_tininess = = float_tininess_before_rounding )
| | ( zExp < - 1 )
| | ( zSig + roundIncrement < 0x80000000 ) ;
shift32RightJamming ( zSig , - zExp , & zSig ) ;
zExp = 0 ;
roundBits = zSig & 0x7F ;
2005-08-03 22:49:17 +04:00
if ( isTiny & & roundBits ) roundData - > exception | = float_flag_underflow ;
2005-04-17 02:20:36 +04:00
}
}
2005-08-03 22:49:17 +04:00
if ( roundBits ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
zSig = ( zSig + roundIncrement ) > > 7 ;
zSig & = ~ ( ( ( roundBits ^ 0x40 ) = = 0 ) & roundNearestEven ) ;
if ( zSig = = 0 ) zExp = 0 ;
return packFloat32 ( zSign , zExp , zSig ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Takes an abstract floating - point value having sign ` zSign ' , exponent ` zExp ' ,
and significand ` zSig ' , and returns the proper single - precision floating -
point value corresponding to the abstract input . This routine is just like
` roundAndPackFloat32 ' except that ` zSig ' does not have to be normalized in
any way . In all cases , ` zExp ' must be 1 less than the ` ` true ' ' floating -
point exponent .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static float32
2005-08-03 22:49:17 +04:00
normalizeRoundAndPackFloat32 ( struct roundingData * roundData , flag zSign , int16 zExp , bits32 zSig )
2005-04-17 02:20:36 +04:00
{
int8 shiftCount ;
shiftCount = countLeadingZeros32 ( zSig ) - 1 ;
2005-08-03 22:49:17 +04:00
return roundAndPackFloat32 ( roundData , zSign , zExp - shiftCount , zSig < < shiftCount ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the fraction bits of the double - precision floating - point value ` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE bits64 extractFloat64Frac ( float64 a )
{
return a & LIT64 ( 0x000FFFFFFFFFFFFF ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the exponent bits of the double - precision floating - point value ` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE int16 extractFloat64Exp ( float64 a )
{
return ( a > > 52 ) & 0x7FF ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the sign bit of the double - precision floating - point value ` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
#if 0 /* in softfloat.h */
INLINE flag extractFloat64Sign ( float64 a )
{
return a > > 63 ;
}
# endif
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Normalizes the subnormal double - precision floating - point value represented
by the denormalized significand ` aSig ' . The normalized exponent and
significand are stored at the locations pointed to by ` zExpPtr ' and
` zSigPtr ' , respectively .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static void
normalizeFloat64Subnormal ( bits64 aSig , int16 * zExpPtr , bits64 * zSigPtr )
{
int8 shiftCount ;
shiftCount = countLeadingZeros64 ( aSig ) - 11 ;
* zSigPtr = aSig < < shiftCount ;
* zExpPtr = 1 - shiftCount ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Packs the sign ` zSign ' , exponent ` zExp ' , and significand ` zSig ' into a
double - precision floating - point value , returning the result . After being
shifted into the proper positions , the three fields are simply added
together to form the result . This means that any integer portion of ` zSig '
will be added into the exponent . Since a properly normalized significand
will have an integer portion equal to 1 , the ` zExp ' input should be 1 less
than the desired result exponent whenever ` zSig ' is a complete , normalized
significand .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE float64 packFloat64 ( flag zSign , int16 zExp , bits64 zSig )
{
return ( ( ( bits64 ) zSign ) < < 63 ) + ( ( ( bits64 ) zExp ) < < 52 ) + zSig ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Takes an abstract floating - point value having sign ` zSign ' , exponent ` zExp ' ,
and significand ` zSig ' , and returns the proper double - precision floating -
point value corresponding to the abstract input . Ordinarily , the abstract
value is simply rounded and packed into the double - precision format , with
the inexact exception raised if the abstract input cannot be represented
exactly . If the abstract value is too large , however , the overflow and
inexact exceptions are raised and an infinity or maximal finite value is
returned . If the abstract value is too small , the input value is rounded to
a subnormal number , and the underflow and inexact exceptions are raised if
the abstract input cannot be represented exactly as a subnormal double -
precision floating - point number .
The input significand ` zSig ' has its binary point between bits 62
and 61 , which is 10 bits to the left of the usual location . This shifted
significand must be normalized or smaller . If ` zSig ' is not normalized ,
` zExp ' must be 0 ; in that case , the result returned is a subnormal number ,
and it must not require rounding . In the usual case that ` zSig ' is
normalized , ` zExp ' must be 1 less than the ` ` true ' ' floating - point exponent .
The handling of underflow and overflow follows the IEC / IEEE Standard for
Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static float64 roundAndPackFloat64 ( struct roundingData * roundData , flag zSign , int16 zExp , bits64 zSig )
2005-04-17 02:20:36 +04:00
{
int8 roundingMode ;
flag roundNearestEven ;
int16 roundIncrement , roundBits ;
flag isTiny ;
2005-08-03 22:49:17 +04:00
roundingMode = roundData - > mode ;
2005-04-17 02:20:36 +04:00
roundNearestEven = ( roundingMode = = float_round_nearest_even ) ;
roundIncrement = 0x200 ;
if ( ! roundNearestEven ) {
if ( roundingMode = = float_round_to_zero ) {
roundIncrement = 0 ;
}
else {
roundIncrement = 0x3FF ;
if ( zSign ) {
if ( roundingMode = = float_round_up ) roundIncrement = 0 ;
}
else {
if ( roundingMode = = float_round_down ) roundIncrement = 0 ;
}
}
}
roundBits = zSig & 0x3FF ;
if ( 0x7FD < = ( bits16 ) zExp ) {
if ( ( 0x7FD < zExp )
| | ( ( zExp = = 0x7FD )
& & ( ( sbits64 ) ( zSig + roundIncrement ) < 0 ) )
) {
//register int lr = __builtin_return_address(0);
//printk("roundAndPackFloat64 called from 0x%08x\n",lr);
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_overflow | float_flag_inexact ;
2005-04-17 02:20:36 +04:00
return packFloat64 ( zSign , 0x7FF , 0 ) - ( roundIncrement = = 0 ) ;
}
if ( zExp < 0 ) {
isTiny =
( float_detect_tininess = = float_tininess_before_rounding )
| | ( zExp < - 1 )
| | ( zSig + roundIncrement < LIT64 ( 0x8000000000000000 ) ) ;
shift64RightJamming ( zSig , - zExp , & zSig ) ;
zExp = 0 ;
roundBits = zSig & 0x3FF ;
2005-08-03 22:49:17 +04:00
if ( isTiny & & roundBits ) roundData - > exception | = float_flag_underflow ;
2005-04-17 02:20:36 +04:00
}
}
2005-08-03 22:49:17 +04:00
if ( roundBits ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
zSig = ( zSig + roundIncrement ) > > 10 ;
zSig & = ~ ( ( ( roundBits ^ 0x200 ) = = 0 ) & roundNearestEven ) ;
if ( zSig = = 0 ) zExp = 0 ;
return packFloat64 ( zSign , zExp , zSig ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Takes an abstract floating - point value having sign ` zSign ' , exponent ` zExp ' ,
and significand ` zSig ' , and returns the proper double - precision floating -
point value corresponding to the abstract input . This routine is just like
` roundAndPackFloat64 ' except that ` zSig ' does not have to be normalized in
any way . In all cases , ` zExp ' must be 1 less than the ` ` true ' ' floating -
point exponent .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static float64
2005-08-03 22:49:17 +04:00
normalizeRoundAndPackFloat64 ( struct roundingData * roundData , flag zSign , int16 zExp , bits64 zSig )
2005-04-17 02:20:36 +04:00
{
int8 shiftCount ;
shiftCount = countLeadingZeros64 ( zSig ) - 1 ;
2005-08-03 22:49:17 +04:00
return roundAndPackFloat64 ( roundData , zSign , zExp - shiftCount , zSig < < shiftCount ) ;
2005-04-17 02:20:36 +04:00
}
# ifdef FLOATX80
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the fraction bits of the extended double - precision floating - point
value ` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE bits64 extractFloatx80Frac ( floatx80 a )
{
return a . low ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the exponent bits of the extended double - precision floating - point
value ` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE int32 extractFloatx80Exp ( floatx80 a )
{
return a . high & 0x7FFF ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the sign bit of the extended double - precision floating - point value
` a ' .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE flag extractFloatx80Sign ( floatx80 a )
{
return a . high > > 15 ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Normalizes the subnormal extended double - precision floating - point value
represented by the denormalized significand ` aSig ' . The normalized exponent
and significand are stored at the locations pointed to by ` zExpPtr ' and
` zSigPtr ' , respectively .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static void
normalizeFloatx80Subnormal ( bits64 aSig , int32 * zExpPtr , bits64 * zSigPtr )
{
int8 shiftCount ;
shiftCount = countLeadingZeros64 ( aSig ) ;
* zSigPtr = aSig < < shiftCount ;
* zExpPtr = 1 - shiftCount ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Packs the sign ` zSign ' , exponent ` zExp ' , and significand ` zSig ' into an
extended double - precision floating - point value , returning the result .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
INLINE floatx80 packFloatx80 ( flag zSign , int32 zExp , bits64 zSig )
{
floatx80 z ;
z . low = zSig ;
z . high = ( ( ( bits16 ) zSign ) < < 15 ) + zExp ;
2005-11-08 00:12:07 +03:00
z . __padding = 0 ;
2005-04-17 02:20:36 +04:00
return z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Takes an abstract floating - point value having sign ` zSign ' , exponent ` zExp ' ,
and extended significand formed by the concatenation of ` zSig0 ' and ` zSig1 ' ,
and returns the proper extended double - precision floating - point value
corresponding to the abstract input . Ordinarily , the abstract value is
rounded and packed into the extended double - precision format , with the
inexact exception raised if the abstract input cannot be represented
exactly . If the abstract value is too large , however , the overflow and
inexact exceptions are raised and an infinity or maximal finite value is
returned . If the abstract value is too small , the input value is rounded to
a subnormal number , and the underflow and inexact exceptions are raised if
the abstract input cannot be represented exactly as a subnormal extended
double - precision floating - point number .
If ` roundingPrecision ' is 32 or 64 , the result is rounded to the same
number of bits as single or double precision , respectively . Otherwise , the
result is rounded to the full precision of the extended double - precision
format .
The input significand must be normalized or smaller . If the input
significand is not normalized , ` zExp ' must be 0 ; in that case , the result
returned is a subnormal number , and it must not require rounding . The
handling of underflow and overflow follows the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static floatx80
roundAndPackFloatx80 (
2005-08-03 22:49:17 +04:00
struct roundingData * roundData , flag zSign , int32 zExp , bits64 zSig0 , bits64 zSig1
2005-04-17 02:20:36 +04:00
)
{
2005-08-03 22:49:17 +04:00
int8 roundingMode , roundingPrecision ;
2005-04-17 02:20:36 +04:00
flag roundNearestEven , increment , isTiny ;
int64 roundIncrement , roundMask , roundBits ;
2005-08-03 22:49:17 +04:00
roundingMode = roundData - > mode ;
roundingPrecision = roundData - > precision ;
2005-04-17 02:20:36 +04:00
roundNearestEven = ( roundingMode = = float_round_nearest_even ) ;
if ( roundingPrecision = = 80 ) goto precision80 ;
if ( roundingPrecision = = 64 ) {
roundIncrement = LIT64 ( 0x0000000000000400 ) ;
roundMask = LIT64 ( 0x00000000000007FF ) ;
}
else if ( roundingPrecision = = 32 ) {
roundIncrement = LIT64 ( 0x0000008000000000 ) ;
roundMask = LIT64 ( 0x000000FFFFFFFFFF ) ;
}
else {
goto precision80 ;
}
zSig0 | = ( zSig1 ! = 0 ) ;
if ( ! roundNearestEven ) {
if ( roundingMode = = float_round_to_zero ) {
roundIncrement = 0 ;
}
else {
roundIncrement = roundMask ;
if ( zSign ) {
if ( roundingMode = = float_round_up ) roundIncrement = 0 ;
}
else {
if ( roundingMode = = float_round_down ) roundIncrement = 0 ;
}
}
}
roundBits = zSig0 & roundMask ;
if ( 0x7FFD < = ( bits32 ) ( zExp - 1 ) ) {
if ( ( 0x7FFE < zExp )
| | ( ( zExp = = 0x7FFE ) & & ( zSig0 + roundIncrement < zSig0 ) )
) {
goto overflow ;
}
if ( zExp < = 0 ) {
isTiny =
( float_detect_tininess = = float_tininess_before_rounding )
| | ( zExp < 0 )
| | ( zSig0 < = zSig0 + roundIncrement ) ;
shift64RightJamming ( zSig0 , 1 - zExp , & zSig0 ) ;
zExp = 0 ;
roundBits = zSig0 & roundMask ;
2005-08-03 22:49:17 +04:00
if ( isTiny & & roundBits ) roundData - > exception | = float_flag_underflow ;
if ( roundBits ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
zSig0 + = roundIncrement ;
if ( ( sbits64 ) zSig0 < 0 ) zExp = 1 ;
roundIncrement = roundMask + 1 ;
if ( roundNearestEven & & ( roundBits < < 1 = = roundIncrement ) ) {
roundMask | = roundIncrement ;
}
zSig0 & = ~ roundMask ;
return packFloatx80 ( zSign , zExp , zSig0 ) ;
}
}
2005-08-03 22:49:17 +04:00
if ( roundBits ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
zSig0 + = roundIncrement ;
if ( zSig0 < roundIncrement ) {
+ + zExp ;
zSig0 = LIT64 ( 0x8000000000000000 ) ;
}
roundIncrement = roundMask + 1 ;
if ( roundNearestEven & & ( roundBits < < 1 = = roundIncrement ) ) {
roundMask | = roundIncrement ;
}
zSig0 & = ~ roundMask ;
if ( zSig0 = = 0 ) zExp = 0 ;
return packFloatx80 ( zSign , zExp , zSig0 ) ;
precision80 :
increment = ( ( sbits64 ) zSig1 < 0 ) ;
if ( ! roundNearestEven ) {
if ( roundingMode = = float_round_to_zero ) {
increment = 0 ;
}
else {
if ( zSign ) {
increment = ( roundingMode = = float_round_down ) & & zSig1 ;
}
else {
increment = ( roundingMode = = float_round_up ) & & zSig1 ;
}
}
}
if ( 0x7FFD < = ( bits32 ) ( zExp - 1 ) ) {
if ( ( 0x7FFE < zExp )
| | ( ( zExp = = 0x7FFE )
& & ( zSig0 = = LIT64 ( 0xFFFFFFFFFFFFFFFF ) )
& & increment
)
) {
roundMask = 0 ;
overflow :
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_overflow | float_flag_inexact ;
2005-04-17 02:20:36 +04:00
if ( ( roundingMode = = float_round_to_zero )
| | ( zSign & & ( roundingMode = = float_round_up ) )
| | ( ! zSign & & ( roundingMode = = float_round_down ) )
) {
return packFloatx80 ( zSign , 0x7FFE , ~ roundMask ) ;
}
return packFloatx80 ( zSign , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
if ( zExp < = 0 ) {
isTiny =
( float_detect_tininess = = float_tininess_before_rounding )
| | ( zExp < 0 )
| | ! increment
| | ( zSig0 < LIT64 ( 0xFFFFFFFFFFFFFFFF ) ) ;
shift64ExtraRightJamming ( zSig0 , zSig1 , 1 - zExp , & zSig0 , & zSig1 ) ;
zExp = 0 ;
2005-08-03 22:49:17 +04:00
if ( isTiny & & zSig1 ) roundData - > exception | = float_flag_underflow ;
if ( zSig1 ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
if ( roundNearestEven ) {
increment = ( ( sbits64 ) zSig1 < 0 ) ;
}
else {
if ( zSign ) {
increment = ( roundingMode = = float_round_down ) & & zSig1 ;
}
else {
increment = ( roundingMode = = float_round_up ) & & zSig1 ;
}
}
if ( increment ) {
+ + zSig0 ;
zSig0 & = ~ ( ( zSig1 + zSig1 = = 0 ) & roundNearestEven ) ;
if ( ( sbits64 ) zSig0 < 0 ) zExp = 1 ;
}
return packFloatx80 ( zSign , zExp , zSig0 ) ;
}
}
2005-08-03 22:49:17 +04:00
if ( zSig1 ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
if ( increment ) {
+ + zSig0 ;
if ( zSig0 = = 0 ) {
+ + zExp ;
zSig0 = LIT64 ( 0x8000000000000000 ) ;
}
else {
zSig0 & = ~ ( ( zSig1 + zSig1 = = 0 ) & roundNearestEven ) ;
}
}
else {
if ( zSig0 = = 0 ) zExp = 0 ;
}
return packFloatx80 ( zSign , zExp , zSig0 ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Takes an abstract floating - point value having sign ` zSign ' , exponent
` zExp ' , and significand formed by the concatenation of ` zSig0 ' and ` zSig1 ' ,
and returns the proper extended double - precision floating - point value
corresponding to the abstract input . This routine is just like
` roundAndPackFloatx80 ' except that the input significand does not have to be
normalized .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static floatx80
normalizeRoundAndPackFloatx80 (
2005-08-03 22:49:17 +04:00
struct roundingData * roundData , flag zSign , int32 zExp , bits64 zSig0 , bits64 zSig1
2005-04-17 02:20:36 +04:00
)
{
int8 shiftCount ;
if ( zSig0 = = 0 ) {
zSig0 = zSig1 ;
zSig1 = 0 ;
zExp - = 64 ;
}
shiftCount = countLeadingZeros64 ( zSig0 ) ;
shortShift128Left ( zSig0 , zSig1 , shiftCount , & zSig0 , & zSig1 ) ;
zExp - = shiftCount ;
return
2005-08-03 22:49:17 +04:00
roundAndPackFloatx80 ( roundData , zSign , zExp , zSig0 , zSig1 ) ;
2005-04-17 02:20:36 +04:00
}
# endif
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the 32 - bit two ' s complement integer ` a ' to
the single - precision floating - point format . The conversion is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 int32_to_float32 ( struct roundingData * roundData , int32 a )
2005-04-17 02:20:36 +04:00
{
flag zSign ;
if ( a = = 0 ) return 0 ;
if ( a = = 0x80000000 ) return packFloat32 ( 1 , 0x9E , 0 ) ;
zSign = ( a < 0 ) ;
2005-08-03 22:49:17 +04:00
return normalizeRoundAndPackFloat32 ( roundData , zSign , 0x9C , zSign ? - a : a ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the 32 - bit two ' s complement integer ` a ' to
the double - precision floating - point format . The conversion is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
float64 int32_to_float64 ( int32 a )
{
flag aSign ;
uint32 absA ;
int8 shiftCount ;
bits64 zSig ;
if ( a = = 0 ) return 0 ;
aSign = ( a < 0 ) ;
absA = aSign ? - a : a ;
shiftCount = countLeadingZeros32 ( absA ) + 21 ;
zSig = absA ;
return packFloat64 ( aSign , 0x432 - shiftCount , zSig < < shiftCount ) ;
}
# ifdef FLOATX80
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the 32 - bit two ' s complement integer ` a '
to the extended double - precision floating - point format . The conversion
is performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
floatx80 int32_to_floatx80 ( int32 a )
{
flag zSign ;
uint32 absA ;
int8 shiftCount ;
bits64 zSig ;
if ( a = = 0 ) return packFloatx80 ( 0 , 0 , 0 ) ;
zSign = ( a < 0 ) ;
absA = zSign ? - a : a ;
shiftCount = countLeadingZeros32 ( absA ) + 32 ;
zSig = absA ;
return packFloatx80 ( zSign , 0x403E - shiftCount , zSig < < shiftCount ) ;
}
# endif
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the single - precision floating - point value
` a ' to the 32 - bit two ' s complement integer format . The conversion is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic - - - which means in particular that the conversion is rounded
according to the current rounding mode . If ` a ' is a NaN , the largest
positive integer is returned . Otherwise , if the conversion overflows , the
largest integer with the same sign as ` a ' is returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
int32 float32_to_int32 ( struct roundingData * roundData , float32 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int16 aExp , shiftCount ;
bits32 aSig ;
bits64 zSig ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
aSign = extractFloat32Sign ( a ) ;
if ( ( aExp = = 0x7FF ) & & aSig ) aSign = 0 ;
if ( aExp ) aSig | = 0x00800000 ;
shiftCount = 0xAF - aExp ;
zSig = aSig ;
zSig < < = 32 ;
if ( 0 < shiftCount ) shift64RightJamming ( zSig , shiftCount , & zSig ) ;
2005-08-03 22:49:17 +04:00
return roundAndPackInt32 ( roundData , aSign , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the single - precision floating - point value
` a ' to the 32 - bit two ' s complement integer format . The conversion is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic , except that the conversion is always rounded toward zero . If
` a ' is a NaN , the largest positive integer is returned . Otherwise , if the
conversion overflows , the largest integer with the same sign as ` a ' is
returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
int32 float32_to_int32_round_to_zero ( float32 a )
{
flag aSign ;
int16 aExp , shiftCount ;
bits32 aSig ;
int32 z ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
aSign = extractFloat32Sign ( a ) ;
shiftCount = aExp - 0x9E ;
if ( 0 < = shiftCount ) {
if ( a = = 0xCF000000 ) return 0x80000000 ;
float_raise ( float_flag_invalid ) ;
if ( ! aSign | | ( ( aExp = = 0xFF ) & & aSig ) ) return 0x7FFFFFFF ;
return 0x80000000 ;
}
else if ( aExp < = 0x7E ) {
2005-08-03 22:49:17 +04:00
if ( aExp | aSig ) float_raise ( float_flag_inexact ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSig = ( aSig | 0x00800000 ) < < 8 ;
z = aSig > > ( - shiftCount ) ;
if ( ( bits32 ) ( aSig < < ( shiftCount & 31 ) ) ) {
2005-08-03 22:49:17 +04:00
float_raise ( float_flag_inexact ) ;
2005-04-17 02:20:36 +04:00
}
return aSign ? - z : z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the single - precision floating - point value
` a ' to the double - precision floating - point format . The conversion is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
float64 float32_to_float64 ( float32 a )
{
flag aSign ;
int16 aExp ;
bits32 aSig ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
aSign = extractFloat32Sign ( a ) ;
if ( aExp = = 0xFF ) {
if ( aSig ) return commonNaNToFloat64 ( float32ToCommonNaN ( a ) ) ;
return packFloat64 ( aSign , 0x7FF , 0 ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloat64 ( aSign , 0 , 0 ) ;
normalizeFloat32Subnormal ( aSig , & aExp , & aSig ) ;
- - aExp ;
}
return packFloat64 ( aSign , aExp + 0x380 , ( ( bits64 ) aSig ) < < 29 ) ;
}
# ifdef FLOATX80
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the single - precision floating - point value
` a ' to the extended double - precision floating - point format . The conversion
is performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
floatx80 float32_to_floatx80 ( float32 a )
{
flag aSign ;
int16 aExp ;
bits32 aSig ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
aSign = extractFloat32Sign ( a ) ;
if ( aExp = = 0xFF ) {
if ( aSig ) return commonNaNToFloatx80 ( float32ToCommonNaN ( a ) ) ;
return packFloatx80 ( aSign , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloatx80 ( aSign , 0 , 0 ) ;
normalizeFloat32Subnormal ( aSig , & aExp , & aSig ) ;
}
aSig | = 0x00800000 ;
return packFloatx80 ( aSign , aExp + 0x3F80 , ( ( bits64 ) aSig ) < < 40 ) ;
}
# endif
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Rounds the single - precision floating - point value ` a ' to an integer , and
returns the result as a single - precision floating - point value . The
operation is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 float32_round_to_int ( struct roundingData * roundData , float32 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int16 aExp ;
bits32 lastBitMask , roundBitsMask ;
int8 roundingMode ;
float32 z ;
aExp = extractFloat32Exp ( a ) ;
if ( 0x96 < = aExp ) {
if ( ( aExp = = 0xFF ) & & extractFloat32Frac ( a ) ) {
return propagateFloat32NaN ( a , a ) ;
}
return a ;
}
2005-08-03 22:49:17 +04:00
roundingMode = roundData - > mode ;
2005-04-17 02:20:36 +04:00
if ( aExp < = 0x7E ) {
if ( ( bits32 ) ( a < < 1 ) = = 0 ) return a ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
aSign = extractFloat32Sign ( a ) ;
2005-08-03 22:49:17 +04:00
switch ( roundingMode ) {
2005-04-17 02:20:36 +04:00
case float_round_nearest_even :
if ( ( aExp = = 0x7E ) & & extractFloat32Frac ( a ) ) {
return packFloat32 ( aSign , 0x7F , 0 ) ;
}
break ;
case float_round_down :
return aSign ? 0xBF800000 : 0 ;
case float_round_up :
return aSign ? 0x80000000 : 0x3F800000 ;
}
return packFloat32 ( aSign , 0 , 0 ) ;
}
lastBitMask = 1 ;
lastBitMask < < = 0x96 - aExp ;
roundBitsMask = lastBitMask - 1 ;
z = a ;
if ( roundingMode = = float_round_nearest_even ) {
z + = lastBitMask > > 1 ;
if ( ( z & roundBitsMask ) = = 0 ) z & = ~ lastBitMask ;
}
else if ( roundingMode ! = float_round_to_zero ) {
if ( extractFloat32Sign ( z ) ^ ( roundingMode = = float_round_up ) ) {
z + = roundBitsMask ;
}
}
z & = ~ roundBitsMask ;
2005-08-03 22:49:17 +04:00
if ( z ! = a ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
return z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of adding the absolute values of the single - precision
floating - point values ` a ' and ` b ' . If ` zSign ' is true , the sum is negated
before being returned . ` zSign ' is ignored if the result is a NaN . The
addition is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static float32 addFloat32Sigs ( struct roundingData * roundData , float32 a , float32 b , flag zSign )
2005-04-17 02:20:36 +04:00
{
int16 aExp , bExp , zExp ;
bits32 aSig , bSig , zSig ;
int16 expDiff ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
bSig = extractFloat32Frac ( b ) ;
bExp = extractFloat32Exp ( b ) ;
expDiff = aExp - bExp ;
aSig < < = 6 ;
bSig < < = 6 ;
if ( 0 < expDiff ) {
if ( aExp = = 0xFF ) {
if ( aSig ) return propagateFloat32NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) {
- - expDiff ;
}
else {
bSig | = 0x20000000 ;
}
shift32RightJamming ( bSig , expDiff , & bSig ) ;
zExp = aExp ;
}
else if ( expDiff < 0 ) {
if ( bExp = = 0xFF ) {
if ( bSig ) return propagateFloat32NaN ( a , b ) ;
return packFloat32 ( zSign , 0xFF , 0 ) ;
}
if ( aExp = = 0 ) {
+ + expDiff ;
}
else {
aSig | = 0x20000000 ;
}
shift32RightJamming ( aSig , - expDiff , & aSig ) ;
zExp = bExp ;
}
else {
if ( aExp = = 0xFF ) {
if ( aSig | bSig ) return propagateFloat32NaN ( a , b ) ;
return a ;
}
if ( aExp = = 0 ) return packFloat32 ( zSign , 0 , ( aSig + bSig ) > > 6 ) ;
zSig = 0x40000000 + aSig + bSig ;
zExp = aExp ;
goto roundAndPack ;
}
aSig | = 0x20000000 ;
zSig = ( aSig + bSig ) < < 1 ;
- - zExp ;
if ( ( sbits32 ) zSig < 0 ) {
zSig = aSig + bSig ;
+ + zExp ;
}
roundAndPack :
2005-08-03 22:49:17 +04:00
return roundAndPackFloat32 ( roundData , zSign , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of subtracting the absolute values of the single -
precision floating - point values ` a ' and ` b ' . If ` zSign ' is true , the
difference is negated before being returned . ` zSign ' is ignored if the
result is a NaN . The subtraction is performed according to the IEC / IEEE
Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static float32 subFloat32Sigs ( struct roundingData * roundData , float32 a , float32 b , flag zSign )
2005-04-17 02:20:36 +04:00
{
int16 aExp , bExp , zExp ;
bits32 aSig , bSig , zSig ;
int16 expDiff ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
bSig = extractFloat32Frac ( b ) ;
bExp = extractFloat32Exp ( b ) ;
expDiff = aExp - bExp ;
aSig < < = 7 ;
bSig < < = 7 ;
if ( 0 < expDiff ) goto aExpBigger ;
if ( expDiff < 0 ) goto bExpBigger ;
if ( aExp = = 0xFF ) {
if ( aSig | bSig ) return propagateFloat32NaN ( a , b ) ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
if ( aExp = = 0 ) {
aExp = 1 ;
bExp = 1 ;
}
if ( bSig < aSig ) goto aBigger ;
if ( aSig < bSig ) goto bBigger ;
2005-08-03 22:49:17 +04:00
return packFloat32 ( roundData - > mode = = float_round_down , 0 , 0 ) ;
2005-04-17 02:20:36 +04:00
bExpBigger :
if ( bExp = = 0xFF ) {
if ( bSig ) return propagateFloat32NaN ( a , b ) ;
return packFloat32 ( zSign ^ 1 , 0xFF , 0 ) ;
}
if ( aExp = = 0 ) {
+ + expDiff ;
}
else {
aSig | = 0x40000000 ;
}
shift32RightJamming ( aSig , - expDiff , & aSig ) ;
bSig | = 0x40000000 ;
bBigger :
zSig = bSig - aSig ;
zExp = bExp ;
zSign ^ = 1 ;
goto normalizeRoundAndPack ;
aExpBigger :
if ( aExp = = 0xFF ) {
if ( aSig ) return propagateFloat32NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) {
- - expDiff ;
}
else {
bSig | = 0x40000000 ;
}
shift32RightJamming ( bSig , expDiff , & bSig ) ;
aSig | = 0x40000000 ;
aBigger :
zSig = aSig - bSig ;
zExp = aExp ;
normalizeRoundAndPack :
- - zExp ;
2005-08-03 22:49:17 +04:00
return normalizeRoundAndPackFloat32 ( roundData , zSign , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of adding the single - precision floating - point values ` a '
and ` b ' . The operation is performed according to the IEC / IEEE Standard for
Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 float32_add ( struct roundingData * roundData , float32 a , float32 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign ;
aSign = extractFloat32Sign ( a ) ;
bSign = extractFloat32Sign ( b ) ;
if ( aSign = = bSign ) {
2005-08-03 22:49:17 +04:00
return addFloat32Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
else {
2005-08-03 22:49:17 +04:00
return subFloat32Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of subtracting the single - precision floating - point values
` a ' and ` b ' . The operation is performed according to the IEC / IEEE Standard
for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 float32_sub ( struct roundingData * roundData , float32 a , float32 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign ;
aSign = extractFloat32Sign ( a ) ;
bSign = extractFloat32Sign ( b ) ;
if ( aSign = = bSign ) {
2005-08-03 22:49:17 +04:00
return subFloat32Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
else {
2005-08-03 22:49:17 +04:00
return addFloat32Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of multiplying the single - precision floating - point values
` a ' and ` b ' . The operation is performed according to the IEC / IEEE Standard
for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 float32_mul ( struct roundingData * roundData , float32 a , float32 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int16 aExp , bExp , zExp ;
bits32 aSig , bSig ;
bits64 zSig64 ;
bits32 zSig ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
aSign = extractFloat32Sign ( a ) ;
bSig = extractFloat32Frac ( b ) ;
bExp = extractFloat32Exp ( b ) ;
bSign = extractFloat32Sign ( b ) ;
zSign = aSign ^ bSign ;
if ( aExp = = 0xFF ) {
if ( aSig | | ( ( bExp = = 0xFF ) & & bSig ) ) {
return propagateFloat32NaN ( a , b ) ;
}
if ( ( bExp | bSig ) = = 0 ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
return packFloat32 ( zSign , 0xFF , 0 ) ;
}
if ( bExp = = 0xFF ) {
if ( bSig ) return propagateFloat32NaN ( a , b ) ;
if ( ( aExp | aSig ) = = 0 ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
return packFloat32 ( zSign , 0xFF , 0 ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloat32 ( zSign , 0 , 0 ) ;
normalizeFloat32Subnormal ( aSig , & aExp , & aSig ) ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) return packFloat32 ( zSign , 0 , 0 ) ;
normalizeFloat32Subnormal ( bSig , & bExp , & bSig ) ;
}
zExp = aExp + bExp - 0x7F ;
aSig = ( aSig | 0x00800000 ) < < 7 ;
bSig = ( bSig | 0x00800000 ) < < 8 ;
shift64RightJamming ( ( ( bits64 ) aSig ) * bSig , 32 , & zSig64 ) ;
zSig = zSig64 ;
if ( 0 < = ( sbits32 ) ( zSig < < 1 ) ) {
zSig < < = 1 ;
- - zExp ;
}
2005-08-03 22:49:17 +04:00
return roundAndPackFloat32 ( roundData , zSign , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of dividing the single - precision floating - point value ` a '
by the corresponding value ` b ' . The operation is performed according to the
IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 float32_div ( struct roundingData * roundData , float32 a , float32 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int16 aExp , bExp , zExp ;
bits32 aSig , bSig , zSig ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
aSign = extractFloat32Sign ( a ) ;
bSig = extractFloat32Frac ( b ) ;
bExp = extractFloat32Exp ( b ) ;
bSign = extractFloat32Sign ( b ) ;
zSign = aSign ^ bSign ;
if ( aExp = = 0xFF ) {
if ( aSig ) return propagateFloat32NaN ( a , b ) ;
if ( bExp = = 0xFF ) {
if ( bSig ) return propagateFloat32NaN ( a , b ) ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
return packFloat32 ( zSign , 0xFF , 0 ) ;
}
if ( bExp = = 0xFF ) {
if ( bSig ) return propagateFloat32NaN ( a , b ) ;
return packFloat32 ( zSign , 0 , 0 ) ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) {
if ( ( aExp | aSig ) = = 0 ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_divbyzero ;
2005-04-17 02:20:36 +04:00
return packFloat32 ( zSign , 0xFF , 0 ) ;
}
normalizeFloat32Subnormal ( bSig , & bExp , & bSig ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloat32 ( zSign , 0 , 0 ) ;
normalizeFloat32Subnormal ( aSig , & aExp , & aSig ) ;
}
zExp = aExp - bExp + 0x7D ;
aSig = ( aSig | 0x00800000 ) < < 7 ;
bSig = ( bSig | 0x00800000 ) < < 8 ;
if ( bSig < = ( aSig + aSig ) ) {
aSig > > = 1 ;
+ + zExp ;
}
2005-06-24 00:56:46 +04:00
{
bits64 tmp = ( ( bits64 ) aSig ) < < 32 ;
do_div ( tmp , bSig ) ;
zSig = tmp ;
}
2005-04-17 02:20:36 +04:00
if ( ( zSig & 0x3F ) = = 0 ) {
zSig | = ( ( ( bits64 ) bSig ) * zSig ! = ( ( bits64 ) aSig ) < < 32 ) ;
}
2005-08-03 22:49:17 +04:00
return roundAndPackFloat32 ( roundData , zSign , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the remainder of the single - precision floating - point value ` a '
with respect to the corresponding value ` b ' . The operation is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 float32_rem ( struct roundingData * roundData , float32 a , float32 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int16 aExp , bExp , expDiff ;
bits32 aSig , bSig ;
bits32 q ;
bits64 aSig64 , bSig64 , q64 ;
bits32 alternateASig ;
sbits32 sigMean ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
aSign = extractFloat32Sign ( a ) ;
bSig = extractFloat32Frac ( b ) ;
bExp = extractFloat32Exp ( b ) ;
bSign = extractFloat32Sign ( b ) ;
if ( aExp = = 0xFF ) {
if ( aSig | | ( ( bExp = = 0xFF ) & & bSig ) ) {
return propagateFloat32NaN ( a , b ) ;
}
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
if ( bExp = = 0xFF ) {
if ( bSig ) return propagateFloat32NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
normalizeFloat32Subnormal ( bSig , & bExp , & bSig ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return a ;
normalizeFloat32Subnormal ( aSig , & aExp , & aSig ) ;
}
expDiff = aExp - bExp ;
aSig | = 0x00800000 ;
bSig | = 0x00800000 ;
if ( expDiff < 32 ) {
aSig < < = 8 ;
bSig < < = 8 ;
if ( expDiff < 0 ) {
if ( expDiff < - 1 ) return a ;
aSig > > = 1 ;
}
q = ( bSig < = aSig ) ;
if ( q ) aSig - = bSig ;
if ( 0 < expDiff ) {
2005-06-24 00:56:46 +04:00
bits64 tmp = ( ( bits64 ) aSig ) < < 32 ;
do_div ( tmp , bSig ) ;
q = tmp ;
2005-04-17 02:20:36 +04:00
q > > = 32 - expDiff ;
bSig > > = 2 ;
aSig = ( ( aSig > > 1 ) < < ( expDiff - 1 ) ) - bSig * q ;
}
else {
aSig > > = 2 ;
bSig > > = 2 ;
}
}
else {
if ( bSig < = aSig ) aSig - = bSig ;
aSig64 = ( ( bits64 ) aSig ) < < 40 ;
bSig64 = ( ( bits64 ) bSig ) < < 40 ;
expDiff - = 64 ;
while ( 0 < expDiff ) {
q64 = estimateDiv128To64 ( aSig64 , 0 , bSig64 ) ;
q64 = ( 2 < q64 ) ? q64 - 2 : 0 ;
aSig64 = - ( ( bSig * q64 ) < < 38 ) ;
expDiff - = 62 ;
}
expDiff + = 64 ;
q64 = estimateDiv128To64 ( aSig64 , 0 , bSig64 ) ;
q64 = ( 2 < q64 ) ? q64 - 2 : 0 ;
q = q64 > > ( 64 - expDiff ) ;
bSig < < = 6 ;
aSig = ( ( aSig64 > > 33 ) < < ( expDiff - 1 ) ) - bSig * q ;
}
do {
alternateASig = aSig ;
+ + q ;
aSig - = bSig ;
} while ( 0 < = ( sbits32 ) aSig ) ;
sigMean = aSig + alternateASig ;
if ( ( sigMean < 0 ) | | ( ( sigMean = = 0 ) & & ( q & 1 ) ) ) {
aSig = alternateASig ;
}
zSign = ( ( sbits32 ) aSig < 0 ) ;
if ( zSign ) aSig = - aSig ;
2005-08-03 22:49:17 +04:00
return normalizeRoundAndPackFloat32 ( roundData , aSign ^ zSign , bExp , aSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the square root of the single - precision floating - point value ` a ' .
The operation is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 float32_sqrt ( struct roundingData * roundData , float32 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int16 aExp , zExp ;
bits32 aSig , zSig ;
bits64 rem , term ;
aSig = extractFloat32Frac ( a ) ;
aExp = extractFloat32Exp ( a ) ;
aSign = extractFloat32Sign ( a ) ;
if ( aExp = = 0xFF ) {
if ( aSig ) return propagateFloat32NaN ( a , 0 ) ;
if ( ! aSign ) return a ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
if ( aSign ) {
if ( ( aExp | aSig ) = = 0 ) return a ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float32_default_nan ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return 0 ;
normalizeFloat32Subnormal ( aSig , & aExp , & aSig ) ;
}
zExp = ( ( aExp - 0x7F ) > > 1 ) + 0x7E ;
aSig = ( aSig | 0x00800000 ) < < 8 ;
zSig = estimateSqrt32 ( aExp , aSig ) + 2 ;
if ( ( zSig & 0x7F ) < = 5 ) {
if ( zSig < 2 ) {
zSig = 0xFFFFFFFF ;
}
else {
aSig > > = aExp & 1 ;
term = ( ( bits64 ) zSig ) * zSig ;
rem = ( ( ( bits64 ) aSig ) < < 32 ) - term ;
while ( ( sbits64 ) rem < 0 ) {
- - zSig ;
rem + = ( ( ( bits64 ) zSig ) < < 1 ) | 1 ;
}
zSig | = ( rem ! = 0 ) ;
}
}
shift32RightJamming ( zSig , 1 , & zSig ) ;
2005-08-03 22:49:17 +04:00
return roundAndPackFloat32 ( roundData , 0 , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the single - precision floating - point value ` a ' is equal to the
corresponding value ` b ' , and 0 otherwise . The comparison is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float32_eq ( float32 a , float32 b )
{
if ( ( ( extractFloat32Exp ( a ) = = 0xFF ) & & extractFloat32Frac ( a ) )
| | ( ( extractFloat32Exp ( b ) = = 0xFF ) & & extractFloat32Frac ( b ) )
) {
if ( float32_is_signaling_nan ( a ) | | float32_is_signaling_nan ( b ) ) {
float_raise ( float_flag_invalid ) ;
}
return 0 ;
}
return ( a = = b ) | | ( ( bits32 ) ( ( a | b ) < < 1 ) = = 0 ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the single - precision floating - point value ` a ' is less than or
equal to the corresponding value ` b ' , and 0 otherwise . The comparison is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float32_le ( float32 a , float32 b )
{
flag aSign , bSign ;
if ( ( ( extractFloat32Exp ( a ) = = 0xFF ) & & extractFloat32Frac ( a ) )
| | ( ( extractFloat32Exp ( b ) = = 0xFF ) & & extractFloat32Frac ( b ) )
) {
float_raise ( float_flag_invalid ) ;
return 0 ;
}
aSign = extractFloat32Sign ( a ) ;
bSign = extractFloat32Sign ( b ) ;
if ( aSign ! = bSign ) return aSign | | ( ( bits32 ) ( ( a | b ) < < 1 ) = = 0 ) ;
return ( a = = b ) | | ( aSign ^ ( a < b ) ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the single - precision floating - point value ` a ' is less than
the corresponding value ` b ' , and 0 otherwise . The comparison is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float32_lt ( float32 a , float32 b )
{
flag aSign , bSign ;
if ( ( ( extractFloat32Exp ( a ) = = 0xFF ) & & extractFloat32Frac ( a ) )
| | ( ( extractFloat32Exp ( b ) = = 0xFF ) & & extractFloat32Frac ( b ) )
) {
float_raise ( float_flag_invalid ) ;
return 0 ;
}
aSign = extractFloat32Sign ( a ) ;
bSign = extractFloat32Sign ( b ) ;
if ( aSign ! = bSign ) return aSign & & ( ( bits32 ) ( ( a | b ) < < 1 ) ! = 0 ) ;
return ( a ! = b ) & & ( aSign ^ ( a < b ) ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the single - precision floating - point value ` a ' is equal to the
corresponding value ` b ' , and 0 otherwise . The invalid exception is raised
if either operand is a NaN . Otherwise , the comparison is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float32_eq_signaling ( float32 a , float32 b )
{
if ( ( ( extractFloat32Exp ( a ) = = 0xFF ) & & extractFloat32Frac ( a ) )
| | ( ( extractFloat32Exp ( b ) = = 0xFF ) & & extractFloat32Frac ( b ) )
) {
float_raise ( float_flag_invalid ) ;
return 0 ;
}
return ( a = = b ) | | ( ( bits32 ) ( ( a | b ) < < 1 ) = = 0 ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the single - precision floating - point value ` a ' is less than or
equal to the corresponding value ` b ' , and 0 otherwise . Quiet NaNs do not
cause an exception . Otherwise , the comparison is performed according to the
IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float32_le_quiet ( float32 a , float32 b )
{
flag aSign , bSign ;
//int16 aExp, bExp;
if ( ( ( extractFloat32Exp ( a ) = = 0xFF ) & & extractFloat32Frac ( a ) )
| | ( ( extractFloat32Exp ( b ) = = 0xFF ) & & extractFloat32Frac ( b ) )
) {
2005-08-15 23:42:32 +04:00
/* Do nothing, even if NaN as we're quiet */
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSign = extractFloat32Sign ( a ) ;
bSign = extractFloat32Sign ( b ) ;
if ( aSign ! = bSign ) return aSign | | ( ( bits32 ) ( ( a | b ) < < 1 ) = = 0 ) ;
return ( a = = b ) | | ( aSign ^ ( a < b ) ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the single - precision floating - point value ` a ' is less than
the corresponding value ` b ' , and 0 otherwise . Quiet NaNs do not cause an
exception . Otherwise , the comparison is performed according to the IEC / IEEE
Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float32_lt_quiet ( float32 a , float32 b )
{
flag aSign , bSign ;
if ( ( ( extractFloat32Exp ( a ) = = 0xFF ) & & extractFloat32Frac ( a ) )
| | ( ( extractFloat32Exp ( b ) = = 0xFF ) & & extractFloat32Frac ( b ) )
) {
2005-08-15 23:42:32 +04:00
/* Do nothing, even if NaN as we're quiet */
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSign = extractFloat32Sign ( a ) ;
bSign = extractFloat32Sign ( b ) ;
if ( aSign ! = bSign ) return aSign & & ( ( bits32 ) ( ( a | b ) < < 1 ) ! = 0 ) ;
return ( a ! = b ) & & ( aSign ^ ( a < b ) ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the double - precision floating - point value
` a ' to the 32 - bit two ' s complement integer format . The conversion is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic - - - which means in particular that the conversion is rounded
according to the current rounding mode . If ` a ' is a NaN , the largest
positive integer is returned . Otherwise , if the conversion overflows , the
largest integer with the same sign as ` a ' is returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
int32 float64_to_int32 ( struct roundingData * roundData , float64 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int16 aExp , shiftCount ;
bits64 aSig ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
if ( ( aExp = = 0x7FF ) & & aSig ) aSign = 0 ;
if ( aExp ) aSig | = LIT64 ( 0x0010000000000000 ) ;
shiftCount = 0x42C - aExp ;
if ( 0 < shiftCount ) shift64RightJamming ( aSig , shiftCount , & aSig ) ;
2005-08-03 22:49:17 +04:00
return roundAndPackInt32 ( roundData , aSign , aSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the double - precision floating - point value
` a ' to the 32 - bit two ' s complement integer format . The conversion is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic , except that the conversion is always rounded toward zero . If
` a ' is a NaN , the largest positive integer is returned . Otherwise , if the
conversion overflows , the largest integer with the same sign as ` a ' is
returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
int32 float64_to_int32_round_to_zero ( float64 a )
{
flag aSign ;
int16 aExp , shiftCount ;
bits64 aSig , savedASig ;
int32 z ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
shiftCount = 0x433 - aExp ;
if ( shiftCount < 21 ) {
if ( ( aExp = = 0x7FF ) & & aSig ) aSign = 0 ;
goto invalid ;
}
else if ( 52 < shiftCount ) {
2005-08-03 22:49:17 +04:00
if ( aExp | | aSig ) float_raise ( float_flag_inexact ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSig | = LIT64 ( 0x0010000000000000 ) ;
savedASig = aSig ;
aSig > > = shiftCount ;
z = aSig ;
if ( aSign ) z = - z ;
if ( ( z < 0 ) ^ aSign ) {
invalid :
2005-08-03 22:49:17 +04:00
float_raise ( float_flag_invalid ) ;
2005-04-17 02:20:36 +04:00
return aSign ? 0x80000000 : 0x7FFFFFFF ;
}
if ( ( aSig < < shiftCount ) ! = savedASig ) {
2005-08-03 22:49:17 +04:00
float_raise ( float_flag_inexact ) ;
2005-04-17 02:20:36 +04:00
}
return z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the double - precision floating - point value
` a ' to the 32 - bit two ' s complement unsigned integer format . The conversion
is performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic - - - which means in particular that the conversion is rounded
according to the current rounding mode . If ` a ' is a NaN , the largest
positive integer is returned . Otherwise , if the conversion overflows , the
largest positive integer is returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
int32 float64_to_uint32 ( struct roundingData * roundData , float64 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int16 aExp , shiftCount ;
bits64 aSig ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = 0 ; //extractFloat64Sign( a );
//if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
if ( aExp ) aSig | = LIT64 ( 0x0010000000000000 ) ;
shiftCount = 0x42C - aExp ;
if ( 0 < shiftCount ) shift64RightJamming ( aSig , shiftCount , & aSig ) ;
2005-08-03 22:49:17 +04:00
return roundAndPackInt32 ( roundData , aSign , aSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the double - precision floating - point value
` a ' to the 32 - bit two ' s complement integer format . The conversion is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic , except that the conversion is always rounded toward zero . If
` a ' is a NaN , the largest positive integer is returned . Otherwise , if the
conversion overflows , the largest positive integer is returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
int32 float64_to_uint32_round_to_zero ( float64 a )
{
flag aSign ;
int16 aExp , shiftCount ;
bits64 aSig , savedASig ;
int32 z ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
shiftCount = 0x433 - aExp ;
if ( shiftCount < 21 ) {
if ( ( aExp = = 0x7FF ) & & aSig ) aSign = 0 ;
goto invalid ;
}
else if ( 52 < shiftCount ) {
2005-08-03 22:49:17 +04:00
if ( aExp | | aSig ) float_raise ( float_flag_inexact ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSig | = LIT64 ( 0x0010000000000000 ) ;
savedASig = aSig ;
aSig > > = shiftCount ;
z = aSig ;
if ( aSign ) z = - z ;
if ( ( z < 0 ) ^ aSign ) {
invalid :
2005-08-03 22:49:17 +04:00
float_raise ( float_flag_invalid ) ;
2005-04-17 02:20:36 +04:00
return aSign ? 0x80000000 : 0x7FFFFFFF ;
}
if ( ( aSig < < shiftCount ) ! = savedASig ) {
2005-08-03 22:49:17 +04:00
float_raise ( float_flag_inexact ) ;
2005-04-17 02:20:36 +04:00
}
return z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the double - precision floating - point value
` a ' to the single - precision floating - point format . The conversion is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 float64_to_float32 ( struct roundingData * roundData , float64 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int16 aExp ;
bits64 aSig ;
bits32 zSig ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
if ( aExp = = 0x7FF ) {
if ( aSig ) return commonNaNToFloat32 ( float64ToCommonNaN ( a ) ) ;
return packFloat32 ( aSign , 0xFF , 0 ) ;
}
shift64RightJamming ( aSig , 22 , & aSig ) ;
zSig = aSig ;
if ( aExp | | zSig ) {
zSig | = 0x40000000 ;
aExp - = 0x381 ;
}
2005-08-03 22:49:17 +04:00
return roundAndPackFloat32 ( roundData , aSign , aExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
# ifdef FLOATX80
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the double - precision floating - point value
` a ' to the extended double - precision floating - point format . The conversion
is performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
floatx80 float64_to_floatx80 ( float64 a )
{
flag aSign ;
int16 aExp ;
bits64 aSig ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
if ( aExp = = 0x7FF ) {
if ( aSig ) return commonNaNToFloatx80 ( float64ToCommonNaN ( a ) ) ;
return packFloatx80 ( aSign , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloatx80 ( aSign , 0 , 0 ) ;
normalizeFloat64Subnormal ( aSig , & aExp , & aSig ) ;
}
return
packFloatx80 (
aSign , aExp + 0x3C00 , ( aSig | LIT64 ( 0x0010000000000000 ) ) < < 11 ) ;
}
# endif
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Rounds the double - precision floating - point value ` a ' to an integer , and
returns the result as a double - precision floating - point value . The
operation is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float64 float64_round_to_int ( struct roundingData * roundData , float64 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int16 aExp ;
bits64 lastBitMask , roundBitsMask ;
int8 roundingMode ;
float64 z ;
aExp = extractFloat64Exp ( a ) ;
if ( 0x433 < = aExp ) {
if ( ( aExp = = 0x7FF ) & & extractFloat64Frac ( a ) ) {
return propagateFloat64NaN ( a , a ) ;
}
return a ;
}
if ( aExp < = 0x3FE ) {
if ( ( bits64 ) ( a < < 1 ) = = 0 ) return a ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
aSign = extractFloat64Sign ( a ) ;
2005-08-03 22:49:17 +04:00
switch ( roundData - > mode ) {
2005-04-17 02:20:36 +04:00
case float_round_nearest_even :
if ( ( aExp = = 0x3FE ) & & extractFloat64Frac ( a ) ) {
return packFloat64 ( aSign , 0x3FF , 0 ) ;
}
break ;
case float_round_down :
return aSign ? LIT64 ( 0xBFF0000000000000 ) : 0 ;
case float_round_up :
return
aSign ? LIT64 ( 0x8000000000000000 ) : LIT64 ( 0x3FF0000000000000 ) ;
}
return packFloat64 ( aSign , 0 , 0 ) ;
}
lastBitMask = 1 ;
lastBitMask < < = 0x433 - aExp ;
roundBitsMask = lastBitMask - 1 ;
z = a ;
2005-08-03 22:49:17 +04:00
roundingMode = roundData - > mode ;
2005-04-17 02:20:36 +04:00
if ( roundingMode = = float_round_nearest_even ) {
z + = lastBitMask > > 1 ;
if ( ( z & roundBitsMask ) = = 0 ) z & = ~ lastBitMask ;
}
else if ( roundingMode ! = float_round_to_zero ) {
if ( extractFloat64Sign ( z ) ^ ( roundingMode = = float_round_up ) ) {
z + = roundBitsMask ;
}
}
z & = ~ roundBitsMask ;
2005-08-03 22:49:17 +04:00
if ( z ! = a ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
return z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of adding the absolute values of the double - precision
floating - point values ` a ' and ` b ' . If ` zSign ' is true , the sum is negated
before being returned . ` zSign ' is ignored if the result is a NaN . The
addition is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static float64 addFloat64Sigs ( struct roundingData * roundData , float64 a , float64 b , flag zSign )
2005-04-17 02:20:36 +04:00
{
int16 aExp , bExp , zExp ;
bits64 aSig , bSig , zSig ;
int16 expDiff ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
bSig = extractFloat64Frac ( b ) ;
bExp = extractFloat64Exp ( b ) ;
expDiff = aExp - bExp ;
aSig < < = 9 ;
bSig < < = 9 ;
if ( 0 < expDiff ) {
if ( aExp = = 0x7FF ) {
if ( aSig ) return propagateFloat64NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) {
- - expDiff ;
}
else {
bSig | = LIT64 ( 0x2000000000000000 ) ;
}
shift64RightJamming ( bSig , expDiff , & bSig ) ;
zExp = aExp ;
}
else if ( expDiff < 0 ) {
if ( bExp = = 0x7FF ) {
if ( bSig ) return propagateFloat64NaN ( a , b ) ;
return packFloat64 ( zSign , 0x7FF , 0 ) ;
}
if ( aExp = = 0 ) {
+ + expDiff ;
}
else {
aSig | = LIT64 ( 0x2000000000000000 ) ;
}
shift64RightJamming ( aSig , - expDiff , & aSig ) ;
zExp = bExp ;
}
else {
if ( aExp = = 0x7FF ) {
if ( aSig | bSig ) return propagateFloat64NaN ( a , b ) ;
return a ;
}
if ( aExp = = 0 ) return packFloat64 ( zSign , 0 , ( aSig + bSig ) > > 9 ) ;
zSig = LIT64 ( 0x4000000000000000 ) + aSig + bSig ;
zExp = aExp ;
goto roundAndPack ;
}
aSig | = LIT64 ( 0x2000000000000000 ) ;
zSig = ( aSig + bSig ) < < 1 ;
- - zExp ;
if ( ( sbits64 ) zSig < 0 ) {
zSig = aSig + bSig ;
+ + zExp ;
}
roundAndPack :
2005-08-03 22:49:17 +04:00
return roundAndPackFloat64 ( roundData , zSign , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of subtracting the absolute values of the double -
precision floating - point values ` a ' and ` b ' . If ` zSign ' is true , the
difference is negated before being returned . ` zSign ' is ignored if the
result is a NaN . The subtraction is performed according to the IEC / IEEE
Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static float64 subFloat64Sigs ( struct roundingData * roundData , float64 a , float64 b , flag zSign )
2005-04-17 02:20:36 +04:00
{
int16 aExp , bExp , zExp ;
bits64 aSig , bSig , zSig ;
int16 expDiff ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
bSig = extractFloat64Frac ( b ) ;
bExp = extractFloat64Exp ( b ) ;
expDiff = aExp - bExp ;
aSig < < = 10 ;
bSig < < = 10 ;
if ( 0 < expDiff ) goto aExpBigger ;
if ( expDiff < 0 ) goto bExpBigger ;
if ( aExp = = 0x7FF ) {
if ( aSig | bSig ) return propagateFloat64NaN ( a , b ) ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
if ( aExp = = 0 ) {
aExp = 1 ;
bExp = 1 ;
}
if ( bSig < aSig ) goto aBigger ;
if ( aSig < bSig ) goto bBigger ;
2005-08-03 22:49:17 +04:00
return packFloat64 ( roundData - > mode = = float_round_down , 0 , 0 ) ;
2005-04-17 02:20:36 +04:00
bExpBigger :
if ( bExp = = 0x7FF ) {
if ( bSig ) return propagateFloat64NaN ( a , b ) ;
return packFloat64 ( zSign ^ 1 , 0x7FF , 0 ) ;
}
if ( aExp = = 0 ) {
+ + expDiff ;
}
else {
aSig | = LIT64 ( 0x4000000000000000 ) ;
}
shift64RightJamming ( aSig , - expDiff , & aSig ) ;
bSig | = LIT64 ( 0x4000000000000000 ) ;
bBigger :
zSig = bSig - aSig ;
zExp = bExp ;
zSign ^ = 1 ;
goto normalizeRoundAndPack ;
aExpBigger :
if ( aExp = = 0x7FF ) {
if ( aSig ) return propagateFloat64NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) {
- - expDiff ;
}
else {
bSig | = LIT64 ( 0x4000000000000000 ) ;
}
shift64RightJamming ( bSig , expDiff , & bSig ) ;
aSig | = LIT64 ( 0x4000000000000000 ) ;
aBigger :
zSig = aSig - bSig ;
zExp = aExp ;
normalizeRoundAndPack :
- - zExp ;
2005-08-03 22:49:17 +04:00
return normalizeRoundAndPackFloat64 ( roundData , zSign , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of adding the double - precision floating - point values ` a '
and ` b ' . The operation is performed according to the IEC / IEEE Standard for
Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float64 float64_add ( struct roundingData * roundData , float64 a , float64 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign ;
aSign = extractFloat64Sign ( a ) ;
bSign = extractFloat64Sign ( b ) ;
if ( aSign = = bSign ) {
2005-08-03 22:49:17 +04:00
return addFloat64Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
else {
2005-08-03 22:49:17 +04:00
return subFloat64Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of subtracting the double - precision floating - point values
` a ' and ` b ' . The operation is performed according to the IEC / IEEE Standard
for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float64 float64_sub ( struct roundingData * roundData , float64 a , float64 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign ;
aSign = extractFloat64Sign ( a ) ;
bSign = extractFloat64Sign ( b ) ;
if ( aSign = = bSign ) {
2005-08-03 22:49:17 +04:00
return subFloat64Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
else {
2005-08-03 22:49:17 +04:00
return addFloat64Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of multiplying the double - precision floating - point values
` a ' and ` b ' . The operation is performed according to the IEC / IEEE Standard
for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float64 float64_mul ( struct roundingData * roundData , float64 a , float64 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int16 aExp , bExp , zExp ;
bits64 aSig , bSig , zSig0 , zSig1 ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
bSig = extractFloat64Frac ( b ) ;
bExp = extractFloat64Exp ( b ) ;
bSign = extractFloat64Sign ( b ) ;
zSign = aSign ^ bSign ;
if ( aExp = = 0x7FF ) {
if ( aSig | | ( ( bExp = = 0x7FF ) & & bSig ) ) {
return propagateFloat64NaN ( a , b ) ;
}
if ( ( bExp | bSig ) = = 0 ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
return packFloat64 ( zSign , 0x7FF , 0 ) ;
}
if ( bExp = = 0x7FF ) {
if ( bSig ) return propagateFloat64NaN ( a , b ) ;
if ( ( aExp | aSig ) = = 0 ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
return packFloat64 ( zSign , 0x7FF , 0 ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloat64 ( zSign , 0 , 0 ) ;
normalizeFloat64Subnormal ( aSig , & aExp , & aSig ) ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) return packFloat64 ( zSign , 0 , 0 ) ;
normalizeFloat64Subnormal ( bSig , & bExp , & bSig ) ;
}
zExp = aExp + bExp - 0x3FF ;
aSig = ( aSig | LIT64 ( 0x0010000000000000 ) ) < < 10 ;
bSig = ( bSig | LIT64 ( 0x0010000000000000 ) ) < < 11 ;
mul64To128 ( aSig , bSig , & zSig0 , & zSig1 ) ;
zSig0 | = ( zSig1 ! = 0 ) ;
if ( 0 < = ( sbits64 ) ( zSig0 < < 1 ) ) {
zSig0 < < = 1 ;
- - zExp ;
}
2005-08-03 22:49:17 +04:00
return roundAndPackFloat64 ( roundData , zSign , zExp , zSig0 ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of dividing the double - precision floating - point value ` a '
by the corresponding value ` b ' . The operation is performed according to
the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float64 float64_div ( struct roundingData * roundData , float64 a , float64 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int16 aExp , bExp , zExp ;
bits64 aSig , bSig , zSig ;
bits64 rem0 , rem1 ;
bits64 term0 , term1 ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
bSig = extractFloat64Frac ( b ) ;
bExp = extractFloat64Exp ( b ) ;
bSign = extractFloat64Sign ( b ) ;
zSign = aSign ^ bSign ;
if ( aExp = = 0x7FF ) {
if ( aSig ) return propagateFloat64NaN ( a , b ) ;
if ( bExp = = 0x7FF ) {
if ( bSig ) return propagateFloat64NaN ( a , b ) ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
return packFloat64 ( zSign , 0x7FF , 0 ) ;
}
if ( bExp = = 0x7FF ) {
if ( bSig ) return propagateFloat64NaN ( a , b ) ;
return packFloat64 ( zSign , 0 , 0 ) ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) {
if ( ( aExp | aSig ) = = 0 ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_divbyzero ;
2005-04-17 02:20:36 +04:00
return packFloat64 ( zSign , 0x7FF , 0 ) ;
}
normalizeFloat64Subnormal ( bSig , & bExp , & bSig ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloat64 ( zSign , 0 , 0 ) ;
normalizeFloat64Subnormal ( aSig , & aExp , & aSig ) ;
}
zExp = aExp - bExp + 0x3FD ;
aSig = ( aSig | LIT64 ( 0x0010000000000000 ) ) < < 10 ;
bSig = ( bSig | LIT64 ( 0x0010000000000000 ) ) < < 11 ;
if ( bSig < = ( aSig + aSig ) ) {
aSig > > = 1 ;
+ + zExp ;
}
zSig = estimateDiv128To64 ( aSig , 0 , bSig ) ;
if ( ( zSig & 0x1FF ) < = 2 ) {
mul64To128 ( bSig , zSig , & term0 , & term1 ) ;
sub128 ( aSig , 0 , term0 , term1 , & rem0 , & rem1 ) ;
while ( ( sbits64 ) rem0 < 0 ) {
- - zSig ;
add128 ( rem0 , rem1 , 0 , bSig , & rem0 , & rem1 ) ;
}
zSig | = ( rem1 ! = 0 ) ;
}
2005-08-03 22:49:17 +04:00
return roundAndPackFloat64 ( roundData , zSign , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the remainder of the double - precision floating - point value ` a '
with respect to the corresponding value ` b ' . The operation is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float64 float64_rem ( struct roundingData * roundData , float64 a , float64 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int16 aExp , bExp , expDiff ;
bits64 aSig , bSig ;
bits64 q , alternateASig ;
sbits64 sigMean ;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
bSig = extractFloat64Frac ( b ) ;
bExp = extractFloat64Exp ( b ) ;
bSign = extractFloat64Sign ( b ) ;
if ( aExp = = 0x7FF ) {
if ( aSig | | ( ( bExp = = 0x7FF ) & & bSig ) ) {
return propagateFloat64NaN ( a , b ) ;
}
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
if ( bExp = = 0x7FF ) {
if ( bSig ) return propagateFloat64NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) {
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
normalizeFloat64Subnormal ( bSig , & bExp , & bSig ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return a ;
normalizeFloat64Subnormal ( aSig , & aExp , & aSig ) ;
}
expDiff = aExp - bExp ;
aSig = ( aSig | LIT64 ( 0x0010000000000000 ) ) < < 11 ;
bSig = ( bSig | LIT64 ( 0x0010000000000000 ) ) < < 11 ;
if ( expDiff < 0 ) {
if ( expDiff < - 1 ) return a ;
aSig > > = 1 ;
}
q = ( bSig < = aSig ) ;
if ( q ) aSig - = bSig ;
expDiff - = 64 ;
while ( 0 < expDiff ) {
q = estimateDiv128To64 ( aSig , 0 , bSig ) ;
q = ( 2 < q ) ? q - 2 : 0 ;
aSig = - ( ( bSig > > 2 ) * q ) ;
expDiff - = 62 ;
}
expDiff + = 64 ;
if ( 0 < expDiff ) {
q = estimateDiv128To64 ( aSig , 0 , bSig ) ;
q = ( 2 < q ) ? q - 2 : 0 ;
q > > = 64 - expDiff ;
bSig > > = 2 ;
aSig = ( ( aSig > > 1 ) < < ( expDiff - 1 ) ) - bSig * q ;
}
else {
aSig > > = 2 ;
bSig > > = 2 ;
}
do {
alternateASig = aSig ;
+ + q ;
aSig - = bSig ;
} while ( 0 < = ( sbits64 ) aSig ) ;
sigMean = aSig + alternateASig ;
if ( ( sigMean < 0 ) | | ( ( sigMean = = 0 ) & & ( q & 1 ) ) ) {
aSig = alternateASig ;
}
zSign = ( ( sbits64 ) aSig < 0 ) ;
if ( zSign ) aSig = - aSig ;
2005-08-03 22:49:17 +04:00
return normalizeRoundAndPackFloat64 ( roundData , aSign ^ zSign , bExp , aSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the square root of the double - precision floating - point value ` a ' .
The operation is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float64 float64_sqrt ( struct roundingData * roundData , float64 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int16 aExp , zExp ;
bits64 aSig , zSig ;
bits64 rem0 , rem1 , term0 , term1 ; //, shiftedRem;
//float64 z;
aSig = extractFloat64Frac ( a ) ;
aExp = extractFloat64Exp ( a ) ;
aSign = extractFloat64Sign ( a ) ;
if ( aExp = = 0x7FF ) {
if ( aSig ) return propagateFloat64NaN ( a , a ) ;
if ( ! aSign ) return a ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
if ( aSign ) {
if ( ( aExp | aSig ) = = 0 ) return a ;
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
return float64_default_nan ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return 0 ;
normalizeFloat64Subnormal ( aSig , & aExp , & aSig ) ;
}
zExp = ( ( aExp - 0x3FF ) > > 1 ) + 0x3FE ;
aSig | = LIT64 ( 0x0010000000000000 ) ;
zSig = estimateSqrt32 ( aExp , aSig > > 21 ) ;
zSig < < = 31 ;
aSig < < = 9 - ( aExp & 1 ) ;
zSig = estimateDiv128To64 ( aSig , 0 , zSig ) + zSig + 2 ;
if ( ( zSig & 0x3FF ) < = 5 ) {
if ( zSig < 2 ) {
zSig = LIT64 ( 0xFFFFFFFFFFFFFFFF ) ;
}
else {
aSig < < = 2 ;
mul64To128 ( zSig , zSig , & term0 , & term1 ) ;
sub128 ( aSig , 0 , term0 , term1 , & rem0 , & rem1 ) ;
while ( ( sbits64 ) rem0 < 0 ) {
- - zSig ;
shortShift128Left ( 0 , zSig , 1 , & term0 , & term1 ) ;
term1 | = 1 ;
add128 ( rem0 , rem1 , term0 , term1 , & rem0 , & rem1 ) ;
}
zSig | = ( ( rem0 | rem1 ) ! = 0 ) ;
}
}
shift64RightJamming ( zSig , 1 , & zSig ) ;
2005-08-03 22:49:17 +04:00
return roundAndPackFloat64 ( roundData , 0 , zExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the double - precision floating - point value ` a ' is equal to the
corresponding value ` b ' , and 0 otherwise . The comparison is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float64_eq ( float64 a , float64 b )
{
if ( ( ( extractFloat64Exp ( a ) = = 0x7FF ) & & extractFloat64Frac ( a ) )
| | ( ( extractFloat64Exp ( b ) = = 0x7FF ) & & extractFloat64Frac ( b ) )
) {
if ( float64_is_signaling_nan ( a ) | | float64_is_signaling_nan ( b ) ) {
float_raise ( float_flag_invalid ) ;
}
return 0 ;
}
return ( a = = b ) | | ( ( bits64 ) ( ( a | b ) < < 1 ) = = 0 ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the double - precision floating - point value ` a ' is less than or
equal to the corresponding value ` b ' , and 0 otherwise . The comparison is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float64_le ( float64 a , float64 b )
{
flag aSign , bSign ;
if ( ( ( extractFloat64Exp ( a ) = = 0x7FF ) & & extractFloat64Frac ( a ) )
| | ( ( extractFloat64Exp ( b ) = = 0x7FF ) & & extractFloat64Frac ( b ) )
) {
float_raise ( float_flag_invalid ) ;
return 0 ;
}
aSign = extractFloat64Sign ( a ) ;
bSign = extractFloat64Sign ( b ) ;
if ( aSign ! = bSign ) return aSign | | ( ( bits64 ) ( ( a | b ) < < 1 ) = = 0 ) ;
return ( a = = b ) | | ( aSign ^ ( a < b ) ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the double - precision floating - point value ` a ' is less than
the corresponding value ` b ' , and 0 otherwise . The comparison is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float64_lt ( float64 a , float64 b )
{
flag aSign , bSign ;
if ( ( ( extractFloat64Exp ( a ) = = 0x7FF ) & & extractFloat64Frac ( a ) )
| | ( ( extractFloat64Exp ( b ) = = 0x7FF ) & & extractFloat64Frac ( b ) )
) {
float_raise ( float_flag_invalid ) ;
return 0 ;
}
aSign = extractFloat64Sign ( a ) ;
bSign = extractFloat64Sign ( b ) ;
if ( aSign ! = bSign ) return aSign & & ( ( bits64 ) ( ( a | b ) < < 1 ) ! = 0 ) ;
return ( a ! = b ) & & ( aSign ^ ( a < b ) ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the double - precision floating - point value ` a ' is equal to the
corresponding value ` b ' , and 0 otherwise . The invalid exception is raised
if either operand is a NaN . Otherwise , the comparison is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float64_eq_signaling ( float64 a , float64 b )
{
if ( ( ( extractFloat64Exp ( a ) = = 0x7FF ) & & extractFloat64Frac ( a ) )
| | ( ( extractFloat64Exp ( b ) = = 0x7FF ) & & extractFloat64Frac ( b ) )
) {
float_raise ( float_flag_invalid ) ;
return 0 ;
}
return ( a = = b ) | | ( ( bits64 ) ( ( a | b ) < < 1 ) = = 0 ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the double - precision floating - point value ` a ' is less than or
equal to the corresponding value ` b ' , and 0 otherwise . Quiet NaNs do not
cause an exception . Otherwise , the comparison is performed according to the
IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float64_le_quiet ( float64 a , float64 b )
{
flag aSign , bSign ;
//int16 aExp, bExp;
if ( ( ( extractFloat64Exp ( a ) = = 0x7FF ) & & extractFloat64Frac ( a ) )
| | ( ( extractFloat64Exp ( b ) = = 0x7FF ) & & extractFloat64Frac ( b ) )
) {
2005-08-15 23:42:32 +04:00
/* Do nothing, even if NaN as we're quiet */
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSign = extractFloat64Sign ( a ) ;
bSign = extractFloat64Sign ( b ) ;
if ( aSign ! = bSign ) return aSign | | ( ( bits64 ) ( ( a | b ) < < 1 ) = = 0 ) ;
return ( a = = b ) | | ( aSign ^ ( a < b ) ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the double - precision floating - point value ` a ' is less than
the corresponding value ` b ' , and 0 otherwise . Quiet NaNs do not cause an
exception . Otherwise , the comparison is performed according to the IEC / IEEE
Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag float64_lt_quiet ( float64 a , float64 b )
{
flag aSign , bSign ;
if ( ( ( extractFloat64Exp ( a ) = = 0x7FF ) & & extractFloat64Frac ( a ) )
| | ( ( extractFloat64Exp ( b ) = = 0x7FF ) & & extractFloat64Frac ( b ) )
) {
2005-08-15 23:42:32 +04:00
/* Do nothing, even if NaN as we're quiet */
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSign = extractFloat64Sign ( a ) ;
bSign = extractFloat64Sign ( b ) ;
if ( aSign ! = bSign ) return aSign & & ( ( bits64 ) ( ( a | b ) < < 1 ) ! = 0 ) ;
return ( a ! = b ) & & ( aSign ^ ( a < b ) ) ;
}
# ifdef FLOATX80
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the extended double - precision floating -
point value ` a ' to the 32 - bit two ' s complement integer format . The
conversion is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic - - - which means in particular that the conversion
is rounded according to the current rounding mode . If ` a ' is a NaN , the
largest positive integer is returned . Otherwise , if the conversion
overflows , the largest integer with the same sign as ` a ' is returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
int32 floatx80_to_int32 ( struct roundingData * roundData , floatx80 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int32 aExp , shiftCount ;
bits64 aSig ;
aSig = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
aSign = extractFloatx80Sign ( a ) ;
if ( ( aExp = = 0x7FFF ) & & ( bits64 ) ( aSig < < 1 ) ) aSign = 0 ;
shiftCount = 0x4037 - aExp ;
if ( shiftCount < = 0 ) shiftCount = 1 ;
shift64RightJamming ( aSig , shiftCount , & aSig ) ;
2005-08-03 22:49:17 +04:00
return roundAndPackInt32 ( roundData , aSign , aSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the extended double - precision floating -
point value ` a ' to the 32 - bit two ' s complement integer format . The
conversion is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic , except that the conversion is always rounded
toward zero . If ` a ' is a NaN , the largest positive integer is returned .
Otherwise , if the conversion overflows , the largest integer with the same
sign as ` a ' is returned .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
int32 floatx80_to_int32_round_to_zero ( floatx80 a )
{
flag aSign ;
int32 aExp , shiftCount ;
bits64 aSig , savedASig ;
int32 z ;
aSig = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
aSign = extractFloatx80Sign ( a ) ;
shiftCount = 0x403E - aExp ;
if ( shiftCount < 32 ) {
if ( ( aExp = = 0x7FFF ) & & ( bits64 ) ( aSig < < 1 ) ) aSign = 0 ;
goto invalid ;
}
else if ( 63 < shiftCount ) {
2005-08-03 22:49:17 +04:00
if ( aExp | | aSig ) float_raise ( float_flag_inexact ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
savedASig = aSig ;
aSig > > = shiftCount ;
z = aSig ;
if ( aSign ) z = - z ;
if ( ( z < 0 ) ^ aSign ) {
invalid :
2005-08-03 22:49:17 +04:00
float_raise ( float_flag_invalid ) ;
2005-04-17 02:20:36 +04:00
return aSign ? 0x80000000 : 0x7FFFFFFF ;
}
if ( ( aSig < < shiftCount ) ! = savedASig ) {
2005-08-03 22:49:17 +04:00
float_raise ( float_flag_inexact ) ;
2005-04-17 02:20:36 +04:00
}
return z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the extended double - precision floating -
point value ` a ' to the single - precision floating - point format . The
conversion is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float32 floatx80_to_float32 ( struct roundingData * roundData , floatx80 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int32 aExp ;
bits64 aSig ;
aSig = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
aSign = extractFloatx80Sign ( a ) ;
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( aSig < < 1 ) ) {
return commonNaNToFloat32 ( floatx80ToCommonNaN ( a ) ) ;
}
return packFloat32 ( aSign , 0xFF , 0 ) ;
}
shift64RightJamming ( aSig , 33 , & aSig ) ;
if ( aExp | | aSig ) aExp - = 0x3F81 ;
2005-08-03 22:49:17 +04:00
return roundAndPackFloat32 ( roundData , aSign , aExp , aSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of converting the extended double - precision floating -
point value ` a ' to the double - precision floating - point format . The
conversion is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
float64 floatx80_to_float64 ( struct roundingData * roundData , floatx80 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int32 aExp ;
bits64 aSig , zSig ;
aSig = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
aSign = extractFloatx80Sign ( a ) ;
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( aSig < < 1 ) ) {
return commonNaNToFloat64 ( floatx80ToCommonNaN ( a ) ) ;
}
return packFloat64 ( aSign , 0x7FF , 0 ) ;
}
shift64RightJamming ( aSig , 1 , & zSig ) ;
if ( aExp | | aSig ) aExp - = 0x3C01 ;
2005-08-03 22:49:17 +04:00
return roundAndPackFloat64 ( roundData , aSign , aExp , zSig ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Rounds the extended double - precision floating - point value ` a ' to an integer ,
and returns the result as an extended quadruple - precision floating - point
value . The operation is performed according to the IEC / IEEE Standard for
Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
floatx80 floatx80_round_to_int ( struct roundingData * roundData , floatx80 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int32 aExp ;
bits64 lastBitMask , roundBitsMask ;
int8 roundingMode ;
floatx80 z ;
aExp = extractFloatx80Exp ( a ) ;
if ( 0x403E < = aExp ) {
if ( ( aExp = = 0x7FFF ) & & ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 ) ) {
return propagateFloatx80NaN ( a , a ) ;
}
return a ;
}
if ( aExp < = 0x3FFE ) {
if ( ( aExp = = 0 )
& & ( ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 ) = = 0 ) ) {
return a ;
}
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
aSign = extractFloatx80Sign ( a ) ;
2005-08-03 22:49:17 +04:00
switch ( roundData - > mode ) {
2005-04-17 02:20:36 +04:00
case float_round_nearest_even :
if ( ( aExp = = 0x3FFE ) & & ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 )
) {
return
packFloatx80 ( aSign , 0x3FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
break ;
case float_round_down :
return
aSign ?
packFloatx80 ( 1 , 0x3FFF , LIT64 ( 0x8000000000000000 ) )
: packFloatx80 ( 0 , 0 , 0 ) ;
case float_round_up :
return
aSign ? packFloatx80 ( 1 , 0 , 0 )
: packFloatx80 ( 0 , 0x3FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
return packFloatx80 ( aSign , 0 , 0 ) ;
}
lastBitMask = 1 ;
lastBitMask < < = 0x403E - aExp ;
roundBitsMask = lastBitMask - 1 ;
z = a ;
2005-08-03 22:49:17 +04:00
roundingMode = roundData - > mode ;
2005-04-17 02:20:36 +04:00
if ( roundingMode = = float_round_nearest_even ) {
z . low + = lastBitMask > > 1 ;
if ( ( z . low & roundBitsMask ) = = 0 ) z . low & = ~ lastBitMask ;
}
else if ( roundingMode ! = float_round_to_zero ) {
if ( extractFloatx80Sign ( z ) ^ ( roundingMode = = float_round_up ) ) {
z . low + = roundBitsMask ;
}
}
z . low & = ~ roundBitsMask ;
if ( z . low = = 0 ) {
+ + z . high ;
z . low = LIT64 ( 0x8000000000000000 ) ;
}
2005-08-03 22:49:17 +04:00
if ( z . low ! = a . low ) roundData - > exception | = float_flag_inexact ;
2005-04-17 02:20:36 +04:00
return z ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of adding the absolute values of the extended double -
precision floating - point values ` a ' and ` b ' . If ` zSign ' is true , the sum is
negated before being returned . ` zSign ' is ignored if the result is a NaN .
The addition is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static floatx80 addFloatx80Sigs ( struct roundingData * roundData , floatx80 a , floatx80 b , flag zSign )
2005-04-17 02:20:36 +04:00
{
int32 aExp , bExp , zExp ;
bits64 aSig , bSig , zSig0 , zSig1 ;
int32 expDiff ;
aSig = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
bSig = extractFloatx80Frac ( b ) ;
bExp = extractFloatx80Exp ( b ) ;
expDiff = aExp - bExp ;
if ( 0 < expDiff ) {
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( aSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) - - expDiff ;
shift64ExtraRightJamming ( bSig , 0 , expDiff , & bSig , & zSig1 ) ;
zExp = aExp ;
}
else if ( expDiff < 0 ) {
if ( bExp = = 0x7FFF ) {
if ( ( bits64 ) ( bSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
return packFloatx80 ( zSign , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
if ( aExp = = 0 ) + + expDiff ;
shift64ExtraRightJamming ( aSig , 0 , - expDiff , & aSig , & zSig1 ) ;
zExp = bExp ;
}
else {
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( ( aSig | bSig ) < < 1 ) ) {
return propagateFloatx80NaN ( a , b ) ;
}
return a ;
}
zSig1 = 0 ;
zSig0 = aSig + bSig ;
if ( aExp = = 0 ) {
normalizeFloatx80Subnormal ( zSig0 , & zExp , & zSig0 ) ;
goto roundAndPack ;
}
zExp = aExp ;
goto shiftRight1 ;
}
zSig0 = aSig + bSig ;
if ( ( sbits64 ) zSig0 < 0 ) goto roundAndPack ;
shiftRight1 :
shift64ExtraRightJamming ( zSig0 , zSig1 , 1 , & zSig0 , & zSig1 ) ;
zSig0 | = LIT64 ( 0x8000000000000000 ) ;
+ + zExp ;
roundAndPack :
return
roundAndPackFloatx80 (
2005-08-03 22:49:17 +04:00
roundData , zSign , zExp , zSig0 , zSig1 ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of subtracting the absolute values of the extended
double - precision floating - point values ` a ' and ` b ' . If ` zSign ' is true ,
the difference is negated before being returned . ` zSign ' is ignored if the
result is a NaN . The subtraction is performed according to the IEC / IEEE
Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
static floatx80 subFloatx80Sigs ( struct roundingData * roundData , floatx80 a , floatx80 b , flag zSign )
2005-04-17 02:20:36 +04:00
{
int32 aExp , bExp , zExp ;
bits64 aSig , bSig , zSig0 , zSig1 ;
int32 expDiff ;
floatx80 z ;
aSig = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
bSig = extractFloatx80Frac ( b ) ;
bExp = extractFloatx80Exp ( b ) ;
expDiff = aExp - bExp ;
if ( 0 < expDiff ) goto aExpBigger ;
if ( expDiff < 0 ) goto bExpBigger ;
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( ( aSig | bSig ) < < 1 ) ) {
return propagateFloatx80NaN ( a , b ) ;
}
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
z . low = floatx80_default_nan_low ;
z . high = floatx80_default_nan_high ;
2005-11-08 00:12:07 +03:00
z . __padding = 0 ;
2005-04-17 02:20:36 +04:00
return z ;
}
if ( aExp = = 0 ) {
aExp = 1 ;
bExp = 1 ;
}
zSig1 = 0 ;
if ( bSig < aSig ) goto aBigger ;
if ( aSig < bSig ) goto bBigger ;
2005-08-03 22:49:17 +04:00
return packFloatx80 ( roundData - > mode = = float_round_down , 0 , 0 ) ;
2005-04-17 02:20:36 +04:00
bExpBigger :
if ( bExp = = 0x7FFF ) {
if ( ( bits64 ) ( bSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
return packFloatx80 ( zSign ^ 1 , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
if ( aExp = = 0 ) + + expDiff ;
shift128RightJamming ( aSig , 0 , - expDiff , & aSig , & zSig1 ) ;
bBigger :
sub128 ( bSig , 0 , aSig , zSig1 , & zSig0 , & zSig1 ) ;
zExp = bExp ;
zSign ^ = 1 ;
goto normalizeRoundAndPack ;
aExpBigger :
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( aSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) - - expDiff ;
shift128RightJamming ( bSig , 0 , expDiff , & bSig , & zSig1 ) ;
aBigger :
sub128 ( aSig , 0 , bSig , zSig1 , & zSig0 , & zSig1 ) ;
zExp = aExp ;
normalizeRoundAndPack :
return
normalizeRoundAndPackFloatx80 (
2005-08-03 22:49:17 +04:00
roundData , zSign , zExp , zSig0 , zSig1 ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of adding the extended double - precision floating - point
values ` a ' and ` b ' . The operation is performed according to the IEC / IEEE
Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
floatx80 floatx80_add ( struct roundingData * roundData , floatx80 a , floatx80 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign ;
aSign = extractFloatx80Sign ( a ) ;
bSign = extractFloatx80Sign ( b ) ;
if ( aSign = = bSign ) {
2005-08-03 22:49:17 +04:00
return addFloatx80Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
else {
2005-08-03 22:49:17 +04:00
return subFloatx80Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of subtracting the extended double - precision floating -
point values ` a ' and ` b ' . The operation is performed according to the
IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
floatx80 floatx80_sub ( struct roundingData * roundData , floatx80 a , floatx80 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign ;
aSign = extractFloatx80Sign ( a ) ;
bSign = extractFloatx80Sign ( b ) ;
if ( aSign = = bSign ) {
2005-08-03 22:49:17 +04:00
return subFloatx80Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
else {
2005-08-03 22:49:17 +04:00
return addFloatx80Sigs ( roundData , a , b , aSign ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of multiplying the extended double - precision floating -
point values ` a ' and ` b ' . The operation is performed according to the
IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
floatx80 floatx80_mul ( struct roundingData * roundData , floatx80 a , floatx80 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int32 aExp , bExp , zExp ;
bits64 aSig , bSig , zSig0 , zSig1 ;
floatx80 z ;
aSig = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
aSign = extractFloatx80Sign ( a ) ;
bSig = extractFloatx80Frac ( b ) ;
bExp = extractFloatx80Exp ( b ) ;
bSign = extractFloatx80Sign ( b ) ;
zSign = aSign ^ bSign ;
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( aSig < < 1 )
| | ( ( bExp = = 0x7FFF ) & & ( bits64 ) ( bSig < < 1 ) ) ) {
return propagateFloatx80NaN ( a , b ) ;
}
if ( ( bExp | bSig ) = = 0 ) goto invalid ;
return packFloatx80 ( zSign , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
if ( bExp = = 0x7FFF ) {
if ( ( bits64 ) ( bSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
if ( ( aExp | aSig ) = = 0 ) {
invalid :
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
z . low = floatx80_default_nan_low ;
z . high = floatx80_default_nan_high ;
2005-11-08 00:12:07 +03:00
z . __padding = 0 ;
2005-04-17 02:20:36 +04:00
return z ;
}
return packFloatx80 ( zSign , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloatx80 ( zSign , 0 , 0 ) ;
normalizeFloatx80Subnormal ( aSig , & aExp , & aSig ) ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) return packFloatx80 ( zSign , 0 , 0 ) ;
normalizeFloatx80Subnormal ( bSig , & bExp , & bSig ) ;
}
zExp = aExp + bExp - 0x3FFE ;
mul64To128 ( aSig , bSig , & zSig0 , & zSig1 ) ;
if ( 0 < ( sbits64 ) zSig0 ) {
shortShift128Left ( zSig0 , zSig1 , 1 , & zSig0 , & zSig1 ) ;
- - zExp ;
}
return
roundAndPackFloatx80 (
2005-08-03 22:49:17 +04:00
roundData , zSign , zExp , zSig0 , zSig1 ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the result of dividing the extended double - precision floating - point
value ` a ' by the corresponding value ` b ' . The operation is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
floatx80 floatx80_div ( struct roundingData * roundData , floatx80 a , floatx80 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int32 aExp , bExp , zExp ;
bits64 aSig , bSig , zSig0 , zSig1 ;
bits64 rem0 , rem1 , rem2 , term0 , term1 , term2 ;
floatx80 z ;
aSig = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
aSign = extractFloatx80Sign ( a ) ;
bSig = extractFloatx80Frac ( b ) ;
bExp = extractFloatx80Exp ( b ) ;
bSign = extractFloatx80Sign ( b ) ;
zSign = aSign ^ bSign ;
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( aSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
if ( bExp = = 0x7FFF ) {
if ( ( bits64 ) ( bSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
goto invalid ;
}
return packFloatx80 ( zSign , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
if ( bExp = = 0x7FFF ) {
if ( ( bits64 ) ( bSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
return packFloatx80 ( zSign , 0 , 0 ) ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) {
if ( ( aExp | aSig ) = = 0 ) {
invalid :
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
z . low = floatx80_default_nan_low ;
z . high = floatx80_default_nan_high ;
2005-11-08 00:12:07 +03:00
z . __padding = 0 ;
2005-04-17 02:20:36 +04:00
return z ;
}
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_divbyzero ;
2005-04-17 02:20:36 +04:00
return packFloatx80 ( zSign , 0x7FFF , LIT64 ( 0x8000000000000000 ) ) ;
}
normalizeFloatx80Subnormal ( bSig , & bExp , & bSig ) ;
}
if ( aExp = = 0 ) {
if ( aSig = = 0 ) return packFloatx80 ( zSign , 0 , 0 ) ;
normalizeFloatx80Subnormal ( aSig , & aExp , & aSig ) ;
}
zExp = aExp - bExp + 0x3FFE ;
rem1 = 0 ;
if ( bSig < = aSig ) {
shift128Right ( aSig , 0 , 1 , & aSig , & rem1 ) ;
+ + zExp ;
}
zSig0 = estimateDiv128To64 ( aSig , rem1 , bSig ) ;
mul64To128 ( bSig , zSig0 , & term0 , & term1 ) ;
sub128 ( aSig , rem1 , term0 , term1 , & rem0 , & rem1 ) ;
while ( ( sbits64 ) rem0 < 0 ) {
- - zSig0 ;
add128 ( rem0 , rem1 , 0 , bSig , & rem0 , & rem1 ) ;
}
zSig1 = estimateDiv128To64 ( rem1 , 0 , bSig ) ;
if ( ( bits64 ) ( zSig1 < < 1 ) < = 8 ) {
mul64To128 ( bSig , zSig1 , & term1 , & term2 ) ;
sub128 ( rem1 , 0 , term1 , term2 , & rem1 , & rem2 ) ;
while ( ( sbits64 ) rem1 < 0 ) {
- - zSig1 ;
add128 ( rem1 , rem2 , 0 , bSig , & rem1 , & rem2 ) ;
}
zSig1 | = ( ( rem1 | rem2 ) ! = 0 ) ;
}
return
roundAndPackFloatx80 (
2005-08-03 22:49:17 +04:00
roundData , zSign , zExp , zSig0 , zSig1 ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the remainder of the extended double - precision floating - point value
` a ' with respect to the corresponding value ` b ' . The operation is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
floatx80 floatx80_rem ( struct roundingData * roundData , floatx80 a , floatx80 b )
2005-04-17 02:20:36 +04:00
{
flag aSign , bSign , zSign ;
int32 aExp , bExp , expDiff ;
bits64 aSig0 , aSig1 , bSig ;
bits64 q , term0 , term1 , alternateASig0 , alternateASig1 ;
floatx80 z ;
aSig0 = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
aSign = extractFloatx80Sign ( a ) ;
bSig = extractFloatx80Frac ( b ) ;
bExp = extractFloatx80Exp ( b ) ;
bSign = extractFloatx80Sign ( b ) ;
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( aSig0 < < 1 )
| | ( ( bExp = = 0x7FFF ) & & ( bits64 ) ( bSig < < 1 ) ) ) {
return propagateFloatx80NaN ( a , b ) ;
}
goto invalid ;
}
if ( bExp = = 0x7FFF ) {
if ( ( bits64 ) ( bSig < < 1 ) ) return propagateFloatx80NaN ( a , b ) ;
return a ;
}
if ( bExp = = 0 ) {
if ( bSig = = 0 ) {
invalid :
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
z . low = floatx80_default_nan_low ;
z . high = floatx80_default_nan_high ;
2005-11-08 00:12:07 +03:00
z . __padding = 0 ;
2005-04-17 02:20:36 +04:00
return z ;
}
normalizeFloatx80Subnormal ( bSig , & bExp , & bSig ) ;
}
if ( aExp = = 0 ) {
if ( ( bits64 ) ( aSig0 < < 1 ) = = 0 ) return a ;
normalizeFloatx80Subnormal ( aSig0 , & aExp , & aSig0 ) ;
}
bSig | = LIT64 ( 0x8000000000000000 ) ;
zSign = aSign ;
expDiff = aExp - bExp ;
aSig1 = 0 ;
if ( expDiff < 0 ) {
if ( expDiff < - 1 ) return a ;
shift128Right ( aSig0 , 0 , 1 , & aSig0 , & aSig1 ) ;
expDiff = 0 ;
}
q = ( bSig < = aSig0 ) ;
if ( q ) aSig0 - = bSig ;
expDiff - = 64 ;
while ( 0 < expDiff ) {
q = estimateDiv128To64 ( aSig0 , aSig1 , bSig ) ;
q = ( 2 < q ) ? q - 2 : 0 ;
mul64To128 ( bSig , q , & term0 , & term1 ) ;
sub128 ( aSig0 , aSig1 , term0 , term1 , & aSig0 , & aSig1 ) ;
shortShift128Left ( aSig0 , aSig1 , 62 , & aSig0 , & aSig1 ) ;
expDiff - = 62 ;
}
expDiff + = 64 ;
if ( 0 < expDiff ) {
q = estimateDiv128To64 ( aSig0 , aSig1 , bSig ) ;
q = ( 2 < q ) ? q - 2 : 0 ;
q > > = 64 - expDiff ;
mul64To128 ( bSig , q < < ( 64 - expDiff ) , & term0 , & term1 ) ;
sub128 ( aSig0 , aSig1 , term0 , term1 , & aSig0 , & aSig1 ) ;
shortShift128Left ( 0 , bSig , 64 - expDiff , & term0 , & term1 ) ;
while ( le128 ( term0 , term1 , aSig0 , aSig1 ) ) {
+ + q ;
sub128 ( aSig0 , aSig1 , term0 , term1 , & aSig0 , & aSig1 ) ;
}
}
else {
term1 = 0 ;
term0 = bSig ;
}
sub128 ( term0 , term1 , aSig0 , aSig1 , & alternateASig0 , & alternateASig1 ) ;
if ( lt128 ( alternateASig0 , alternateASig1 , aSig0 , aSig1 )
| | ( eq128 ( alternateASig0 , alternateASig1 , aSig0 , aSig1 )
& & ( q & 1 ) )
) {
aSig0 = alternateASig0 ;
aSig1 = alternateASig1 ;
zSign = ! zSign ;
}
2005-08-03 22:49:17 +04:00
2005-04-17 02:20:36 +04:00
return
normalizeRoundAndPackFloatx80 (
2005-08-03 22:49:17 +04:00
roundData , zSign , bExp + expDiff , aSig0 , aSig1 ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns the square root of the extended double - precision floating - point
value ` a ' . The operation is performed according to the IEC / IEEE Standard
for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2005-08-03 22:49:17 +04:00
floatx80 floatx80_sqrt ( struct roundingData * roundData , floatx80 a )
2005-04-17 02:20:36 +04:00
{
flag aSign ;
int32 aExp , zExp ;
bits64 aSig0 , aSig1 , zSig0 , zSig1 ;
bits64 rem0 , rem1 , rem2 , rem3 , term0 , term1 , term2 , term3 ;
bits64 shiftedRem0 , shiftedRem1 ;
floatx80 z ;
aSig0 = extractFloatx80Frac ( a ) ;
aExp = extractFloatx80Exp ( a ) ;
aSign = extractFloatx80Sign ( a ) ;
if ( aExp = = 0x7FFF ) {
if ( ( bits64 ) ( aSig0 < < 1 ) ) return propagateFloatx80NaN ( a , a ) ;
if ( ! aSign ) return a ;
goto invalid ;
}
if ( aSign ) {
if ( ( aExp | aSig0 ) = = 0 ) return a ;
invalid :
2005-08-03 22:49:17 +04:00
roundData - > exception | = float_flag_invalid ;
2005-04-17 02:20:36 +04:00
z . low = floatx80_default_nan_low ;
z . high = floatx80_default_nan_high ;
2005-11-08 00:12:07 +03:00
z . __padding = 0 ;
2005-04-17 02:20:36 +04:00
return z ;
}
if ( aExp = = 0 ) {
if ( aSig0 = = 0 ) return packFloatx80 ( 0 , 0 , 0 ) ;
normalizeFloatx80Subnormal ( aSig0 , & aExp , & aSig0 ) ;
}
zExp = ( ( aExp - 0x3FFF ) > > 1 ) + 0x3FFF ;
zSig0 = estimateSqrt32 ( aExp , aSig0 > > 32 ) ;
zSig0 < < = 31 ;
aSig1 = 0 ;
shift128Right ( aSig0 , 0 , ( aExp & 1 ) + 2 , & aSig0 , & aSig1 ) ;
zSig0 = estimateDiv128To64 ( aSig0 , aSig1 , zSig0 ) + zSig0 + 4 ;
if ( 0 < = ( sbits64 ) zSig0 ) zSig0 = LIT64 ( 0xFFFFFFFFFFFFFFFF ) ;
shortShift128Left ( aSig0 , aSig1 , 2 , & aSig0 , & aSig1 ) ;
mul64To128 ( zSig0 , zSig0 , & term0 , & term1 ) ;
sub128 ( aSig0 , aSig1 , term0 , term1 , & rem0 , & rem1 ) ;
while ( ( sbits64 ) rem0 < 0 ) {
- - zSig0 ;
shortShift128Left ( 0 , zSig0 , 1 , & term0 , & term1 ) ;
term1 | = 1 ;
add128 ( rem0 , rem1 , term0 , term1 , & rem0 , & rem1 ) ;
}
shortShift128Left ( rem0 , rem1 , 63 , & shiftedRem0 , & shiftedRem1 ) ;
zSig1 = estimateDiv128To64 ( shiftedRem0 , shiftedRem1 , zSig0 ) ;
if ( ( bits64 ) ( zSig1 < < 1 ) < = 10 ) {
if ( zSig1 = = 0 ) zSig1 = 1 ;
mul64To128 ( zSig0 , zSig1 , & term1 , & term2 ) ;
shortShift128Left ( term1 , term2 , 1 , & term1 , & term2 ) ;
sub128 ( rem1 , 0 , term1 , term2 , & rem1 , & rem2 ) ;
mul64To128 ( zSig1 , zSig1 , & term2 , & term3 ) ;
sub192 ( rem1 , rem2 , 0 , 0 , term2 , term3 , & rem1 , & rem2 , & rem3 ) ;
while ( ( sbits64 ) rem1 < 0 ) {
- - zSig1 ;
shortShift192Left ( 0 , zSig0 , zSig1 , 1 , & term1 , & term2 , & term3 ) ;
term3 | = 1 ;
add192 (
rem1 , rem2 , rem3 , term1 , term2 , term3 , & rem1 , & rem2 , & rem3 ) ;
}
zSig1 | = ( ( rem1 | rem2 | rem3 ) ! = 0 ) ;
}
return
roundAndPackFloatx80 (
2005-08-03 22:49:17 +04:00
roundData , 0 , zExp , zSig0 , zSig1 ) ;
2005-04-17 02:20:36 +04:00
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the extended double - precision floating - point value ` a ' is
equal to the corresponding value ` b ' , and 0 otherwise . The comparison is
performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag floatx80_eq ( floatx80 a , floatx80 b )
{
if ( ( ( extractFloatx80Exp ( a ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 ) )
| | ( ( extractFloatx80Exp ( b ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( b ) < < 1 ) )
) {
if ( floatx80_is_signaling_nan ( a )
| | floatx80_is_signaling_nan ( b ) ) {
2005-08-15 23:42:32 +04:00
float_raise ( float_flag_invalid ) ;
2005-04-17 02:20:36 +04:00
}
return 0 ;
}
return
( a . low = = b . low )
& & ( ( a . high = = b . high )
| | ( ( a . low = = 0 )
& & ( ( bits16 ) ( ( a . high | b . high ) < < 1 ) = = 0 ) )
) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the extended double - precision floating - point value ` a ' is
less than or equal to the corresponding value ` b ' , and 0 otherwise . The
comparison is performed according to the IEC / IEEE Standard for Binary
Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag floatx80_le ( floatx80 a , floatx80 b )
{
flag aSign , bSign ;
if ( ( ( extractFloatx80Exp ( a ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 ) )
| | ( ( extractFloatx80Exp ( b ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( b ) < < 1 ) )
) {
2005-08-15 23:42:32 +04:00
float_raise ( float_flag_invalid ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSign = extractFloatx80Sign ( a ) ;
bSign = extractFloatx80Sign ( b ) ;
if ( aSign ! = bSign ) {
return
aSign
| | ( ( ( ( bits16 ) ( ( a . high | b . high ) < < 1 ) ) | a . low | b . low )
= = 0 ) ;
}
return
aSign ? le128 ( b . high , b . low , a . high , a . low )
: le128 ( a . high , a . low , b . high , b . low ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the extended double - precision floating - point value ` a ' is
less than the corresponding value ` b ' , and 0 otherwise . The comparison
is performed according to the IEC / IEEE Standard for Binary Floating - point
Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag floatx80_lt ( floatx80 a , floatx80 b )
{
flag aSign , bSign ;
if ( ( ( extractFloatx80Exp ( a ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 ) )
| | ( ( extractFloatx80Exp ( b ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( b ) < < 1 ) )
) {
2005-08-15 23:42:32 +04:00
float_raise ( float_flag_invalid ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSign = extractFloatx80Sign ( a ) ;
bSign = extractFloatx80Sign ( b ) ;
if ( aSign ! = bSign ) {
return
aSign
& & ( ( ( ( bits16 ) ( ( a . high | b . high ) < < 1 ) ) | a . low | b . low )
! = 0 ) ;
}
return
aSign ? lt128 ( b . high , b . low , a . high , a . low )
: lt128 ( a . high , a . low , b . high , b . low ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the extended double - precision floating - point value ` a ' is equal
to the corresponding value ` b ' , and 0 otherwise . The invalid exception is
raised if either operand is a NaN . Otherwise , the comparison is performed
according to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag floatx80_eq_signaling ( floatx80 a , floatx80 b )
{
if ( ( ( extractFloatx80Exp ( a ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 ) )
| | ( ( extractFloatx80Exp ( b ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( b ) < < 1 ) )
) {
2005-08-15 23:42:32 +04:00
float_raise ( float_flag_invalid ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
return
( a . low = = b . low )
& & ( ( a . high = = b . high )
| | ( ( a . low = = 0 )
& & ( ( bits16 ) ( ( a . high | b . high ) < < 1 ) = = 0 ) )
) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the extended double - precision floating - point value ` a ' is less
than or equal to the corresponding value ` b ' , and 0 otherwise . Quiet NaNs
do not cause an exception . Otherwise , the comparison is performed according
to the IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag floatx80_le_quiet ( floatx80 a , floatx80 b )
{
flag aSign , bSign ;
if ( ( ( extractFloatx80Exp ( a ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 ) )
| | ( ( extractFloatx80Exp ( b ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( b ) < < 1 ) )
) {
2005-08-15 23:42:32 +04:00
/* Do nothing, even if NaN as we're quiet */
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSign = extractFloatx80Sign ( a ) ;
bSign = extractFloatx80Sign ( b ) ;
if ( aSign ! = bSign ) {
return
aSign
| | ( ( ( ( bits16 ) ( ( a . high | b . high ) < < 1 ) ) | a . low | b . low )
= = 0 ) ;
}
return
aSign ? le128 ( b . high , b . low , a . high , a . low )
: le128 ( a . high , a . low , b . high , b . low ) ;
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Returns 1 if the extended double - precision floating - point value ` a ' is less
than the corresponding value ` b ' , and 0 otherwise . Quiet NaNs do not cause
an exception . Otherwise , the comparison is performed according to the
IEC / IEEE Standard for Binary Floating - point Arithmetic .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
flag floatx80_lt_quiet ( floatx80 a , floatx80 b )
{
flag aSign , bSign ;
if ( ( ( extractFloatx80Exp ( a ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( a ) < < 1 ) )
| | ( ( extractFloatx80Exp ( b ) = = 0x7FFF )
& & ( bits64 ) ( extractFloatx80Frac ( b ) < < 1 ) )
) {
2005-08-15 23:42:32 +04:00
/* Do nothing, even if NaN as we're quiet */
2005-04-17 02:20:36 +04:00
return 0 ;
}
aSign = extractFloatx80Sign ( a ) ;
bSign = extractFloatx80Sign ( b ) ;
if ( aSign ! = bSign ) {
return
aSign
& & ( ( ( ( bits16 ) ( ( a . high | b . high ) < < 1 ) ) | a . low | b . low )
! = 0 ) ;
}
return
aSign ? lt128 ( b . high , b . low , a . high , a . low )
: lt128 ( a . high , a . low , b . high , b . low ) ;
}
# endif