~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
softfloat.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~
Diff markup

Differences between /arch/arm/nwfpe/softfloat.c (Version linux-6.12-rc7) and /arch/m68k/nwfpe/softfloat.c (Version linux-5.2.21)

  1 /*                                                  1 
  2 ==============================================    
  3                                                   
  4 This C source file is part of the SoftFloat IE    
  5 Arithmetic Package, Release 2.                    
  6                                                   
  7 Written by John R. Hauser.  This work was made    
  8 International Computer Science Institute, loca    
  9 Street, Berkeley, California 94704.  Funding w    
 10 National Science Foundation under grant MIP-93    
 11 of this code was written as part of a project     
 12 processor in collaboration with the University    
 13 overseen by Profs. Nelson Morgan and John Wawr    
 14 is available through the web page                 
 15 http://www.jhauser.us/arithmetic/SoftFloat-2b/    
 16                                                   
 17 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.     
 18 has been made to avoid it, THIS SOFTWARE MAY C    
 19 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF TH    
 20 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAK    
 21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISI    
 22                                                   
 23 Derivative works are acceptable, even for comm    
 24 (1) they include prominent notice that the wor    
 25 include prominent notice akin to these three p    
 26 this code that are retained.                      
 27                                                   
 28 ==============================================    
 29 */                                                
 30                                                   
 31 #include <asm/div64.h>                            
 32                                                   
 33 #include "fpa11.h"                                
 34 //#include "milieu.h"                             
 35 //#include "softfloat.h"                          
 36                                                   
 37 /*                                                
 38 ----------------------------------------------    
 39 Primitive arithmetic functions, including mult    
 40 division and square root approximations.  (Can    
 41 desired.)                                         
 42 ----------------------------------------------    
 43 */                                                
 44 #include "softfloat-macros"                       
 45                                                   
 46 /*                                                
 47 ----------------------------------------------    
 48 Functions and definitions to determine:  (1) w    
 49 is detected before or after rounding by defaul    
 50 happens when exceptions are raised, (3) how si    
 51 from quiet NaNs, (4) the default generated qui    
 52 are propagated from function inputs to output.    
 53 specific.                                         
 54 ----------------------------------------------    
 55 */                                                
 56 #include "softfloat-specialize"                   
 57                                                   
 58 /*                                                
 59 ----------------------------------------------    
 60 Takes a 64-bit fixed-point value `absZ' with b    
 61 and 7, and returns the properly rounded 32-bit    
 62 input.  If `zSign' is nonzero, the input is ne    
 63 to an integer.  Bit 63 of `absZ' must be zero.    
 64 input is simply rounded to an integer, with th    
 65 the input cannot be represented exactly as an     
 66 input is too large, however, the invalid excep    
 67 positive or negative integer is returned.         
 68 ----------------------------------------------    
 69 */                                                
 70 static int32 roundAndPackInt32( struct roundin    
 71 {                                                 
 72     int8 roundingMode;                            
 73     flag roundNearestEven;                        
 74     int8 roundIncrement, roundBits;               
 75     int32 z;                                      
 76                                                   
 77     roundingMode = roundData->mode;               
 78     roundNearestEven = ( roundingMode == float    
 79     roundIncrement = 0x40;                        
 80     if ( ! roundNearestEven ) {                   
 81         if ( roundingMode == float_round_to_ze    
 82             roundIncrement = 0;                   
 83         }                                         
 84         else {                                    
 85             roundIncrement = 0x7F;                
 86             if ( zSign ) {                        
 87                 if ( roundingMode == float_rou    
 88             }                                     
 89             else {                                
 90                 if ( roundingMode == float_rou    
 91             }                                     
 92         }                                         
 93     }                                             
 94     roundBits = absZ & 0x7F;                      
 95     absZ = ( absZ + roundIncrement )>>7;          
 96     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 )     
 97     z = absZ;                                     
 98     if ( zSign ) z = - z;                         
 99     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^     
100         roundData->exception |= float_flag_inv    
101         return zSign ? 0x80000000 : 0x7FFFFFFF    
102     }                                             
103     if ( roundBits ) roundData->exception |= f    
104     return z;                                     
105                                                   
106 }                                                 
107                                                   
108 /*                                                
109 ----------------------------------------------    
110 Returns the fraction bits of the single-precis    
111 ----------------------------------------------    
112 */                                                
113 INLINE bits32 extractFloat32Frac( float32 a )     
114 {                                                 
115                                                   
116     return a & 0x007FFFFF;                        
117                                                   
118 }                                                 
119                                                   
120 /*                                                
121 ----------------------------------------------    
122 Returns the exponent bits of the single-precis    
123 ----------------------------------------------    
124 */                                                
125 INLINE int16 extractFloat32Exp( float32 a )       
126 {                                                 
127                                                   
128     return ( a>>23 ) & 0xFF;                      
129                                                   
130 }                                                 
131                                                   
132 /*                                                
133 ----------------------------------------------    
134 Returns the sign bit of the single-precision f    
135 ----------------------------------------------    
136 */                                                
137 #if 0   /* in softfloat.h */                      
138 INLINE flag extractFloat32Sign( float32 a )       
139 {                                                 
140                                                   
141     return a>>31;                                 
142                                                   
143 }                                                 
144 #endif                                            
145                                                   
146 /*                                                
147 ----------------------------------------------    
148 Normalizes the subnormal single-precision floa    
149 by the denormalized significand `aSig'.  The n    
150 significand are stored at the locations pointe    
151 `zSigPtr', respectively.                          
152 ----------------------------------------------    
153 */                                                
154 static void                                       
155  normalizeFloat32Subnormal( bits32 aSig, int16    
156 {                                                 
157     int8 shiftCount;                              
158                                                   
159     shiftCount = countLeadingZeros32( aSig ) -    
160     *zSigPtr = aSig<<shiftCount;                  
161     *zExpPtr = 1 - shiftCount;                    
162                                                   
163 }                                                 
164                                                   
165 /*                                                
166 ----------------------------------------------    
167 Packs the sign `zSign', exponent `zExp', and s    
168 single-precision floating-point value, returni    
169 shifted into the proper positions, the three f    
170 together to form the result.  This means that     
171 will be added into the exponent.  Since a prop    
172 will have an integer portion equal to 1, the `    
173 than the desired result exponent whenever `zSi    
174 significand.                                      
175 ----------------------------------------------    
176 */                                                
177 INLINE float32 packFloat32( flag zSign, int16     
178 {                                                 
179 #if 0                                             
180    float32 f;                                     
181    __asm__("@ packFloat32                         
182             mov %0, %1, asl #31                   
183             orr %0, %2, asl #23                   
184             orr %0, %3"                           
185             : /* no outputs */                    
186             : "g" (f), "g" (zSign), "g" (zExp)    
187             : "cc");                              
188    return f;                                      
189 #else                                             
190     return ( ( (bits32) zSign )<<31 ) + ( ( (b    
191 #endif                                            
192 }                                                 
193                                                   
194 /*                                                
195 ----------------------------------------------    
196 Takes an abstract floating-point value having     
197 and significand `zSig', and returns the proper    
198 point value corresponding to the abstract inpu    
199 value is simply rounded and packed into the si    
200 the inexact exception raised if the abstract i    
201 exactly.  If the abstract value is too large,     
202 inexact exceptions are raised and an infinity     
203 returned.  If the abstract value is too small,    
204 a subnormal number, and the underflow and inex    
205 the abstract input cannot be represented exact    
206 precision floating-point number.                  
207     The input significand `zSig' has its binar    
208 and 29, which is 7 bits to the left of the usu    
209 significand must be normalized or smaller.  If    
210 `zExp' must be 0; in that case, the result ret    
211 and it must not require rounding.  In the usua    
212 normalized, `zExp' must be 1 less than the ``t    
213 The handling of underflow and overflow follows    
214 Binary Floating-point Arithmetic.                 
215 ----------------------------------------------    
216 */                                                
217 static float32 roundAndPackFloat32( struct rou    
218 {                                                 
219     int8 roundingMode;                            
220     flag roundNearestEven;                        
221     int8 roundIncrement, roundBits;               
222     flag isTiny;                                  
223                                                   
224     roundingMode = roundData->mode;               
225     roundNearestEven = ( roundingMode == float    
226     roundIncrement = 0x40;                        
227     if ( ! roundNearestEven ) {                   
228         if ( roundingMode == float_round_to_ze    
229             roundIncrement = 0;                   
230         }                                         
231         else {                                    
232             roundIncrement = 0x7F;                
233             if ( zSign ) {                        
234                 if ( roundingMode == float_rou    
235             }                                     
236             else {                                
237                 if ( roundingMode == float_rou    
238             }                                     
239         }                                         
240     }                                             
241     roundBits = zSig & 0x7F;                      
242     if ( 0xFD <= (bits16) zExp ) {                
243         if (    ( 0xFD < zExp )                   
244              || (    ( zExp == 0xFD )             
245                   && ( (sbits32) ( zSig + roun    
246            ) {                                    
247             roundData->exception |= float_flag    
248             return packFloat32( zSign, 0xFF, 0    
249         }                                         
250         if ( zExp < 0 ) {                         
251             isTiny =                              
252                    ( float_detect_tininess ==     
253                 || ( zExp < -1 )                  
254                 || ( zSig + roundIncrement < 0    
255             shift32RightJamming( zSig, - zExp,    
256             zExp = 0;                             
257             roundBits = zSig & 0x7F;              
258             if ( isTiny && roundBits ) roundDa    
259         }                                         
260     }                                             
261     if ( roundBits ) roundData->exception |= f    
262     zSig = ( zSig + roundIncrement )>>7;          
263     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 )     
264     if ( zSig == 0 ) zExp = 0;                    
265     return packFloat32( zSign, zExp, zSig );      
266                                                   
267 }                                                 
268                                                   
269 /*                                                
270 ----------------------------------------------    
271 Takes an abstract floating-point value having     
272 and significand `zSig', and returns the proper    
273 point value corresponding to the abstract inpu    
274 `roundAndPackFloat32' except that `zSig' does     
275 any way.  In all cases, `zExp' must be 1 less     
276 point exponent.                                   
277 ----------------------------------------------    
278 */                                                
279 static float32                                    
280  normalizeRoundAndPackFloat32( struct rounding    
281 {                                                 
282     int8 shiftCount;                              
283                                                   
284     shiftCount = countLeadingZeros32( zSig ) -    
285     return roundAndPackFloat32( roundData, zSi    
286                                                   
287 }                                                 
288                                                   
289 /*                                                
290 ----------------------------------------------    
291 Returns the fraction bits of the double-precis    
292 ----------------------------------------------    
293 */                                                
294 INLINE bits64 extractFloat64Frac( float64 a )     
295 {                                                 
296                                                   
297     return a & LIT64( 0x000FFFFFFFFFFFFF );       
298                                                   
299 }                                                 
300                                                   
301 /*                                                
302 ----------------------------------------------    
303 Returns the exponent bits of the double-precis    
304 ----------------------------------------------    
305 */                                                
306 INLINE int16 extractFloat64Exp( float64 a )       
307 {                                                 
308                                                   
309     return ( a>>52 ) & 0x7FF;                     
310                                                   
311 }                                                 
312                                                   
313 /*                                                
314 ----------------------------------------------    
315 Returns the sign bit of the double-precision f    
316 ----------------------------------------------    
317 */                                                
318 #if 0   /* in softfloat.h */                      
319 INLINE flag extractFloat64Sign( float64 a )       
320 {                                                 
321                                                   
322     return a>>63;                                 
323                                                   
324 }                                                 
325 #endif                                            
326                                                   
327 /*                                                
328 ----------------------------------------------    
329 Normalizes the subnormal double-precision floa    
330 by the denormalized significand `aSig'.  The n    
331 significand are stored at the locations pointe    
332 `zSigPtr', respectively.                          
333 ----------------------------------------------    
334 */                                                
335 static void                                       
336  normalizeFloat64Subnormal( bits64 aSig, int16    
337 {                                                 
338     int8 shiftCount;                              
339                                                   
340     shiftCount = countLeadingZeros64( aSig ) -    
341     *zSigPtr = aSig<<shiftCount;                  
342     *zExpPtr = 1 - shiftCount;                    
343                                                   
344 }                                                 
345                                                   
346 /*                                                
347 ----------------------------------------------    
348 Packs the sign `zSign', exponent `zExp', and s    
349 double-precision floating-point value, returni    
350 shifted into the proper positions, the three f    
351 together to form the result.  This means that     
352 will be added into the exponent.  Since a prop    
353 will have an integer portion equal to 1, the `    
354 than the desired result exponent whenever `zSi    
355 significand.                                      
356 ----------------------------------------------    
357 */                                                
358 INLINE float64 packFloat64( flag zSign, int16     
359 {                                                 
360                                                   
361     return ( ( (bits64) zSign )<<63 ) + ( ( (b    
362                                                   
363 }                                                 
364                                                   
365 /*                                                
366 ----------------------------------------------    
367 Takes an abstract floating-point value having     
368 and significand `zSig', and returns the proper    
369 point value corresponding to the abstract inpu    
370 value is simply rounded and packed into the do    
371 the inexact exception raised if the abstract i    
372 exactly.  If the abstract value is too large,     
373 inexact exceptions are raised and an infinity     
374 returned.  If the abstract value is too small,    
375 a subnormal number, and the underflow and inex    
376 the abstract input cannot be represented exact    
377 precision floating-point number.                  
378     The input significand `zSig' has its binar    
379 and 61, which is 10 bits to the left of the us    
380 significand must be normalized or smaller.  If    
381 `zExp' must be 0; in that case, the result ret    
382 and it must not require rounding.  In the usua    
383 normalized, `zExp' must be 1 less than the ``t    
384 The handling of underflow and overflow follows    
385 Binary Floating-point Arithmetic.                 
386 ----------------------------------------------    
387 */                                                
388 static float64 roundAndPackFloat64( struct rou    
389 {                                                 
390     int8 roundingMode;                            
391     flag roundNearestEven;                        
392     int16 roundIncrement, roundBits;              
393     flag isTiny;                                  
394                                                   
395     roundingMode = roundData->mode;               
396     roundNearestEven = ( roundingMode == float    
397     roundIncrement = 0x200;                       
398     if ( ! roundNearestEven ) {                   
399         if ( roundingMode == float_round_to_ze    
400             roundIncrement = 0;                   
401         }                                         
402         else {                                    
403             roundIncrement = 0x3FF;               
404             if ( zSign ) {                        
405                 if ( roundingMode == float_rou    
406             }                                     
407             else {                                
408                 if ( roundingMode == float_rou    
409             }                                     
410         }                                         
411     }                                             
412     roundBits = zSig & 0x3FF;                     
413     if ( 0x7FD <= (bits16) zExp ) {               
414         if (    ( 0x7FD < zExp )                  
415              || (    ( zExp == 0x7FD )            
416                   && ( (sbits64) ( zSig + roun    
417            ) {                                    
418             //register int lr = __builtin_retu    
419             //printk("roundAndPackFloat64 call    
420             roundData->exception |= float_flag    
421             return packFloat64( zSign, 0x7FF,     
422         }                                         
423         if ( zExp < 0 ) {                         
424             isTiny =                              
425                    ( float_detect_tininess ==     
426                 || ( zExp < -1 )                  
427                 || ( zSig + roundIncrement < L    
428             shift64RightJamming( zSig, - zExp,    
429             zExp = 0;                             
430             roundBits = zSig & 0x3FF;             
431             if ( isTiny && roundBits ) roundDa    
432         }                                         
433     }                                             
434     if ( roundBits ) roundData->exception |= f    
435     zSig = ( zSig + roundIncrement )>>10;         
436     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 )    
437     if ( zSig == 0 ) zExp = 0;                    
438     return packFloat64( zSign, zExp, zSig );      
439                                                   
440 }                                                 
441                                                   
442 /*                                                
443 ----------------------------------------------    
444 Takes an abstract floating-point value having     
445 and significand `zSig', and returns the proper    
446 point value corresponding to the abstract inpu    
447 `roundAndPackFloat64' except that `zSig' does     
448 any way.  In all cases, `zExp' must be 1 less     
449 point exponent.                                   
450 ----------------------------------------------    
451 */                                                
452 static float64                                    
453  normalizeRoundAndPackFloat64( struct rounding    
454 {                                                 
455     int8 shiftCount;                              
456                                                   
457     shiftCount = countLeadingZeros64( zSig ) -    
458     return roundAndPackFloat64( roundData, zSi    
459                                                   
460 }                                                 
461                                                   
462 #ifdef FLOATX80                                   
463                                                   
464 /*                                                
465 ----------------------------------------------    
466 Returns the fraction bits of the extended doub    
467 value `a'.                                        
468 ----------------------------------------------    
469 */                                                
470 INLINE bits64 extractFloatx80Frac( floatx80 a     
471 {                                                 
472                                                   
473     return a.low;                                 
474                                                   
475 }                                                 
476                                                   
477 /*                                                
478 ----------------------------------------------    
479 Returns the exponent bits of the extended doub    
480 value `a'.                                        
481 ----------------------------------------------    
482 */                                                
483 INLINE int32 extractFloatx80Exp( floatx80 a )     
484 {                                                 
485                                                   
486     return a.high & 0x7FFF;                       
487                                                   
488 }                                                 
489                                                   
490 /*                                                
491 ----------------------------------------------    
492 Returns the sign bit of the extended double-pr    
493 `a'.                                              
494 ----------------------------------------------    
495 */                                                
496 INLINE flag extractFloatx80Sign( floatx80 a )     
497 {                                                 
498                                                   
499     return a.high>>15;                            
500                                                   
501 }                                                 
502                                                   
503 /*                                                
504 ----------------------------------------------    
505 Normalizes the subnormal extended double-preci    
506 represented by the denormalized significand `a    
507 and significand are stored at the locations po    
508 `zSigPtr', respectively.                          
509 ----------------------------------------------    
510 */                                                
511 static void                                       
512  normalizeFloatx80Subnormal( bits64 aSig, int3    
513 {                                                 
514     int8 shiftCount;                              
515                                                   
516     shiftCount = countLeadingZeros64( aSig );     
517     *zSigPtr = aSig<<shiftCount;                  
518     *zExpPtr = 1 - shiftCount;                    
519                                                   
520 }                                                 
521                                                   
522 /*                                                
523 ----------------------------------------------    
524 Packs the sign `zSign', exponent `zExp', and s    
525 extended double-precision floating-point value    
526 ----------------------------------------------    
527 */                                                
528 INLINE floatx80 packFloatx80( flag zSign, int3    
529 {                                                 
530     floatx80 z;                                   
531                                                   
532     z.low = zSig;                                 
533     z.high = ( ( (bits16) zSign )<<15 ) + zExp    
534     z.__padding = 0;                              
535     return z;                                     
536                                                   
537 }                                                 
538                                                   
539 /*                                                
540 ----------------------------------------------    
541 Takes an abstract floating-point value having     
542 and extended significand formed by the concate    
543 and returns the proper extended double-precisi    
544 corresponding to the abstract input.  Ordinari    
545 rounded and packed into the extended double-pr    
546 inexact exception raised if the abstract input    
547 exactly.  If the abstract value is too large,     
548 inexact exceptions are raised and an infinity     
549 returned.  If the abstract value is too small,    
550 a subnormal number, and the underflow and inex    
551 the abstract input cannot be represented exact    
552 double-precision floating-point number.           
553     If `roundingPrecision' is 32 or 64, the re    
554 number of bits as single or double precision,     
555 result is rounded to the full precision of the    
556 format.                                           
557     The input significand must be normalized o    
558 significand is not normalized, `zExp' must be     
559 returned is a subnormal number, and it must no    
560 handling of underflow and overflow follows the    
561 Floating-point Arithmetic.                        
562 ----------------------------------------------    
563 */                                                
564 static floatx80                                   
565  roundAndPackFloatx80(                            
566      struct roundingData *roundData, flag zSig    
567  )                                                
568 {                                                 
569     int8 roundingMode, roundingPrecision;         
570     flag roundNearestEven, increment, isTiny;     
571     int64 roundIncrement, roundMask, roundBits    
572                                                   
573     roundingMode = roundData->mode;               
574     roundingPrecision = roundData->precision;     
575     roundNearestEven = ( roundingMode == float    
576     if ( roundingPrecision == 80 ) goto precis    
577     if ( roundingPrecision == 64 ) {              
578         roundIncrement = LIT64( 0x000000000000    
579         roundMask = LIT64( 0x00000000000007FF     
580     }                                             
581     else if ( roundingPrecision == 32 ) {         
582         roundIncrement = LIT64( 0x000000800000    
583         roundMask = LIT64( 0x000000FFFFFFFFFF     
584     }                                             
585     else {                                        
586         goto precision80;                         
587     }                                             
588     zSig0 |= ( zSig1 != 0 );                      
589     if ( ! roundNearestEven ) {                   
590         if ( roundingMode == float_round_to_ze    
591             roundIncrement = 0;                   
592         }                                         
593         else {                                    
594             roundIncrement = roundMask;           
595             if ( zSign ) {                        
596                 if ( roundingMode == float_rou    
597             }                                     
598             else {                                
599                 if ( roundingMode == float_rou    
600             }                                     
601         }                                         
602     }                                             
603     roundBits = zSig0 & roundMask;                
604     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {      
605         if (    ( 0x7FFE < zExp )                 
606              || ( ( zExp == 0x7FFE ) && ( zSig    
607            ) {                                    
608             goto overflow;                        
609         }                                         
610         if ( zExp <= 0 ) {                        
611             isTiny =                              
612                    ( float_detect_tininess ==     
613                 || ( zExp < 0 )                   
614                 || ( zSig0 <= zSig0 + roundInc    
615             shift64RightJamming( zSig0, 1 - zE    
616             zExp = 0;                             
617             roundBits = zSig0 & roundMask;        
618             if ( isTiny && roundBits ) roundDa    
619             if ( roundBits ) roundData->except    
620             zSig0 += roundIncrement;              
621             if ( (sbits64) zSig0 < 0 ) zExp =     
622             roundIncrement = roundMask + 1;       
623             if ( roundNearestEven && ( roundBi    
624                 roundMask |= roundIncrement;      
625             }                                     
626             zSig0 &= ~ roundMask;                 
627             return packFloatx80( zSign, zExp,     
628         }                                         
629     }                                             
630     if ( roundBits ) roundData->exception |= f    
631     zSig0 += roundIncrement;                      
632     if ( zSig0 < roundIncrement ) {               
633         ++zExp;                                   
634         zSig0 = LIT64( 0x8000000000000000 );      
635     }                                             
636     roundIncrement = roundMask + 1;               
637     if ( roundNearestEven && ( roundBits<<1 ==    
638         roundMask |= roundIncrement;              
639     }                                             
640     zSig0 &= ~ roundMask;                         
641     if ( zSig0 == 0 ) zExp = 0;                   
642     return packFloatx80( zSign, zExp, zSig0 );    
643  precision80:                                     
644     increment = ( (sbits64) zSig1 < 0 );          
645     if ( ! roundNearestEven ) {                   
646         if ( roundingMode == float_round_to_ze    
647             increment = 0;                        
648         }                                         
649         else {                                    
650             if ( zSign ) {                        
651                 increment = ( roundingMode ==     
652             }                                     
653             else {                                
654                 increment = ( roundingMode ==     
655             }                                     
656         }                                         
657     }                                             
658     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {      
659         if (    ( 0x7FFE < zExp )                 
660              || (    ( zExp == 0x7FFE )           
661                   && ( zSig0 == LIT64( 0xFFFFF    
662                   && increment                    
663                 )                                 
664            ) {                                    
665             roundMask = 0;                        
666  overflow:                                        
667             roundData->exception |= float_flag    
668             if (    ( roundingMode == float_ro    
669                  || ( zSign && ( roundingMode     
670                  || ( ! zSign && ( roundingMod    
671                ) {                                
672                 return packFloatx80( zSign, 0x    
673             }                                     
674             return packFloatx80( zSign, 0x7FFF    
675         }                                         
676         if ( zExp <= 0 ) {                        
677             isTiny =                              
678                    ( float_detect_tininess ==     
679                 || ( zExp < 0 )                   
680                 || ! increment                    
681                 || ( zSig0 < LIT64( 0xFFFFFFFF    
682             shift64ExtraRightJamming( zSig0, z    
683             zExp = 0;                             
684             if ( isTiny && zSig1 ) roundData->    
685             if ( zSig1 ) roundData->exception     
686             if ( roundNearestEven ) {             
687                 increment = ( (sbits64) zSig1     
688             }                                     
689             else {                                
690                 if ( zSign ) {                    
691                     increment = ( roundingMode    
692                 }                                 
693                 else {                            
694                     increment = ( roundingMode    
695                 }                                 
696             }                                     
697             if ( increment ) {                    
698                 ++zSig0;                          
699                 zSig0 &= ~ ( ( zSig1 + zSig1 =    
700                 if ( (sbits64) zSig0 < 0 ) zEx    
701             }                                     
702             return packFloatx80( zSign, zExp,     
703         }                                         
704     }                                             
705     if ( zSig1 ) roundData->exception |= float    
706     if ( increment ) {                            
707         ++zSig0;                                  
708         if ( zSig0 == 0 ) {                       
709             ++zExp;                               
710             zSig0 = LIT64( 0x8000000000000000     
711         }                                         
712         else {                                    
713             zSig0 &= ~ ( ( zSig1 + zSig1 == 0     
714         }                                         
715     }                                             
716     else {                                        
717         if ( zSig0 == 0 ) zExp = 0;               
718     }                                             
719                                                   
720     return packFloatx80( zSign, zExp, zSig0 );    
721 }                                                 
722                                                   
723 /*                                                
724 ----------------------------------------------    
725 Takes an abstract floating-point value having     
726 `zExp', and significand formed by the concaten    
727 and returns the proper extended double-precisi    
728 corresponding to the abstract input.  This rou    
729 `roundAndPackFloatx80' except that the input s    
730 normalized.                                       
731 ----------------------------------------------    
732 */                                                
733 static floatx80                                   
734  normalizeRoundAndPackFloatx80(                   
735      struct roundingData *roundData, flag zSig    
736  )                                                
737 {                                                 
738     int8 shiftCount;                              
739                                                   
740     if ( zSig0 == 0 ) {                           
741         zSig0 = zSig1;                            
742         zSig1 = 0;                                
743         zExp -= 64;                               
744     }                                             
745     shiftCount = countLeadingZeros64( zSig0 );    
746     shortShift128Left( zSig0, zSig1, shiftCoun    
747     zExp -= shiftCount;                           
748     return                                        
749         roundAndPackFloatx80( roundData, zSign    
750                                                   
751 }                                                 
752                                                   
753 #endif                                            
754                                                   
755 /*                                                
756 ----------------------------------------------    
757 Returns the result of converting the 32-bit tw    
758 the single-precision floating-point format.  T    
759 according to the IEC/IEEE Standard for Binary     
760 ----------------------------------------------    
761 */                                                
762 float32 int32_to_float32(struct roundingData *    
763 {                                                 
764     flag zSign;                                   
765                                                   
766     if ( a == 0 ) return 0;                       
767     if ( a == 0x80000000 ) return packFloat32(    
768     zSign = ( a < 0 );                            
769     return normalizeRoundAndPackFloat32( round    
770                                                   
771 }                                                 
772                                                   
773 /*                                                
774 ----------------------------------------------    
775 Returns the result of converting the 32-bit tw    
776 the double-precision floating-point format.  T    
777 according to the IEC/IEEE Standard for Binary     
778 ----------------------------------------------    
779 */                                                
780 float64 int32_to_float64( int32 a )               
781 {                                                 
782     flag aSign;                                   
783     uint32 absA;                                  
784     int8 shiftCount;                              
785     bits64 zSig;                                  
786                                                   
787     if ( a == 0 ) return 0;                       
788     aSign = ( a < 0 );                            
789     absA = aSign ? - a : a;                       
790     shiftCount = countLeadingZeros32( absA ) +    
791     zSig = absA;                                  
792     return packFloat64( aSign, 0x432 - shiftCo    
793                                                   
794 }                                                 
795                                                   
796 #ifdef FLOATX80                                   
797                                                   
798 /*                                                
799 ----------------------------------------------    
800 Returns the result of converting the 32-bit tw    
801 to the extended double-precision floating-poin    
802 is performed according to the IEC/IEEE Standar    
803 Arithmetic.                                       
804 ----------------------------------------------    
805 */                                                
806 floatx80 int32_to_floatx80( int32 a )             
807 {                                                 
808     flag zSign;                                   
809     uint32 absA;                                  
810     int8 shiftCount;                              
811     bits64 zSig;                                  
812                                                   
813     if ( a == 0 ) return packFloatx80( 0, 0, 0    
814     zSign = ( a < 0 );                            
815     absA = zSign ? - a : a;                       
816     shiftCount = countLeadingZeros32( absA ) +    
817     zSig = absA;                                  
818     return packFloatx80( zSign, 0x403E - shift    
819                                                   
820 }                                                 
821                                                   
822 #endif                                            
823                                                   
824 /*                                                
825 ----------------------------------------------    
826 Returns the result of converting the single-pr    
827 `a' to the 32-bit two's complement integer for    
828 performed according to the IEC/IEEE Standard f    
829 Arithmetic---which means in particular that th    
830 according to the current rounding mode.  If `a    
831 positive integer is returned.  Otherwise, if t    
832 largest integer with the same sign as `a' is r    
833 ----------------------------------------------    
834 */                                                
835 int32 float32_to_int32( struct roundingData *r    
836 {                                                 
837     flag aSign;                                   
838     int16 aExp, shiftCount;                       
839     bits32 aSig;                                  
840     bits64 zSig;                                  
841                                                   
842     aSig = extractFloat32Frac( a );               
843     aExp = extractFloat32Exp( a );                
844     aSign = extractFloat32Sign( a );              
845     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0    
846     if ( aExp ) aSig |= 0x00800000;               
847     shiftCount = 0xAF - aExp;                     
848     zSig = aSig;                                  
849     zSig <<= 32;                                  
850     if ( 0 < shiftCount ) shift64RightJamming(    
851     return roundAndPackInt32( roundData, aSign    
852                                                   
853 }                                                 
854                                                   
855 /*                                                
856 ----------------------------------------------    
857 Returns the result of converting the single-pr    
858 `a' to the 32-bit two's complement integer for    
859 performed according to the IEC/IEEE Standard f    
860 Arithmetic, except that the conversion is alwa    
861 `a' is a NaN, the largest positive integer is     
862 conversion overflows, the largest integer with    
863 returned.                                         
864 ----------------------------------------------    
865 */                                                
866 int32 float32_to_int32_round_to_zero( float32     
867 {                                                 
868     flag aSign;                                   
869     int16 aExp, shiftCount;                       
870     bits32 aSig;                                  
871     int32 z;                                      
872                                                   
873     aSig = extractFloat32Frac( a );               
874     aExp = extractFloat32Exp( a );                
875     aSign = extractFloat32Sign( a );              
876     shiftCount = aExp - 0x9E;                     
877     if ( 0 <= shiftCount ) {                      
878         if ( a == 0xCF000000 ) return 0x800000    
879         float_raise( float_flag_invalid );        
880         if ( ! aSign || ( ( aExp == 0xFF ) &&     
881         return 0x80000000;                        
882     }                                             
883     else if ( aExp <= 0x7E ) {                    
884         if ( aExp | aSig ) float_raise( float_    
885         return 0;                                 
886     }                                             
887     aSig = ( aSig | 0x00800000 )<<8;              
888     z = aSig>>( - shiftCount );                   
889     if ( (bits32) ( aSig<<( shiftCount & 31 )     
890         float_raise( float_flag_inexact );        
891     }                                             
892     return aSign ? - z : z;                       
893                                                   
894 }                                                 
895                                                   
896 /*                                                
897 ----------------------------------------------    
898 Returns the result of converting the single-pr    
899 `a' to the double-precision floating-point for    
900 performed according to the IEC/IEEE Standard f    
901 Arithmetic.                                       
902 ----------------------------------------------    
903 */                                                
904 float64 float32_to_float64( float32 a )           
905 {                                                 
906     flag aSign;                                   
907     int16 aExp;                                   
908     bits32 aSig;                                  
909                                                   
910     aSig = extractFloat32Frac( a );               
911     aExp = extractFloat32Exp( a );                
912     aSign = extractFloat32Sign( a );              
913     if ( aExp == 0xFF ) {                         
914         if ( aSig ) return commonNaNToFloat64(    
915         return packFloat64( aSign, 0x7FF, 0 );    
916     }                                             
917     if ( aExp == 0 ) {                            
918         if ( aSig == 0 ) return packFloat64( a    
919         normalizeFloat32Subnormal( aSig, &aExp    
920         --aExp;                                   
921     }                                             
922     return packFloat64( aSign, aExp + 0x380, (    
923                                                   
924 }                                                 
925                                                   
926 #ifdef FLOATX80                                   
927                                                   
928 /*                                                
929 ----------------------------------------------    
930 Returns the result of converting the single-pr    
931 `a' to the extended double-precision floating-    
932 is performed according to the IEC/IEEE Standar    
933 Arithmetic.                                       
934 ----------------------------------------------    
935 */                                                
936 floatx80 float32_to_floatx80( float32 a )         
937 {                                                 
938     flag aSign;                                   
939     int16 aExp;                                   
940     bits32 aSig;                                  
941                                                   
942     aSig = extractFloat32Frac( a );               
943     aExp = extractFloat32Exp( a );                
944     aSign = extractFloat32Sign( a );              
945     if ( aExp == 0xFF ) {                         
946         if ( aSig ) return commonNaNToFloatx80    
947         return packFloatx80( aSign, 0x7FFF, LI    
948     }                                             
949     if ( aExp == 0 ) {                            
950         if ( aSig == 0 ) return packFloatx80(     
951         normalizeFloat32Subnormal( aSig, &aExp    
952     }                                             
953     aSig |= 0x00800000;                           
954     return packFloatx80( aSign, aExp + 0x3F80,    
955                                                   
956 }                                                 
957                                                   
958 #endif                                            
959                                                   
960 /*                                                
961 ----------------------------------------------    
962 Rounds the single-precision floating-point val    
963 returns the result as a single-precision float    
964 operation is performed according to the IEC/IE    
965 Floating-point Arithmetic.                        
966 ----------------------------------------------    
967 */                                                
968 float32 float32_round_to_int( struct roundingD    
969 {                                                 
970     flag aSign;                                   
971     int16 aExp;                                   
972     bits32 lastBitMask, roundBitsMask;            
973     int8 roundingMode;                            
974     float32 z;                                    
975                                                   
976     aExp = extractFloat32Exp( a );                
977     if ( 0x96 <= aExp ) {                         
978         if ( ( aExp == 0xFF ) && extractFloat3    
979             return propagateFloat32NaN( a, a )    
980         }                                         
981         return a;                                 
982     }                                             
983     roundingMode = roundData->mode;               
984     if ( aExp <= 0x7E ) {                         
985         if ( (bits32) ( a<<1 ) == 0 ) return a    
986         roundData->exception |= float_flag_ine    
987         aSign = extractFloat32Sign( a );          
988         switch ( roundingMode ) {                 
989          case float_round_nearest_even:           
990             if ( ( aExp == 0x7E ) && extractFl    
991                 return packFloat32( aSign, 0x7    
992             }                                     
993             break;                                
994          case float_round_down:                   
995             return aSign ? 0xBF800000 : 0;        
996          case float_round_up:                     
997             return aSign ? 0x80000000 : 0x3F80    
998         }                                         
999         return packFloat32( aSign, 0, 0 );        
1000     }                                            
1001     lastBitMask = 1;                             
1002     lastBitMask <<= 0x96 - aExp;                 
1003     roundBitsMask = lastBitMask - 1;             
1004     z = a;                                       
1005     if ( roundingMode == float_round_nearest_    
1006         z += lastBitMask>>1;                     
1007         if ( ( z & roundBitsMask ) == 0 ) z &    
1008     }                                            
1009     else if ( roundingMode != float_round_to_    
1010         if ( extractFloat32Sign( z ) ^ ( roun    
1011             z += roundBitsMask;                  
1012         }                                        
1013     }                                            
1014     z &= ~ roundBitsMask;                        
1015     if ( z != a ) roundData->exception |= flo    
1016     return z;                                    
1017                                                  
1018 }                                                
1019                                                  
1020 /*                                               
1021 ---------------------------------------------    
1022 Returns the result of adding the absolute val    
1023 floating-point values `a' and `b'.  If `zSign    
1024 before being returned.  `zSign' is ignored if    
1025 addition is performed according to the IEC/IE    
1026 Floating-point Arithmetic.                       
1027 ---------------------------------------------    
1028 */                                               
1029 static float32 addFloat32Sigs( struct roundin    
1030 {                                                
1031     int16 aExp, bExp, zExp;                      
1032     bits32 aSig, bSig, zSig;                     
1033     int16 expDiff;                               
1034                                                  
1035     aSig = extractFloat32Frac( a );              
1036     aExp = extractFloat32Exp( a );               
1037     bSig = extractFloat32Frac( b );              
1038     bExp = extractFloat32Exp( b );               
1039     expDiff = aExp - bExp;                       
1040     aSig <<= 6;                                  
1041     bSig <<= 6;                                  
1042     if ( 0 < expDiff ) {                         
1043         if ( aExp == 0xFF ) {                    
1044             if ( aSig ) return propagateFloat    
1045             return a;                            
1046         }                                        
1047         if ( bExp == 0 ) {                       
1048             --expDiff;                           
1049         }                                        
1050         else {                                   
1051             bSig |= 0x20000000;                  
1052         }                                        
1053         shift32RightJamming( bSig, expDiff, &    
1054         zExp = aExp;                             
1055     }                                            
1056     else if ( expDiff < 0 ) {                    
1057         if ( bExp == 0xFF ) {                    
1058             if ( bSig ) return propagateFloat    
1059             return packFloat32( zSign, 0xFF,     
1060         }                                        
1061         if ( aExp == 0 ) {                       
1062             ++expDiff;                           
1063         }                                        
1064         else {                                   
1065             aSig |= 0x20000000;                  
1066         }                                        
1067         shift32RightJamming( aSig, - expDiff,    
1068         zExp = bExp;                             
1069     }                                            
1070     else {                                       
1071         if ( aExp == 0xFF ) {                    
1072             if ( aSig | bSig ) return propaga    
1073             return a;                            
1074         }                                        
1075         if ( aExp == 0 ) return packFloat32(     
1076         zSig = 0x40000000 + aSig + bSig;         
1077         zExp = aExp;                             
1078         goto roundAndPack;                       
1079     }                                            
1080     aSig |= 0x20000000;                          
1081     zSig = ( aSig + bSig )<<1;                   
1082     --zExp;                                      
1083     if ( (sbits32) zSig < 0 ) {                  
1084         zSig = aSig + bSig;                      
1085         ++zExp;                                  
1086     }                                            
1087  roundAndPack:                                   
1088     return roundAndPackFloat32( roundData, zS    
1089                                                  
1090 }                                                
1091                                                  
1092 /*                                               
1093 ---------------------------------------------    
1094 Returns the result of subtracting the absolut    
1095 precision floating-point values `a' and `b'.     
1096 difference is negated before being returned.     
1097 result is a NaN.  The subtraction is performe    
1098 Standard for Binary Floating-point Arithmetic    
1099 ---------------------------------------------    
1100 */                                               
1101 static float32 subFloat32Sigs( struct roundin    
1102 {                                                
1103     int16 aExp, bExp, zExp;                      
1104     bits32 aSig, bSig, zSig;                     
1105     int16 expDiff;                               
1106                                                  
1107     aSig = extractFloat32Frac( a );              
1108     aExp = extractFloat32Exp( a );               
1109     bSig = extractFloat32Frac( b );              
1110     bExp = extractFloat32Exp( b );               
1111     expDiff = aExp - bExp;                       
1112     aSig <<= 7;                                  
1113     bSig <<= 7;                                  
1114     if ( 0 < expDiff ) goto aExpBigger;          
1115     if ( expDiff < 0 ) goto bExpBigger;          
1116     if ( aExp == 0xFF ) {                        
1117         if ( aSig | bSig ) return propagateFl    
1118         roundData->exception |= float_flag_in    
1119         return float32_default_nan;              
1120     }                                            
1121     if ( aExp == 0 ) {                           
1122         aExp = 1;                                
1123         bExp = 1;                                
1124     }                                            
1125     if ( bSig < aSig ) goto aBigger;             
1126     if ( aSig < bSig ) goto bBigger;             
1127     return packFloat32( roundData->mode == fl    
1128  bExpBigger:                                     
1129     if ( bExp == 0xFF ) {                        
1130         if ( bSig ) return propagateFloat32Na    
1131         return packFloat32( zSign ^ 1, 0xFF,     
1132     }                                            
1133     if ( aExp == 0 ) {                           
1134         ++expDiff;                               
1135     }                                            
1136     else {                                       
1137         aSig |= 0x40000000;                      
1138     }                                            
1139     shift32RightJamming( aSig, - expDiff, &aS    
1140     bSig |= 0x40000000;                          
1141  bBigger:                                        
1142     zSig = bSig - aSig;                          
1143     zExp = bExp;                                 
1144     zSign ^= 1;                                  
1145     goto normalizeRoundAndPack;                  
1146  aExpBigger:                                     
1147     if ( aExp == 0xFF ) {                        
1148         if ( aSig ) return propagateFloat32Na    
1149         return a;                                
1150     }                                            
1151     if ( bExp == 0 ) {                           
1152         --expDiff;                               
1153     }                                            
1154     else {                                       
1155         bSig |= 0x40000000;                      
1156     }                                            
1157     shift32RightJamming( bSig, expDiff, &bSig    
1158     aSig |= 0x40000000;                          
1159  aBigger:                                        
1160     zSig = aSig - bSig;                          
1161     zExp = aExp;                                 
1162  normalizeRoundAndPack:                          
1163     --zExp;                                      
1164     return normalizeRoundAndPackFloat32( roun    
1165                                                  
1166 }                                                
1167                                                  
1168 /*                                               
1169 ---------------------------------------------    
1170 Returns the result of adding the single-preci    
1171 and `b'.  The operation is performed accordin    
1172 Binary Floating-point Arithmetic.                
1173 ---------------------------------------------    
1174 */                                               
1175 float32 float32_add( struct roundingData *rou    
1176 {                                                
1177     flag aSign, bSign;                           
1178                                                  
1179     aSign = extractFloat32Sign( a );             
1180     bSign = extractFloat32Sign( b );             
1181     if ( aSign == bSign ) {                      
1182         return addFloat32Sigs( roundData, a,     
1183     }                                            
1184     else {                                       
1185         return subFloat32Sigs( roundData, a,     
1186     }                                            
1187                                                  
1188 }                                                
1189                                                  
1190 /*                                               
1191 ---------------------------------------------    
1192 Returns the result of subtracting the single-    
1193 `a' and `b'.  The operation is performed acco    
1194 for Binary Floating-point Arithmetic.            
1195 ---------------------------------------------    
1196 */                                               
1197 float32 float32_sub( struct roundingData *rou    
1198 {                                                
1199     flag aSign, bSign;                           
1200                                                  
1201     aSign = extractFloat32Sign( a );             
1202     bSign = extractFloat32Sign( b );             
1203     if ( aSign == bSign ) {                      
1204         return subFloat32Sigs( roundData, a,     
1205     }                                            
1206     else {                                       
1207         return addFloat32Sigs( roundData, a,     
1208     }                                            
1209                                                  
1210 }                                                
1211                                                  
1212 /*                                               
1213 ---------------------------------------------    
1214 Returns the result of multiplying the single-    
1215 `a' and `b'.  The operation is performed acco    
1216 for Binary Floating-point Arithmetic.            
1217 ---------------------------------------------    
1218 */                                               
1219 float32 float32_mul( struct roundingData *rou    
1220 {                                                
1221     flag aSign, bSign, zSign;                    
1222     int16 aExp, bExp, zExp;                      
1223     bits32 aSig, bSig;                           
1224     bits64 zSig64;                               
1225     bits32 zSig;                                 
1226                                                  
1227     aSig = extractFloat32Frac( a );              
1228     aExp = extractFloat32Exp( a );               
1229     aSign = extractFloat32Sign( a );             
1230     bSig = extractFloat32Frac( b );              
1231     bExp = extractFloat32Exp( b );               
1232     bSign = extractFloat32Sign( b );             
1233     zSign = aSign ^ bSign;                       
1234     if ( aExp == 0xFF ) {                        
1235         if ( aSig || ( ( bExp == 0xFF ) && bS    
1236             return propagateFloat32NaN( a, b     
1237         }                                        
1238         if ( ( bExp | bSig ) == 0 ) {            
1239             roundData->exception |= float_fla    
1240             return float32_default_nan;          
1241         }                                        
1242         return packFloat32( zSign, 0xFF, 0 );    
1243     }                                            
1244     if ( bExp == 0xFF ) {                        
1245         if ( bSig ) return propagateFloat32Na    
1246         if ( ( aExp | aSig ) == 0 ) {            
1247             roundData->exception |= float_fla    
1248             return float32_default_nan;          
1249         }                                        
1250         return packFloat32( zSign, 0xFF, 0 );    
1251     }                                            
1252     if ( aExp == 0 ) {                           
1253         if ( aSig == 0 ) return packFloat32(     
1254         normalizeFloat32Subnormal( aSig, &aEx    
1255     }                                            
1256     if ( bExp == 0 ) {                           
1257         if ( bSig == 0 ) return packFloat32(     
1258         normalizeFloat32Subnormal( bSig, &bEx    
1259     }                                            
1260     zExp = aExp + bExp - 0x7F;                   
1261     aSig = ( aSig | 0x00800000 )<<7;             
1262     bSig = ( bSig | 0x00800000 )<<8;             
1263     shift64RightJamming( ( (bits64) aSig ) *     
1264     zSig = zSig64;                               
1265     if ( 0 <= (sbits32) ( zSig<<1 ) ) {          
1266         zSig <<= 1;                              
1267         --zExp;                                  
1268     }                                            
1269     return roundAndPackFloat32( roundData, zS    
1270                                                  
1271 }                                                
1272                                                  
1273 /*                                               
1274 ---------------------------------------------    
1275 Returns the result of dividing the single-pre    
1276 by the corresponding value `b'.  The operatio    
1277 IEC/IEEE Standard for Binary Floating-point A    
1278 ---------------------------------------------    
1279 */                                               
1280 float32 float32_div( struct roundingData *rou    
1281 {                                                
1282     flag aSign, bSign, zSign;                    
1283     int16 aExp, bExp, zExp;                      
1284     bits32 aSig, bSig, zSig;                     
1285                                                  
1286     aSig = extractFloat32Frac( a );              
1287     aExp = extractFloat32Exp( a );               
1288     aSign = extractFloat32Sign( a );             
1289     bSig = extractFloat32Frac( b );              
1290     bExp = extractFloat32Exp( b );               
1291     bSign = extractFloat32Sign( b );             
1292     zSign = aSign ^ bSign;                       
1293     if ( aExp == 0xFF ) {                        
1294         if ( aSig ) return propagateFloat32Na    
1295         if ( bExp == 0xFF ) {                    
1296             if ( bSig ) return propagateFloat    
1297             roundData->exception |= float_fla    
1298             return float32_default_nan;          
1299         }                                        
1300         return packFloat32( zSign, 0xFF, 0 );    
1301     }                                            
1302     if ( bExp == 0xFF ) {                        
1303         if ( bSig ) return propagateFloat32Na    
1304         return packFloat32( zSign, 0, 0 );       
1305     }                                            
1306     if ( bExp == 0 ) {                           
1307         if ( bSig == 0 ) {                       
1308             if ( ( aExp | aSig ) == 0 ) {        
1309                 roundData->exception |= float    
1310                 return float32_default_nan;      
1311             }                                    
1312             roundData->exception |= float_fla    
1313             return packFloat32( zSign, 0xFF,     
1314         }                                        
1315         normalizeFloat32Subnormal( bSig, &bEx    
1316     }                                            
1317     if ( aExp == 0 ) {                           
1318         if ( aSig == 0 ) return packFloat32(     
1319         normalizeFloat32Subnormal( aSig, &aEx    
1320     }                                            
1321     zExp = aExp - bExp + 0x7D;                   
1322     aSig = ( aSig | 0x00800000 )<<7;             
1323     bSig = ( bSig | 0x00800000 )<<8;             
1324     if ( bSig <= ( aSig + aSig ) ) {             
1325         aSig >>= 1;                              
1326         ++zExp;                                  
1327     }                                            
1328     {                                            
1329         bits64 tmp = ( (bits64) aSig )<<32;      
1330         do_div( tmp, bSig );                     
1331         zSig = tmp;                              
1332     }                                            
1333     if ( ( zSig & 0x3F ) == 0 ) {                
1334         zSig |= ( ( (bits64) bSig ) * zSig !=    
1335     }                                            
1336     return roundAndPackFloat32( roundData, zS    
1337                                                  
1338 }                                                
1339                                                  
1340 /*                                               
1341 ---------------------------------------------    
1342 Returns the remainder of the single-precision    
1343 with respect to the corresponding value `b'.     
1344 according to the IEC/IEEE Standard for Binary    
1345 ---------------------------------------------    
1346 */                                               
1347 float32 float32_rem( struct roundingData *rou    
1348 {                                                
1349     flag aSign, bSign, zSign;                    
1350     int16 aExp, bExp, expDiff;                   
1351     bits32 aSig, bSig;                           
1352     bits32 q;                                    
1353     bits64 aSig64, bSig64, q64;                  
1354     bits32 alternateASig;                        
1355     sbits32 sigMean;                             
1356                                                  
1357     aSig = extractFloat32Frac( a );              
1358     aExp = extractFloat32Exp( a );               
1359     aSign = extractFloat32Sign( a );             
1360     bSig = extractFloat32Frac( b );              
1361     bExp = extractFloat32Exp( b );               
1362     bSign = extractFloat32Sign( b );             
1363     if ( aExp == 0xFF ) {                        
1364         if ( aSig || ( ( bExp == 0xFF ) && bS    
1365             return propagateFloat32NaN( a, b     
1366         }                                        
1367         roundData->exception |= float_flag_in    
1368         return float32_default_nan;              
1369     }                                            
1370     if ( bExp == 0xFF ) {                        
1371         if ( bSig ) return propagateFloat32Na    
1372         return a;                                
1373     }                                            
1374     if ( bExp == 0 ) {                           
1375         if ( bSig == 0 ) {                       
1376             roundData->exception |= float_fla    
1377             return float32_default_nan;          
1378         }                                        
1379         normalizeFloat32Subnormal( bSig, &bEx    
1380     }                                            
1381     if ( aExp == 0 ) {                           
1382         if ( aSig == 0 ) return a;               
1383         normalizeFloat32Subnormal( aSig, &aEx    
1384     }                                            
1385     expDiff = aExp - bExp;                       
1386     aSig |= 0x00800000;                          
1387     bSig |= 0x00800000;                          
1388     if ( expDiff < 32 ) {                        
1389         aSig <<= 8;                              
1390         bSig <<= 8;                              
1391         if ( expDiff < 0 ) {                     
1392             if ( expDiff < -1 ) return a;        
1393             aSig >>= 1;                          
1394         }                                        
1395         q = ( bSig <= aSig );                    
1396         if ( q ) aSig -= bSig;                   
1397         if ( 0 < expDiff ) {                     
1398             bits64 tmp = ( (bits64) aSig )<<3    
1399             do_div( tmp, bSig );                 
1400             q = tmp;                             
1401             q >>= 32 - expDiff;                  
1402             bSig >>= 2;                          
1403             aSig = ( ( aSig>>1 )<<( expDiff -    
1404         }                                        
1405         else {                                   
1406             aSig >>= 2;                          
1407             bSig >>= 2;                          
1408         }                                        
1409     }                                            
1410     else {                                       
1411         if ( bSig <= aSig ) aSig -= bSig;        
1412         aSig64 = ( (bits64) aSig )<<40;          
1413         bSig64 = ( (bits64) bSig )<<40;          
1414         expDiff -= 64;                           
1415         while ( 0 < expDiff ) {                  
1416             q64 = estimateDiv128To64( aSig64,    
1417             q64 = ( 2 < q64 ) ? q64 - 2 : 0;     
1418             aSig64 = - ( ( bSig * q64 )<<38 )    
1419             expDiff -= 62;                       
1420         }                                        
1421         expDiff += 64;                           
1422         q64 = estimateDiv128To64( aSig64, 0,     
1423         q64 = ( 2 < q64 ) ? q64 - 2 : 0;         
1424         q = q64>>( 64 - expDiff );               
1425         bSig <<= 6;                              
1426         aSig = ( ( aSig64>>33 )<<( expDiff -     
1427     }                                            
1428     do {                                         
1429         alternateASig = aSig;                    
1430         ++q;                                     
1431         aSig -= bSig;                            
1432     } while ( 0 <= (sbits32) aSig );             
1433     sigMean = aSig + alternateASig;              
1434     if ( ( sigMean < 0 ) || ( ( sigMean == 0     
1435         aSig = alternateASig;                    
1436     }                                            
1437     zSign = ( (sbits32) aSig < 0 );              
1438     if ( zSign ) aSig = - aSig;                  
1439     return normalizeRoundAndPackFloat32( roun    
1440                                                  
1441 }                                                
1442                                                  
1443 /*                                               
1444 ---------------------------------------------    
1445 Returns the square root of the single-precisi    
1446 The operation is performed according to the I    
1447 Floating-point Arithmetic.                       
1448 ---------------------------------------------    
1449 */                                               
1450 float32 float32_sqrt( struct roundingData *ro    
1451 {                                                
1452     flag aSign;                                  
1453     int16 aExp, zExp;                            
1454     bits32 aSig, zSig;                           
1455     bits64 rem, term;                            
1456                                                  
1457     aSig = extractFloat32Frac( a );              
1458     aExp = extractFloat32Exp( a );               
1459     aSign = extractFloat32Sign( a );             
1460     if ( aExp == 0xFF ) {                        
1461         if ( aSig ) return propagateFloat32Na    
1462         if ( ! aSign ) return a;                 
1463         roundData->exception |= float_flag_in    
1464         return float32_default_nan;              
1465     }                                            
1466     if ( aSign ) {                               
1467         if ( ( aExp | aSig ) == 0 ) return a;    
1468         roundData->exception |= float_flag_in    
1469         return float32_default_nan;              
1470     }                                            
1471     if ( aExp == 0 ) {                           
1472         if ( aSig == 0 ) return 0;               
1473         normalizeFloat32Subnormal( aSig, &aEx    
1474     }                                            
1475     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;        
1476     aSig = ( aSig | 0x00800000 )<<8;             
1477     zSig = estimateSqrt32( aExp, aSig ) + 2;     
1478     if ( ( zSig & 0x7F ) <= 5 ) {                
1479         if ( zSig < 2 ) {                        
1480             zSig = 0xFFFFFFFF;                   
1481         }                                        
1482         else {                                   
1483             aSig >>= aExp & 1;                   
1484             term = ( (bits64) zSig ) * zSig;     
1485             rem = ( ( (bits64) aSig )<<32 ) -    
1486             while ( (sbits64) rem < 0 ) {        
1487                 --zSig;                          
1488                 rem += ( ( (bits64) zSig )<<1    
1489             }                                    
1490             zSig |= ( rem != 0 );                
1491         }                                        
1492     }                                            
1493     shift32RightJamming( zSig, 1, &zSig );       
1494     return roundAndPackFloat32( roundData, 0,    
1495                                                  
1496 }                                                
1497                                                  
1498 /*                                               
1499 ---------------------------------------------    
1500 Returns 1 if the single-precision floating-po    
1501 corresponding value `b', and 0 otherwise.  Th    
1502 according to the IEC/IEEE Standard for Binary    
1503 ---------------------------------------------    
1504 */                                               
1505 flag float32_eq( float32 a, float32 b )          
1506 {                                                
1507                                                  
1508     if (    ( ( extractFloat32Exp( a ) == 0xF    
1509          || ( ( extractFloat32Exp( b ) == 0xF    
1510        ) {                                       
1511         if ( float32_is_signaling_nan( a ) ||    
1512             float_raise( float_flag_invalid )    
1513         }                                        
1514         return 0;                                
1515     }                                            
1516     return ( a == b ) || ( (bits32) ( ( a | b    
1517                                                  
1518 }                                                
1519                                                  
1520 /*                                               
1521 ---------------------------------------------    
1522 Returns 1 if the single-precision floating-po    
1523 equal to the corresponding value `b', and 0 o    
1524 performed according to the IEC/IEEE Standard     
1525 Arithmetic.                                      
1526 ---------------------------------------------    
1527 */                                               
1528 flag float32_le( float32 a, float32 b )          
1529 {                                                
1530     flag aSign, bSign;                           
1531                                                  
1532     if (    ( ( extractFloat32Exp( a ) == 0xF    
1533          || ( ( extractFloat32Exp( b ) == 0xF    
1534        ) {                                       
1535         float_raise( float_flag_invalid );       
1536         return 0;                                
1537     }                                            
1538     aSign = extractFloat32Sign( a );             
1539     bSign = extractFloat32Sign( b );             
1540     if ( aSign != bSign ) return aSign || ( (    
1541     return ( a == b ) || ( aSign ^ ( a < b )     
1542                                                  
1543 }                                                
1544                                                  
1545 /*                                               
1546 ---------------------------------------------    
1547 Returns 1 if the single-precision floating-po    
1548 the corresponding value `b', and 0 otherwise.    
1549 according to the IEC/IEEE Standard for Binary    
1550 ---------------------------------------------    
1551 */                                               
1552 flag float32_lt( float32 a, float32 b )          
1553 {                                                
1554     flag aSign, bSign;                           
1555                                                  
1556     if (    ( ( extractFloat32Exp( a ) == 0xF    
1557          || ( ( extractFloat32Exp( b ) == 0xF    
1558        ) {                                       
1559         float_raise( float_flag_invalid );       
1560         return 0;                                
1561     }                                            
1562     aSign = extractFloat32Sign( a );             
1563     bSign = extractFloat32Sign( b );             
1564     if ( aSign != bSign ) return aSign && ( (    
1565     return ( a != b ) && ( aSign ^ ( a < b )     
1566                                                  
1567 }                                                
1568                                                  
1569 /*                                               
1570 ---------------------------------------------    
1571 Returns 1 if the single-precision floating-po    
1572 corresponding value `b', and 0 otherwise.  Th    
1573 if either operand is a NaN.  Otherwise, the c    
1574 according to the IEC/IEEE Standard for Binary    
1575 ---------------------------------------------    
1576 */                                               
1577 flag float32_eq_signaling( float32 a, float32    
1578 {                                                
1579                                                  
1580     if (    ( ( extractFloat32Exp( a ) == 0xF    
1581          || ( ( extractFloat32Exp( b ) == 0xF    
1582        ) {                                       
1583         float_raise( float_flag_invalid );       
1584         return 0;                                
1585     }                                            
1586     return ( a == b ) || ( (bits32) ( ( a | b    
1587                                                  
1588 }                                                
1589                                                  
1590 /*                                               
1591 ---------------------------------------------    
1592 Returns 1 if the single-precision floating-po    
1593 equal to the corresponding value `b', and 0 o    
1594 cause an exception.  Otherwise, the compariso    
1595 IEC/IEEE Standard for Binary Floating-point A    
1596 ---------------------------------------------    
1597 */                                               
1598 flag float32_le_quiet( float32 a, float32 b )    
1599 {                                                
1600     flag aSign, bSign;                           
1601     //int16 aExp, bExp;                          
1602                                                  
1603     if (    ( ( extractFloat32Exp( a ) == 0xF    
1604          || ( ( extractFloat32Exp( b ) == 0xF    
1605        ) {                                       
1606         /* Do nothing, even if NaN as we're q    
1607         return 0;                                
1608     }                                            
1609     aSign = extractFloat32Sign( a );             
1610     bSign = extractFloat32Sign( b );             
1611     if ( aSign != bSign ) return aSign || ( (    
1612     return ( a == b ) || ( aSign ^ ( a < b )     
1613                                                  
1614 }                                                
1615                                                  
1616 /*                                               
1617 ---------------------------------------------    
1618 Returns 1 if the single-precision floating-po    
1619 the corresponding value `b', and 0 otherwise.    
1620 exception.  Otherwise, the comparison is perf    
1621 Standard for Binary Floating-point Arithmetic    
1622 ---------------------------------------------    
1623 */                                               
1624 flag float32_lt_quiet( float32 a, float32 b )    
1625 {                                                
1626     flag aSign, bSign;                           
1627                                                  
1628     if (    ( ( extractFloat32Exp( a ) == 0xF    
1629          || ( ( extractFloat32Exp( b ) == 0xF    
1630        ) {                                       
1631         /* Do nothing, even if NaN as we're q    
1632         return 0;                                
1633     }                                            
1634     aSign = extractFloat32Sign( a );             
1635     bSign = extractFloat32Sign( b );             
1636     if ( aSign != bSign ) return aSign && ( (    
1637     return ( a != b ) && ( aSign ^ ( a < b )     
1638                                                  
1639 }                                                
1640                                                  
1641 /*                                               
1642 ---------------------------------------------    
1643 Returns the result of converting the double-p    
1644 `a' to the 32-bit two's complement integer fo    
1645 performed according to the IEC/IEEE Standard     
1646 Arithmetic---which means in particular that t    
1647 according to the current rounding mode.  If `    
1648 positive integer is returned.  Otherwise, if     
1649 largest integer with the same sign as `a' is     
1650 ---------------------------------------------    
1651 */                                               
1652 int32 float64_to_int32( struct roundingData *    
1653 {                                                
1654     flag aSign;                                  
1655     int16 aExp, shiftCount;                      
1656     bits64 aSig;                                 
1657                                                  
1658     aSig = extractFloat64Frac( a );              
1659     aExp = extractFloat64Exp( a );               
1660     aSign = extractFloat64Sign( a );             
1661     if ( ( aExp == 0x7FF ) && aSig ) aSign =     
1662     if ( aExp ) aSig |= LIT64( 0x001000000000    
1663     shiftCount = 0x42C - aExp;                   
1664     if ( 0 < shiftCount ) shift64RightJamming    
1665     return roundAndPackInt32( roundData, aSig    
1666                                                  
1667 }                                                
1668                                                  
1669 /*                                               
1670 ---------------------------------------------    
1671 Returns the result of converting the double-p    
1672 `a' to the 32-bit two's complement integer fo    
1673 performed according to the IEC/IEEE Standard     
1674 Arithmetic, except that the conversion is alw    
1675 `a' is a NaN, the largest positive integer is    
1676 conversion overflows, the largest integer wit    
1677 returned.                                        
1678 ---------------------------------------------    
1679 */                                               
1680 int32 float64_to_int32_round_to_zero( float64    
1681 {                                                
1682     flag aSign;                                  
1683     int16 aExp, shiftCount;                      
1684     bits64 aSig, savedASig;                      
1685     int32 z;                                     
1686                                                  
1687     aSig = extractFloat64Frac( a );              
1688     aExp = extractFloat64Exp( a );               
1689     aSign = extractFloat64Sign( a );             
1690     shiftCount = 0x433 - aExp;                   
1691     if ( shiftCount < 21 ) {                     
1692         if ( ( aExp == 0x7FF ) && aSig ) aSig    
1693         goto invalid;                            
1694     }                                            
1695     else if ( 52 < shiftCount ) {                
1696         if ( aExp || aSig ) float_raise( floa    
1697         return 0;                                
1698     }                                            
1699     aSig |= LIT64( 0x0010000000000000 );         
1700     savedASig = aSig;                            
1701     aSig >>= shiftCount;                         
1702     z = aSig;                                    
1703     if ( aSign ) z = - z;                        
1704     if ( ( z < 0 ) ^ aSign ) {                   
1705  invalid:                                        
1706         float_raise( float_flag_invalid );       
1707         return aSign ? 0x80000000 : 0x7FFFFFF    
1708     }                                            
1709     if ( ( aSig<<shiftCount ) != savedASig )     
1710         float_raise( float_flag_inexact );       
1711     }                                            
1712     return z;                                    
1713                                                  
1714 }                                                
1715                                                  
1716 /*                                               
1717 ---------------------------------------------    
1718 Returns the result of converting the double-p    
1719 `a' to the 32-bit two's complement unsigned i    
1720 is performed according to the IEC/IEEE Standa    
1721 Arithmetic---which means in particular that t    
1722 according to the current rounding mode.  If `    
1723 positive integer is returned.  Otherwise, if     
1724 largest positive integer is returned.            
1725 ---------------------------------------------    
1726 */                                               
1727 int32 float64_to_uint32( struct roundingData     
1728 {                                                
1729     flag aSign;                                  
1730     int16 aExp, shiftCount;                      
1731     bits64 aSig;                                 
1732                                                  
1733     aSig = extractFloat64Frac( a );              
1734     aExp = extractFloat64Exp( a );               
1735     aSign = 0; //extractFloat64Sign( a );        
1736     //if ( ( aExp == 0x7FF ) && aSig ) aSign     
1737     if ( aExp ) aSig |= LIT64( 0x001000000000    
1738     shiftCount = 0x42C - aExp;                   
1739     if ( 0 < shiftCount ) shift64RightJamming    
1740     return roundAndPackInt32( roundData, aSig    
1741 }                                                
1742                                                  
1743 /*                                               
1744 ---------------------------------------------    
1745 Returns the result of converting the double-p    
1746 `a' to the 32-bit two's complement integer fo    
1747 performed according to the IEC/IEEE Standard     
1748 Arithmetic, except that the conversion is alw    
1749 `a' is a NaN, the largest positive integer is    
1750 conversion overflows, the largest positive in    
1751 ---------------------------------------------    
1752 */                                               
1753 int32 float64_to_uint32_round_to_zero( float6    
1754 {                                                
1755     flag aSign;                                  
1756     int16 aExp, shiftCount;                      
1757     bits64 aSig, savedASig;                      
1758     int32 z;                                     
1759                                                  
1760     aSig = extractFloat64Frac( a );              
1761     aExp = extractFloat64Exp( a );               
1762     aSign = extractFloat64Sign( a );             
1763     shiftCount = 0x433 - aExp;                   
1764     if ( shiftCount < 21 ) {                     
1765         if ( ( aExp == 0x7FF ) && aSig ) aSig    
1766         goto invalid;                            
1767     }                                            
1768     else if ( 52 < shiftCount ) {                
1769         if ( aExp || aSig ) float_raise( floa    
1770         return 0;                                
1771     }                                            
1772     aSig |= LIT64( 0x0010000000000000 );         
1773     savedASig = aSig;                            
1774     aSig >>= shiftCount;                         
1775     z = aSig;                                    
1776     if ( aSign ) z = - z;                        
1777     if ( ( z < 0 ) ^ aSign ) {                   
1778  invalid:                                        
1779         float_raise( float_flag_invalid );       
1780         return aSign ? 0x80000000 : 0x7FFFFFF    
1781     }                                            
1782     if ( ( aSig<<shiftCount ) != savedASig )     
1783         float_raise( float_flag_inexact );       
1784     }                                            
1785     return z;                                    
1786 }                                                
1787                                                  
1788 /*                                               
1789 ---------------------------------------------    
1790 Returns the result of converting the double-p    
1791 `a' to the single-precision floating-point fo    
1792 performed according to the IEC/IEEE Standard     
1793 Arithmetic.                                      
1794 ---------------------------------------------    
1795 */                                               
1796 float32 float64_to_float32( struct roundingDa    
1797 {                                                
1798     flag aSign;                                  
1799     int16 aExp;                                  
1800     bits64 aSig;                                 
1801     bits32 zSig;                                 
1802                                                  
1803     aSig = extractFloat64Frac( a );              
1804     aExp = extractFloat64Exp( a );               
1805     aSign = extractFloat64Sign( a );             
1806     if ( aExp == 0x7FF ) {                       
1807         if ( aSig ) return commonNaNToFloat32    
1808         return packFloat32( aSign, 0xFF, 0 );    
1809     }                                            
1810     shift64RightJamming( aSig, 22, &aSig );      
1811     zSig = aSig;                                 
1812     if ( aExp || zSig ) {                        
1813         zSig |= 0x40000000;                      
1814         aExp -= 0x381;                           
1815     }                                            
1816     return roundAndPackFloat32( roundData, aS    
1817                                                  
1818 }                                                
1819                                                  
1820 #ifdef FLOATX80                                  
1821                                                  
1822 /*                                               
1823 ---------------------------------------------    
1824 Returns the result of converting the double-p    
1825 `a' to the extended double-precision floating    
1826 is performed according to the IEC/IEEE Standa    
1827 Arithmetic.                                      
1828 ---------------------------------------------    
1829 */                                               
1830 floatx80 float64_to_floatx80( float64 a )        
1831 {                                                
1832     flag aSign;                                  
1833     int16 aExp;                                  
1834     bits64 aSig;                                 
1835                                                  
1836     aSig = extractFloat64Frac( a );              
1837     aExp = extractFloat64Exp( a );               
1838     aSign = extractFloat64Sign( a );             
1839     if ( aExp == 0x7FF ) {                       
1840         if ( aSig ) return commonNaNToFloatx8    
1841         return packFloatx80( aSign, 0x7FFF, L    
1842     }                                            
1843     if ( aExp == 0 ) {                           
1844         if ( aSig == 0 ) return packFloatx80(    
1845         normalizeFloat64Subnormal( aSig, &aEx    
1846     }                                            
1847     return                                       
1848         packFloatx80(                            
1849             aSign, aExp + 0x3C00, ( aSig | LI    
1850                                                  
1851 }                                                
1852                                                  
1853 #endif                                           
1854                                                  
1855 /*                                               
1856 ---------------------------------------------    
1857 Rounds the double-precision floating-point va    
1858 returns the result as a double-precision floa    
1859 operation is performed according to the IEC/I    
1860 Floating-point Arithmetic.                       
1861 ---------------------------------------------    
1862 */                                               
1863 float64 float64_round_to_int( struct rounding    
1864 {                                                
1865     flag aSign;                                  
1866     int16 aExp;                                  
1867     bits64 lastBitMask, roundBitsMask;           
1868     int8 roundingMode;                           
1869     float64 z;                                   
1870                                                  
1871     aExp = extractFloat64Exp( a );               
1872     if ( 0x433 <= aExp ) {                       
1873         if ( ( aExp == 0x7FF ) && extractFloa    
1874             return propagateFloat64NaN( a, a     
1875         }                                        
1876         return a;                                
1877     }                                            
1878     if ( aExp <= 0x3FE ) {                       
1879         if ( (bits64) ( a<<1 ) == 0 ) return     
1880         roundData->exception |= float_flag_in    
1881         aSign = extractFloat64Sign( a );         
1882         switch ( roundData->mode ) {             
1883          case float_round_nearest_even:          
1884             if ( ( aExp == 0x3FE ) && extract    
1885                 return packFloat64( aSign, 0x    
1886             }                                    
1887             break;                               
1888          case float_round_down:                  
1889             return aSign ? LIT64( 0xBFF000000    
1890          case float_round_up:                    
1891             return                               
1892             aSign ? LIT64( 0x8000000000000000    
1893         }                                        
1894         return packFloat64( aSign, 0, 0 );       
1895     }                                            
1896     lastBitMask = 1;                             
1897     lastBitMask <<= 0x433 - aExp;                
1898     roundBitsMask = lastBitMask - 1;             
1899     z = a;                                       
1900     roundingMode = roundData->mode;              
1901     if ( roundingMode == float_round_nearest_    
1902         z += lastBitMask>>1;                     
1903         if ( ( z & roundBitsMask ) == 0 ) z &    
1904     }                                            
1905     else if ( roundingMode != float_round_to_    
1906         if ( extractFloat64Sign( z ) ^ ( roun    
1907             z += roundBitsMask;                  
1908         }                                        
1909     }                                            
1910     z &= ~ roundBitsMask;                        
1911     if ( z != a ) roundData->exception |= flo    
1912     return z;                                    
1913                                                  
1914 }                                                
1915                                                  
1916 /*                                               
1917 ---------------------------------------------    
1918 Returns the result of adding the absolute val    
1919 floating-point values `a' and `b'.  If `zSign    
1920 before being returned.  `zSign' is ignored if    
1921 addition is performed according to the IEC/IE    
1922 Floating-point Arithmetic.                       
1923 ---------------------------------------------    
1924 */                                               
1925 static float64 addFloat64Sigs( struct roundin    
1926 {                                                
1927     int16 aExp, bExp, zExp;                      
1928     bits64 aSig, bSig, zSig;                     
1929     int16 expDiff;                               
1930                                                  
1931     aSig = extractFloat64Frac( a );              
1932     aExp = extractFloat64Exp( a );               
1933     bSig = extractFloat64Frac( b );              
1934     bExp = extractFloat64Exp( b );               
1935     expDiff = aExp - bExp;                       
1936     aSig <<= 9;                                  
1937     bSig <<= 9;                                  
1938     if ( 0 < expDiff ) {                         
1939         if ( aExp == 0x7FF ) {                   
1940             if ( aSig ) return propagateFloat    
1941             return a;                            
1942         }                                        
1943         if ( bExp == 0 ) {                       
1944             --expDiff;                           
1945         }                                        
1946         else {                                   
1947             bSig |= LIT64( 0x2000000000000000    
1948         }                                        
1949         shift64RightJamming( bSig, expDiff, &    
1950         zExp = aExp;                             
1951     }                                            
1952     else if ( expDiff < 0 ) {                    
1953         if ( bExp == 0x7FF ) {                   
1954             if ( bSig ) return propagateFloat    
1955             return packFloat64( zSign, 0x7FF,    
1956         }                                        
1957         if ( aExp == 0 ) {                       
1958             ++expDiff;                           
1959         }                                        
1960         else {                                   
1961             aSig |= LIT64( 0x2000000000000000    
1962         }                                        
1963         shift64RightJamming( aSig, - expDiff,    
1964         zExp = bExp;                             
1965     }                                            
1966     else {                                       
1967         if ( aExp == 0x7FF ) {                   
1968             if ( aSig | bSig ) return propaga    
1969             return a;                            
1970         }                                        
1971         if ( aExp == 0 ) return packFloat64(     
1972         zSig = LIT64( 0x4000000000000000 ) +     
1973         zExp = aExp;                             
1974         goto roundAndPack;                       
1975     }                                            
1976     aSig |= LIT64( 0x2000000000000000 );         
1977     zSig = ( aSig + bSig )<<1;                   
1978     --zExp;                                      
1979     if ( (sbits64) zSig < 0 ) {                  
1980         zSig = aSig + bSig;                      
1981         ++zExp;                                  
1982     }                                            
1983  roundAndPack:                                   
1984     return roundAndPackFloat64( roundData, zS    
1985                                                  
1986 }                                                
1987                                                  
1988 /*                                               
1989 ---------------------------------------------    
1990 Returns the result of subtracting the absolut    
1991 precision floating-point values `a' and `b'.     
1992 difference is negated before being returned.     
1993 result is a NaN.  The subtraction is performe    
1994 Standard for Binary Floating-point Arithmetic    
1995 ---------------------------------------------    
1996 */                                               
1997 static float64 subFloat64Sigs( struct roundin    
1998 {                                                
1999     int16 aExp, bExp, zExp;                      
2000     bits64 aSig, bSig, zSig;                     
2001     int16 expDiff;                               
2002                                                  
2003     aSig = extractFloat64Frac( a );              
2004     aExp = extractFloat64Exp( a );               
2005     bSig = extractFloat64Frac( b );              
2006     bExp = extractFloat64Exp( b );               
2007     expDiff = aExp - bExp;                       
2008     aSig <<= 10;                                 
2009     bSig <<= 10;                                 
2010     if ( 0 < expDiff ) goto aExpBigger;          
2011     if ( expDiff < 0 ) goto bExpBigger;          
2012     if ( aExp == 0x7FF ) {                       
2013         if ( aSig | bSig ) return propagateFl    
2014         roundData->exception |= float_flag_in    
2015         return float64_default_nan;              
2016     }                                            
2017     if ( aExp == 0 ) {                           
2018         aExp = 1;                                
2019         bExp = 1;                                
2020     }                                            
2021     if ( bSig < aSig ) goto aBigger;             
2022     if ( aSig < bSig ) goto bBigger;             
2023     return packFloat64( roundData->mode == fl    
2024  bExpBigger:                                     
2025     if ( bExp == 0x7FF ) {                       
2026         if ( bSig ) return propagateFloat64Na    
2027         return packFloat64( zSign ^ 1, 0x7FF,    
2028     }                                            
2029     if ( aExp == 0 ) {                           
2030         ++expDiff;                               
2031     }                                            
2032     else {                                       
2033         aSig |= LIT64( 0x4000000000000000 );     
2034     }                                            
2035     shift64RightJamming( aSig, - expDiff, &aS    
2036     bSig |= LIT64( 0x4000000000000000 );         
2037  bBigger:                                        
2038     zSig = bSig - aSig;                          
2039     zExp = bExp;                                 
2040     zSign ^= 1;                                  
2041     goto normalizeRoundAndPack;                  
2042  aExpBigger:                                     
2043     if ( aExp == 0x7FF ) {                       
2044         if ( aSig ) return propagateFloat64Na    
2045         return a;                                
2046     }                                            
2047     if ( bExp == 0 ) {                           
2048         --expDiff;                               
2049     }                                            
2050     else {                                       
2051         bSig |= LIT64( 0x4000000000000000 );     
2052     }                                            
2053     shift64RightJamming( bSig, expDiff, &bSig    
2054     aSig |= LIT64( 0x4000000000000000 );         
2055  aBigger:                                        
2056     zSig = aSig - bSig;                          
2057     zExp = aExp;                                 
2058  normalizeRoundAndPack:                          
2059     --zExp;                                      
2060     return normalizeRoundAndPackFloat64( roun    
2061                                                  
2062 }                                                
2063                                                  
2064 /*                                               
2065 ---------------------------------------------    
2066 Returns the result of adding the double-preci    
2067 and `b'.  The operation is performed accordin    
2068 Binary Floating-point Arithmetic.                
2069 ---------------------------------------------    
2070 */                                               
2071 float64 float64_add( struct roundingData *rou    
2072 {                                                
2073     flag aSign, bSign;                           
2074                                                  
2075     aSign = extractFloat64Sign( a );             
2076     bSign = extractFloat64Sign( b );             
2077     if ( aSign == bSign ) {                      
2078         return addFloat64Sigs( roundData, a,     
2079     }                                            
2080     else {                                       
2081         return subFloat64Sigs( roundData, a,     
2082     }                                            
2083                                                  
2084 }                                                
2085                                                  
2086 /*                                               
2087 ---------------------------------------------    
2088 Returns the result of subtracting the double-    
2089 `a' and `b'.  The operation is performed acco    
2090 for Binary Floating-point Arithmetic.            
2091 ---------------------------------------------    
2092 */                                               
2093 float64 float64_sub( struct roundingData *rou    
2094 {                                                
2095     flag aSign, bSign;                           
2096                                                  
2097     aSign = extractFloat64Sign( a );             
2098     bSign = extractFloat64Sign( b );             
2099     if ( aSign == bSign ) {                      
2100         return subFloat64Sigs( roundData, a,     
2101     }                                            
2102     else {                                       
2103         return addFloat64Sigs( roundData, a,     
2104     }                                            
2105                                                  
2106 }                                                
2107                                                  
2108 /*                                               
2109 ---------------------------------------------    
2110 Returns the result of multiplying the double-    
2111 `a' and `b'.  The operation is performed acco    
2112 for Binary Floating-point Arithmetic.            
2113 ---------------------------------------------    
2114 */                                               
2115 float64 float64_mul( struct roundingData *rou    
2116 {                                                
2117     flag aSign, bSign, zSign;                    
2118     int16 aExp, bExp, zExp;                      
2119     bits64 aSig, bSig, zSig0, zSig1;             
2120                                                  
2121     aSig = extractFloat64Frac( a );              
2122     aExp = extractFloat64Exp( a );               
2123     aSign = extractFloat64Sign( a );             
2124     bSig = extractFloat64Frac( b );              
2125     bExp = extractFloat64Exp( b );               
2126     bSign = extractFloat64Sign( b );             
2127     zSign = aSign ^ bSign;                       
2128     if ( aExp == 0x7FF ) {                       
2129         if ( aSig || ( ( bExp == 0x7FF ) && b    
2130             return propagateFloat64NaN( a, b     
2131         }                                        
2132         if ( ( bExp | bSig ) == 0 ) {            
2133             roundData->exception |= float_fla    
2134             return float64_default_nan;          
2135         }                                        
2136         return packFloat64( zSign, 0x7FF, 0 )    
2137     }                                            
2138     if ( bExp == 0x7FF ) {                       
2139         if ( bSig ) return propagateFloat64Na    
2140         if ( ( aExp | aSig ) == 0 ) {            
2141             roundData->exception |= float_fla    
2142             return float64_default_nan;          
2143         }                                        
2144         return packFloat64( zSign, 0x7FF, 0 )    
2145     }                                            
2146     if ( aExp == 0 ) {                           
2147         if ( aSig == 0 ) return packFloat64(     
2148         normalizeFloat64Subnormal( aSig, &aEx    
2149     }                                            
2150     if ( bExp == 0 ) {                           
2151         if ( bSig == 0 ) return packFloat64(     
2152         normalizeFloat64Subnormal( bSig, &bEx    
2153     }                                            
2154     zExp = aExp + bExp - 0x3FF;                  
2155     aSig = ( aSig | LIT64( 0x0010000000000000    
2156     bSig = ( bSig | LIT64( 0x0010000000000000    
2157     mul64To128( aSig, bSig, &zSig0, &zSig1 );    
2158     zSig0 |= ( zSig1 != 0 );                     
2159     if ( 0 <= (sbits64) ( zSig0<<1 ) ) {         
2160         zSig0 <<= 1;                             
2161         --zExp;                                  
2162     }                                            
2163     return roundAndPackFloat64( roundData, zS    
2164                                                  
2165 }                                                
2166                                                  
2167 /*                                               
2168 ---------------------------------------------    
2169 Returns the result of dividing the double-pre    
2170 by the corresponding value `b'.  The operatio    
2171 the IEC/IEEE Standard for Binary Floating-poi    
2172 ---------------------------------------------    
2173 */                                               
2174 float64 float64_div( struct roundingData *rou    
2175 {                                                
2176     flag aSign, bSign, zSign;                    
2177     int16 aExp, bExp, zExp;                      
2178     bits64 aSig, bSig, zSig;                     
2179     bits64 rem0, rem1;                           
2180     bits64 term0, term1;                         
2181                                                  
2182     aSig = extractFloat64Frac( a );              
2183     aExp = extractFloat64Exp( a );               
2184     aSign = extractFloat64Sign( a );             
2185     bSig = extractFloat64Frac( b );              
2186     bExp = extractFloat64Exp( b );               
2187     bSign = extractFloat64Sign( b );             
2188     zSign = aSign ^ bSign;                       
2189     if ( aExp == 0x7FF ) {                       
2190         if ( aSig ) return propagateFloat64Na    
2191         if ( bExp == 0x7FF ) {                   
2192             if ( bSig ) return propagateFloat    
2193             roundData->exception |= float_fla    
2194             return float64_default_nan;          
2195         }                                        
2196         return packFloat64( zSign, 0x7FF, 0 )    
2197     }                                            
2198     if ( bExp == 0x7FF ) {                       
2199         if ( bSig ) return propagateFloat64Na    
2200         return packFloat64( zSign, 0, 0 );       
2201     }                                            
2202     if ( bExp == 0 ) {                           
2203         if ( bSig == 0 ) {                       
2204             if ( ( aExp | aSig ) == 0 ) {        
2205                 roundData->exception |= float    
2206                 return float64_default_nan;      
2207             }                                    
2208             roundData->exception |= float_fla    
2209             return packFloat64( zSign, 0x7FF,    
2210         }                                        
2211         normalizeFloat64Subnormal( bSig, &bEx    
2212     }                                            
2213     if ( aExp == 0 ) {                           
2214         if ( aSig == 0 ) return packFloat64(     
2215         normalizeFloat64Subnormal( aSig, &aEx    
2216     }                                            
2217     zExp = aExp - bExp + 0x3FD;                  
2218     aSig = ( aSig | LIT64( 0x0010000000000000    
2219     bSig = ( bSig | LIT64( 0x0010000000000000    
2220     if ( bSig <= ( aSig + aSig ) ) {             
2221         aSig >>= 1;                              
2222         ++zExp;                                  
2223     }                                            
2224     zSig = estimateDiv128To64( aSig, 0, bSig     
2225     if ( ( zSig & 0x1FF ) <= 2 ) {               
2226         mul64To128( bSig, zSig, &term0, &term    
2227         sub128( aSig, 0, term0, term1, &rem0,    
2228         while ( (sbits64) rem0 < 0 ) {           
2229             --zSig;                              
2230             add128( rem0, rem1, 0, bSig, &rem    
2231         }                                        
2232         zSig |= ( rem1 != 0 );                   
2233     }                                            
2234     return roundAndPackFloat64( roundData, zS    
2235                                                  
2236 }                                                
2237                                                  
2238 /*                                               
2239 ---------------------------------------------    
2240 Returns the remainder of the double-precision    
2241 with respect to the corresponding value `b'.     
2242 according to the IEC/IEEE Standard for Binary    
2243 ---------------------------------------------    
2244 */                                               
2245 float64 float64_rem( struct roundingData *rou    
2246 {                                                
2247     flag aSign, bSign, zSign;                    
2248     int16 aExp, bExp, expDiff;                   
2249     bits64 aSig, bSig;                           
2250     bits64 q, alternateASig;                     
2251     sbits64 sigMean;                             
2252                                                  
2253     aSig = extractFloat64Frac( a );              
2254     aExp = extractFloat64Exp( a );               
2255     aSign = extractFloat64Sign( a );             
2256     bSig = extractFloat64Frac( b );              
2257     bExp = extractFloat64Exp( b );               
2258     bSign = extractFloat64Sign( b );             
2259     if ( aExp == 0x7FF ) {                       
2260         if ( aSig || ( ( bExp == 0x7FF ) && b    
2261             return propagateFloat64NaN( a, b     
2262         }                                        
2263         roundData->exception |= float_flag_in    
2264         return float64_default_nan;              
2265     }                                            
2266     if ( bExp == 0x7FF ) {                       
2267         if ( bSig ) return propagateFloat64Na    
2268         return a;                                
2269     }                                            
2270     if ( bExp == 0 ) {                           
2271         if ( bSig == 0 ) {                       
2272             roundData->exception |= float_fla    
2273             return float64_default_nan;          
2274         }                                        
2275         normalizeFloat64Subnormal( bSig, &bEx    
2276     }                                            
2277     if ( aExp == 0 ) {                           
2278         if ( aSig == 0 ) return a;               
2279         normalizeFloat64Subnormal( aSig, &aEx    
2280     }                                            
2281     expDiff = aExp - bExp;                       
2282     aSig = ( aSig | LIT64( 0x0010000000000000    
2283     bSig = ( bSig | LIT64( 0x0010000000000000    
2284     if ( expDiff < 0 ) {                         
2285         if ( expDiff < -1 ) return a;            
2286         aSig >>= 1;                              
2287     }                                            
2288     q = ( bSig <= aSig );                        
2289     if ( q ) aSig -= bSig;                       
2290     expDiff -= 64;                               
2291     while ( 0 < expDiff ) {                      
2292         q = estimateDiv128To64( aSig, 0, bSig    
2293         q = ( 2 < q ) ? q - 2 : 0;               
2294         aSig = - ( ( bSig>>2 ) * q );            
2295         expDiff -= 62;                           
2296     }                                            
2297     expDiff += 64;                               
2298     if ( 0 < expDiff ) {                         
2299         q = estimateDiv128To64( aSig, 0, bSig    
2300         q = ( 2 < q ) ? q - 2 : 0;               
2301         q >>= 64 - expDiff;                      
2302         bSig >>= 2;                              
2303         aSig = ( ( aSig>>1 )<<( expDiff - 1 )    
2304     }                                            
2305     else {                                       
2306         aSig >>= 2;                              
2307         bSig >>= 2;                              
2308     }                                            
2309     do {                                         
2310         alternateASig = aSig;                    
2311         ++q;                                     
2312         aSig -= bSig;                            
2313     } while ( 0 <= (sbits64) aSig );             
2314     sigMean = aSig + alternateASig;              
2315     if ( ( sigMean < 0 ) || ( ( sigMean == 0     
2316         aSig = alternateASig;                    
2317     }                                            
2318     zSign = ( (sbits64) aSig < 0 );              
2319     if ( zSign ) aSig = - aSig;                  
2320     return normalizeRoundAndPackFloat64( roun    
2321                                                  
2322 }                                                
2323                                                  
2324 /*                                               
2325 ---------------------------------------------    
2326 Returns the square root of the double-precisi    
2327 The operation is performed according to the I    
2328 Floating-point Arithmetic.                       
2329 ---------------------------------------------    
2330 */                                               
2331 float64 float64_sqrt( struct roundingData *ro    
2332 {                                                
2333     flag aSign;                                  
2334     int16 aExp, zExp;                            
2335     bits64 aSig, zSig;                           
2336     bits64 rem0, rem1, term0, term1; //, shif    
2337     //float64 z;                                 
2338                                                  
2339     aSig = extractFloat64Frac( a );              
2340     aExp = extractFloat64Exp( a );               
2341     aSign = extractFloat64Sign( a );             
2342     if ( aExp == 0x7FF ) {                       
2343         if ( aSig ) return propagateFloat64Na    
2344         if ( ! aSign ) return a;                 
2345         roundData->exception |= float_flag_in    
2346         return float64_default_nan;              
2347     }                                            
2348     if ( aSign ) {                               
2349         if ( ( aExp | aSig ) == 0 ) return a;    
2350         roundData->exception |= float_flag_in    
2351         return float64_default_nan;              
2352     }                                            
2353     if ( aExp == 0 ) {                           
2354         if ( aSig == 0 ) return 0;               
2355         normalizeFloat64Subnormal( aSig, &aEx    
2356     }                                            
2357     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;      
2358     aSig |= LIT64( 0x0010000000000000 );         
2359     zSig = estimateSqrt32( aExp, aSig>>21 );     
2360     zSig <<= 31;                                 
2361     aSig <<= 9 - ( aExp & 1 );                   
2362     zSig = estimateDiv128To64( aSig, 0, zSig     
2363     if ( ( zSig & 0x3FF ) <= 5 ) {               
2364         if ( zSig < 2 ) {                        
2365             zSig = LIT64( 0xFFFFFFFFFFFFFFFF     
2366         }                                        
2367         else {                                   
2368             aSig <<= 2;                          
2369             mul64To128( zSig, zSig, &term0, &    
2370             sub128( aSig, 0, term0, term1, &r    
2371             while ( (sbits64) rem0 < 0 ) {       
2372                 --zSig;                          
2373                 shortShift128Left( 0, zSig, 1    
2374                 term1 |= 1;                      
2375                 add128( rem0, rem1, term0, te    
2376             }                                    
2377             zSig |= ( ( rem0 | rem1 ) != 0 );    
2378         }                                        
2379     }                                            
2380     shift64RightJamming( zSig, 1, &zSig );       
2381     return roundAndPackFloat64( roundData, 0,    
2382                                                  
2383 }                                                
2384                                                  
2385 /*                                               
2386 ---------------------------------------------    
2387 Returns 1 if the double-precision floating-po    
2388 corresponding value `b', and 0 otherwise.  Th    
2389 according to the IEC/IEEE Standard for Binary    
2390 ---------------------------------------------    
2391 */                                               
2392 flag float64_eq( float64 a, float64 b )          
2393 {                                                
2394                                                  
2395     if (    ( ( extractFloat64Exp( a ) == 0x7    
2396          || ( ( extractFloat64Exp( b ) == 0x7    
2397        ) {                                       
2398         if ( float64_is_signaling_nan( a ) ||    
2399             float_raise( float_flag_invalid )    
2400         }                                        
2401         return 0;                                
2402     }                                            
2403     return ( a == b ) || ( (bits64) ( ( a | b    
2404                                                  
2405 }                                                
2406                                                  
2407 /*                                               
2408 ---------------------------------------------    
2409 Returns 1 if the double-precision floating-po    
2410 equal to the corresponding value `b', and 0 o    
2411 performed according to the IEC/IEEE Standard     
2412 Arithmetic.                                      
2413 ---------------------------------------------    
2414 */                                               
2415 flag float64_le( float64 a, float64 b )          
2416 {                                                
2417     flag aSign, bSign;                           
2418                                                  
2419     if (    ( ( extractFloat64Exp( a ) == 0x7    
2420          || ( ( extractFloat64Exp( b ) == 0x7    
2421        ) {                                       
2422         float_raise( float_flag_invalid );       
2423         return 0;                                
2424     }                                            
2425     aSign = extractFloat64Sign( a );             
2426     bSign = extractFloat64Sign( b );             
2427     if ( aSign != bSign ) return aSign || ( (    
2428     return ( a == b ) || ( aSign ^ ( a < b )     
2429                                                  
2430 }                                                
2431                                                  
2432 /*                                               
2433 ---------------------------------------------    
2434 Returns 1 if the double-precision floating-po    
2435 the corresponding value `b', and 0 otherwise.    
2436 according to the IEC/IEEE Standard for Binary    
2437 ---------------------------------------------    
2438 */                                               
2439 flag float64_lt( float64 a, float64 b )          
2440 {                                                
2441     flag aSign, bSign;                           
2442                                                  
2443     if (    ( ( extractFloat64Exp( a ) == 0x7    
2444          || ( ( extractFloat64Exp( b ) == 0x7    
2445        ) {                                       
2446         float_raise( float_flag_invalid );       
2447         return 0;                                
2448     }                                            
2449     aSign = extractFloat64Sign( a );             
2450     bSign = extractFloat64Sign( b );             
2451     if ( aSign != bSign ) return aSign && ( (    
2452     return ( a != b ) && ( aSign ^ ( a < b )     
2453                                                  
2454 }                                                
2455                                                  
2456 /*                                               
2457 ---------------------------------------------    
2458 Returns 1 if the double-precision floating-po    
2459 corresponding value `b', and 0 otherwise.  Th    
2460 if either operand is a NaN.  Otherwise, the c    
2461 according to the IEC/IEEE Standard for Binary    
2462 ---------------------------------------------    
2463 */                                               
2464 flag float64_eq_signaling( float64 a, float64    
2465 {                                                
2466                                                  
2467     if (    ( ( extractFloat64Exp( a ) == 0x7    
2468          || ( ( extractFloat64Exp( b ) == 0x7    
2469        ) {                                       
2470         float_raise( float_flag_invalid );       
2471         return 0;                                
2472     }                                            
2473     return ( a == b ) || ( (bits64) ( ( a | b    
2474                                                  
2475 }                                                
2476                                                  
2477 /*                                               
2478 ---------------------------------------------    
2479 Returns 1 if the double-precision floating-po    
2480 equal to the corresponding value `b', and 0 o    
2481 cause an exception.  Otherwise, the compariso    
2482 IEC/IEEE Standard for Binary Floating-point A    
2483 ---------------------------------------------    
2484 */                                               
2485 flag float64_le_quiet( float64 a, float64 b )    
2486 {                                                
2487     flag aSign, bSign;                           
2488     //int16 aExp, bExp;                          
2489                                                  
2490     if (    ( ( extractFloat64Exp( a ) == 0x7    
2491          || ( ( extractFloat64Exp( b ) == 0x7    
2492        ) {                                       
2493         /* Do nothing, even if NaN as we're q    
2494         return 0;                                
2495     }                                            
2496     aSign = extractFloat64Sign( a );             
2497     bSign = extractFloat64Sign( b );             
2498     if ( aSign != bSign ) return aSign || ( (    
2499     return ( a == b ) || ( aSign ^ ( a < b )     
2500                                                  
2501 }                                                
2502                                                  
2503 /*                                               
2504 ---------------------------------------------    
2505 Returns 1 if the double-precision floating-po    
2506 the corresponding value `b', and 0 otherwise.    
2507 exception.  Otherwise, the comparison is perf    
2508 Standard for Binary Floating-point Arithmetic    
2509 ---------------------------------------------    
2510 */                                               
2511 flag float64_lt_quiet( float64 a, float64 b )    
2512 {                                                
2513     flag aSign, bSign;                           
2514                                                  
2515     if (    ( ( extractFloat64Exp( a ) == 0x7    
2516          || ( ( extractFloat64Exp( b ) == 0x7    
2517        ) {                                       
2518         /* Do nothing, even if NaN as we're q    
2519         return 0;                                
2520     }                                            
2521     aSign = extractFloat64Sign( a );             
2522     bSign = extractFloat64Sign( b );             
2523     if ( aSign != bSign ) return aSign && ( (    
2524     return ( a != b ) && ( aSign ^ ( a < b )     
2525                                                  
2526 }                                                
2527                                                  
2528 #ifdef FLOATX80                                  
2529                                                  
2530 /*                                               
2531 ---------------------------------------------    
2532 Returns the result of converting the extended    
2533 point value `a' to the 32-bit two's complemen    
2534 conversion is performed according to the IEC/    
2535 Floating-point Arithmetic---which means in pa    
2536 is rounded according to the current rounding     
2537 largest positive integer is returned.  Otherw    
2538 overflows, the largest integer with the same     
2539 ---------------------------------------------    
2540 */                                               
2541 int32 floatx80_to_int32( struct roundingData     
2542 {                                                
2543     flag aSign;                                  
2544     int32 aExp, shiftCount;                      
2545     bits64 aSig;                                 
2546                                                  
2547     aSig = extractFloatx80Frac( a );             
2548     aExp = extractFloatx80Exp( a );              
2549     aSign = extractFloatx80Sign( a );            
2550     if ( ( aExp == 0x7FFF ) && (bits64) ( aSi    
2551     shiftCount = 0x4037 - aExp;                  
2552     if ( shiftCount <= 0 ) shiftCount = 1;       
2553     shift64RightJamming( aSig, shiftCount, &a    
2554     return roundAndPackInt32( roundData, aSig    
2555                                                  
2556 }                                                
2557                                                  
2558 /*                                               
2559 ---------------------------------------------    
2560 Returns the result of converting the extended    
2561 point value `a' to the 32-bit two's complemen    
2562 conversion is performed according to the IEC/    
2563 Floating-point Arithmetic, except that the co    
2564 toward zero.  If `a' is a NaN, the largest po    
2565 Otherwise, if the conversion overflows, the l    
2566 sign as `a' is returned.                         
2567 ---------------------------------------------    
2568 */                                               
2569 int32 floatx80_to_int32_round_to_zero( floatx    
2570 {                                                
2571     flag aSign;                                  
2572     int32 aExp, shiftCount;                      
2573     bits64 aSig, savedASig;                      
2574     int32 z;                                     
2575                                                  
2576     aSig = extractFloatx80Frac( a );             
2577     aExp = extractFloatx80Exp( a );              
2578     aSign = extractFloatx80Sign( a );            
2579     shiftCount = 0x403E - aExp;                  
2580     if ( shiftCount < 32 ) {                     
2581         if ( ( aExp == 0x7FFF ) && (bits64) (    
2582         goto invalid;                            
2583     }                                            
2584     else if ( 63 < shiftCount ) {                
2585         if ( aExp || aSig ) float_raise( floa    
2586         return 0;                                
2587     }                                            
2588     savedASig = aSig;                            
2589     aSig >>= shiftCount;                         
2590     z = aSig;                                    
2591     if ( aSign ) z = - z;                        
2592     if ( ( z < 0 ) ^ aSign ) {                   
2593  invalid:                                        
2594         float_raise( float_flag_invalid );       
2595         return aSign ? 0x80000000 : 0x7FFFFFF    
2596     }                                            
2597     if ( ( aSig<<shiftCount ) != savedASig )     
2598         float_raise( float_flag_inexact );       
2599     }                                            
2600     return z;                                    
2601                                                  
2602 }                                                
2603                                                  
2604 /*                                               
2605 ---------------------------------------------    
2606 Returns the result of converting the extended    
2607 point value `a' to the single-precision float    
2608 conversion is performed according to the IEC/    
2609 Floating-point Arithmetic.                       
2610 ---------------------------------------------    
2611 */                                               
2612 float32 floatx80_to_float32( struct roundingD    
2613 {                                                
2614     flag aSign;                                  
2615     int32 aExp;                                  
2616     bits64 aSig;                                 
2617                                                  
2618     aSig = extractFloatx80Frac( a );             
2619     aExp = extractFloatx80Exp( a );              
2620     aSign = extractFloatx80Sign( a );            
2621     if ( aExp == 0x7FFF ) {                      
2622         if ( (bits64) ( aSig<<1 ) ) {            
2623             return commonNaNToFloat32( floatx    
2624         }                                        
2625         return packFloat32( aSign, 0xFF, 0 );    
2626     }                                            
2627     shift64RightJamming( aSig, 33, &aSig );      
2628     if ( aExp || aSig ) aExp -= 0x3F81;          
2629     return roundAndPackFloat32( roundData, aS    
2630                                                  
2631 }                                                
2632                                                  
2633 /*                                               
2634 ---------------------------------------------    
2635 Returns the result of converting the extended    
2636 point value `a' to the double-precision float    
2637 conversion is performed according to the IEC/    
2638 Floating-point Arithmetic.                       
2639 ---------------------------------------------    
2640 */                                               
2641 float64 floatx80_to_float64( struct roundingD    
2642 {                                                
2643     flag aSign;                                  
2644     int32 aExp;                                  
2645     bits64 aSig, zSig;                           
2646                                                  
2647     aSig = extractFloatx80Frac( a );             
2648     aExp = extractFloatx80Exp( a );              
2649     aSign = extractFloatx80Sign( a );            
2650     if ( aExp == 0x7FFF ) {                      
2651         if ( (bits64) ( aSig<<1 ) ) {            
2652             return commonNaNToFloat64( floatx    
2653         }                                        
2654         return packFloat64( aSign, 0x7FF, 0 )    
2655     }                                            
2656     shift64RightJamming( aSig, 1, &zSig );       
2657     if ( aExp || aSig ) aExp -= 0x3C01;          
2658     return roundAndPackFloat64( roundData, aS    
2659                                                  
2660 }                                                
2661                                                  
2662 /*                                               
2663 ---------------------------------------------    
2664 Rounds the extended double-precision floating    
2665 and returns the result as an extended quadrup    
2666 value.  The operation is performed according     
2667 Binary Floating-point Arithmetic.                
2668 ---------------------------------------------    
2669 */                                               
2670 floatx80 floatx80_round_to_int( struct roundi    
2671 {                                                
2672     flag aSign;                                  
2673     int32 aExp;                                  
2674     bits64 lastBitMask, roundBitsMask;           
2675     int8 roundingMode;                           
2676     floatx80 z;                                  
2677                                                  
2678     aExp = extractFloatx80Exp( a );              
2679     if ( 0x403E <= aExp ) {                      
2680         if ( ( aExp == 0x7FFF ) && (bits64) (    
2681             return propagateFloatx80NaN( a, a    
2682         }                                        
2683         return a;                                
2684     }                                            
2685     if ( aExp <= 0x3FFE ) {                      
2686         if (    ( aExp == 0 )                    
2687              && ( (bits64) ( extractFloatx80F    
2688             return a;                            
2689         }                                        
2690         roundData->exception |= float_flag_in    
2691         aSign = extractFloatx80Sign( a );        
2692         switch ( roundData->mode ) {             
2693          case float_round_nearest_even:          
2694             if ( ( aExp == 0x3FFE ) && (bits6    
2695                ) {                               
2696                 return                           
2697                     packFloatx80( aSign, 0x3F    
2698             }                                    
2699             break;                               
2700          case float_round_down:                  
2701             return                               
2702                   aSign ?                        
2703                       packFloatx80( 1, 0x3FFF    
2704                 : packFloatx80( 0, 0, 0 );       
2705          case float_round_up:                    
2706             return                               
2707                   aSign ? packFloatx80( 1, 0,    
2708                 : packFloatx80( 0, 0x3FFF, LI    
2709         }                                        
2710         return packFloatx80( aSign, 0, 0 );      
2711     }                                            
2712     lastBitMask = 1;                             
2713     lastBitMask <<= 0x403E - aExp;               
2714     roundBitsMask = lastBitMask - 1;             
2715     z = a;                                       
2716     roundingMode = roundData->mode;              
2717     if ( roundingMode == float_round_nearest_    
2718         z.low += lastBitMask>>1;                 
2719         if ( ( z.low & roundBitsMask ) == 0 )    
2720     }                                            
2721     else if ( roundingMode != float_round_to_    
2722         if ( extractFloatx80Sign( z ) ^ ( rou    
2723             z.low += roundBitsMask;              
2724         }                                        
2725     }                                            
2726     z.low &= ~ roundBitsMask;                    
2727     if ( z.low == 0 ) {                          
2728         ++z.high;                                
2729         z.low = LIT64( 0x8000000000000000 );     
2730     }                                            
2731     if ( z.low != a.low ) roundData->exceptio    
2732     return z;                                    
2733                                                  
2734 }                                                
2735                                                  
2736 /*                                               
2737 ---------------------------------------------    
2738 Returns the result of adding the absolute val    
2739 precision floating-point values `a' and `b'.     
2740 negated before being returned.  `zSign' is ig    
2741 The addition is performed according to the IE    
2742 Floating-point Arithmetic.                       
2743 ---------------------------------------------    
2744 */                                               
2745 static floatx80 addFloatx80Sigs( struct round    
2746 {                                                
2747     int32 aExp, bExp, zExp;                      
2748     bits64 aSig, bSig, zSig0, zSig1;             
2749     int32 expDiff;                               
2750                                                  
2751     aSig = extractFloatx80Frac( a );             
2752     aExp = extractFloatx80Exp( a );              
2753     bSig = extractFloatx80Frac( b );             
2754     bExp = extractFloatx80Exp( b );              
2755     expDiff = aExp - bExp;                       
2756     if ( 0 < expDiff ) {                         
2757         if ( aExp == 0x7FFF ) {                  
2758             if ( (bits64) ( aSig<<1 ) ) retur    
2759             return a;                            
2760         }                                        
2761         if ( bExp == 0 ) --expDiff;              
2762         shift64ExtraRightJamming( bSig, 0, ex    
2763         zExp = aExp;                             
2764     }                                            
2765     else if ( expDiff < 0 ) {                    
2766         if ( bExp == 0x7FFF ) {                  
2767             if ( (bits64) ( bSig<<1 ) ) retur    
2768             return packFloatx80( zSign, 0x7FF    
2769         }                                        
2770         if ( aExp == 0 ) ++expDiff;              
2771         shift64ExtraRightJamming( aSig, 0, -     
2772         zExp = bExp;                             
2773     }                                            
2774     else {                                       
2775         if ( aExp == 0x7FFF ) {                  
2776             if ( (bits64) ( ( aSig | bSig )<<    
2777                 return propagateFloatx80NaN(     
2778             }                                    
2779             return a;                            
2780         }                                        
2781         zSig1 = 0;                               
2782         zSig0 = aSig + bSig;                     
2783         if ( aExp == 0 ) {                       
2784             normalizeFloatx80Subnormal( zSig0    
2785             goto roundAndPack;                   
2786         }                                        
2787         zExp = aExp;                             
2788         goto shiftRight1;                        
2789     }                                            
2790                                                  
2791     zSig0 = aSig + bSig;                         
2792                                                  
2793     if ( (sbits64) zSig0 < 0 ) goto roundAndP    
2794  shiftRight1:                                    
2795     shift64ExtraRightJamming( zSig0, zSig1, 1    
2796     zSig0 |= LIT64( 0x8000000000000000 );        
2797     ++zExp;                                      
2798  roundAndPack:                                   
2799     return                                       
2800         roundAndPackFloatx80(                    
2801             roundData, zSign, zExp, zSig0, zS    
2802                                                  
2803 }                                                
2804                                                  
2805 /*                                               
2806 ---------------------------------------------    
2807 Returns the result of subtracting the absolut    
2808 double-precision floating-point values `a' an    
2809 the difference is negated before being return    
2810 result is a NaN.  The subtraction is performe    
2811 Standard for Binary Floating-point Arithmetic    
2812 ---------------------------------------------    
2813 */                                               
2814 static floatx80 subFloatx80Sigs( struct round    
2815 {                                                
2816     int32 aExp, bExp, zExp;                      
2817     bits64 aSig, bSig, zSig0, zSig1;             
2818     int32 expDiff;                               
2819     floatx80 z;                                  
2820                                                  
2821     aSig = extractFloatx80Frac( a );             
2822     aExp = extractFloatx80Exp( a );              
2823     bSig = extractFloatx80Frac( b );             
2824     bExp = extractFloatx80Exp( b );              
2825     expDiff = aExp - bExp;                       
2826     if ( 0 < expDiff ) goto aExpBigger;          
2827     if ( expDiff < 0 ) goto bExpBigger;          
2828     if ( aExp == 0x7FFF ) {                      
2829         if ( (bits64) ( ( aSig | bSig )<<1 )     
2830             return propagateFloatx80NaN( a, b    
2831         }                                        
2832         roundData->exception |= float_flag_in    
2833         z.low = floatx80_default_nan_low;        
2834         z.high = floatx80_default_nan_high;      
2835         z.__padding = 0;                         
2836         return z;                                
2837     }                                            
2838     if ( aExp == 0 ) {                           
2839         aExp = 1;                                
2840         bExp = 1;                                
2841     }                                            
2842     zSig1 = 0;                                   
2843     if ( bSig < aSig ) goto aBigger;             
2844     if ( aSig < bSig ) goto bBigger;             
2845     return packFloatx80( roundData->mode == f    
2846  bExpBigger:                                     
2847     if ( bExp == 0x7FFF ) {                      
2848         if ( (bits64) ( bSig<<1 ) ) return pr    
2849         return packFloatx80( zSign ^ 1, 0x7FF    
2850     }                                            
2851     if ( aExp == 0 ) ++expDiff;                  
2852     shift128RightJamming( aSig, 0, - expDiff,    
2853  bBigger:                                        
2854     sub128( bSig, 0, aSig, zSig1, &zSig0, &zS    
2855     zExp = bExp;                                 
2856     zSign ^= 1;                                  
2857     goto normalizeRoundAndPack;                  
2858  aExpBigger:                                     
2859     if ( aExp == 0x7FFF ) {                      
2860         if ( (bits64) ( aSig<<1 ) ) return pr    
2861         return a;                                
2862     }                                            
2863     if ( bExp == 0 ) --expDiff;                  
2864     shift128RightJamming( bSig, 0, expDiff, &    
2865  aBigger:                                        
2866     sub128( aSig, 0, bSig, zSig1, &zSig0, &zS    
2867     zExp = aExp;                                 
2868  normalizeRoundAndPack:                          
2869     return                                       
2870         normalizeRoundAndPackFloatx80(           
2871             roundData, zSign, zExp, zSig0, zS    
2872                                                  
2873 }                                                
2874                                                  
2875 /*                                               
2876 ---------------------------------------------    
2877 Returns the result of adding the extended dou    
2878 values `a' and `b'.  The operation is perform    
2879 Standard for Binary Floating-point Arithmetic    
2880 ---------------------------------------------    
2881 */                                               
2882 floatx80 floatx80_add( struct roundingData *r    
2883 {                                                
2884     flag aSign, bSign;                           
2885                                                  
2886     aSign = extractFloatx80Sign( a );            
2887     bSign = extractFloatx80Sign( b );            
2888     if ( aSign == bSign ) {                      
2889         return addFloatx80Sigs( roundData, a,    
2890     }                                            
2891     else {                                       
2892         return subFloatx80Sigs( roundData, a,    
2893     }                                            
2894                                                  
2895 }                                                
2896                                                  
2897 /*                                               
2898 ---------------------------------------------    
2899 Returns the result of subtracting the extende    
2900 point values `a' and `b'.  The operation is p    
2901 IEC/IEEE Standard for Binary Floating-point A    
2902 ---------------------------------------------    
2903 */                                               
2904 floatx80 floatx80_sub( struct roundingData *r    
2905 {                                                
2906     flag aSign, bSign;                           
2907                                                  
2908     aSign = extractFloatx80Sign( a );            
2909     bSign = extractFloatx80Sign( b );            
2910     if ( aSign == bSign ) {                      
2911         return subFloatx80Sigs( roundData, a,    
2912     }                                            
2913     else {                                       
2914         return addFloatx80Sigs( roundData, a,    
2915     }                                            
2916                                                  
2917 }                                                
2918                                                  
2919 /*                                               
2920 ---------------------------------------------    
2921 Returns the result of multiplying the extende    
2922 point values `a' and `b'.  The operation is p    
2923 IEC/IEEE Standard for Binary Floating-point A    
2924 ---------------------------------------------    
2925 */                                               
2926 floatx80 floatx80_mul( struct roundingData *r    
2927 {                                                
2928     flag aSign, bSign, zSign;                    
2929     int32 aExp, bExp, zExp;                      
2930     bits64 aSig, bSig, zSig0, zSig1;             
2931     floatx80 z;                                  
2932                                                  
2933     aSig = extractFloatx80Frac( a );             
2934     aExp = extractFloatx80Exp( a );              
2935     aSign = extractFloatx80Sign( a );            
2936     bSig = extractFloatx80Frac( b );             
2937     bExp = extractFloatx80Exp( b );              
2938     bSign = extractFloatx80Sign( b );            
2939     zSign = aSign ^ bSign;                       
2940     if ( aExp == 0x7FFF ) {                      
2941         if (    (bits64) ( aSig<<1 )             
2942              || ( ( bExp == 0x7FFF ) && (bits    
2943             return propagateFloatx80NaN( a, b    
2944         }                                        
2945         if ( ( bExp | bSig ) == 0 ) goto inva    
2946         return packFloatx80( zSign, 0x7FFF, L    
2947     }                                            
2948     if ( bExp == 0x7FFF ) {                      
2949         if ( (bits64) ( bSig<<1 ) ) return pr    
2950         if ( ( aExp | aSig ) == 0 ) {            
2951  invalid:                                        
2952             roundData->exception |= float_fla    
2953             z.low = floatx80_default_nan_low;    
2954             z.high = floatx80_default_nan_hig    
2955             z.__padding = 0;                     
2956             return z;                            
2957         }                                        
2958         return packFloatx80( zSign, 0x7FFF, L    
2959     }                                            
2960     if ( aExp == 0 ) {                           
2961         if ( aSig == 0 ) return packFloatx80(    
2962         normalizeFloatx80Subnormal( aSig, &aE    
2963     }                                            
2964     if ( bExp == 0 ) {                           
2965         if ( bSig == 0 ) return packFloatx80(    
2966         normalizeFloatx80Subnormal( bSig, &bE    
2967     }                                            
2968     zExp = aExp + bExp - 0x3FFE;                 
2969     mul64To128( aSig, bSig, &zSig0, &zSig1 );    
2970     if ( 0 < (sbits64) zSig0 ) {                 
2971         shortShift128Left( zSig0, zSig1, 1, &    
2972         --zExp;                                  
2973     }                                            
2974     return                                       
2975         roundAndPackFloatx80(                    
2976             roundData, zSign, zExp, zSig0, zS    
2977                                                  
2978 }                                                
2979                                                  
2980 /*                                               
2981 ---------------------------------------------    
2982 Returns the result of dividing the extended d    
2983 value `a' by the corresponding value `b'.  Th    
2984 according to the IEC/IEEE Standard for Binary    
2985 ---------------------------------------------    
2986 */                                               
2987 floatx80 floatx80_div( struct roundingData *r    
2988 {                                                
2989     flag aSign, bSign, zSign;                    
2990     int32 aExp, bExp, zExp;                      
2991     bits64 aSig, bSig, zSig0, zSig1;             
2992     bits64 rem0, rem1, rem2, term0, term1, te    
2993     floatx80 z;                                  
2994                                                  
2995     aSig = extractFloatx80Frac( a );             
2996     aExp = extractFloatx80Exp( a );              
2997     aSign = extractFloatx80Sign( a );            
2998     bSig = extractFloatx80Frac( b );             
2999     bExp = extractFloatx80Exp( b );              
3000     bSign = extractFloatx80Sign( b );            
3001     zSign = aSign ^ bSign;                       
3002     if ( aExp == 0x7FFF ) {                      
3003         if ( (bits64) ( aSig<<1 ) ) return pr    
3004         if ( bExp == 0x7FFF ) {                  
3005             if ( (bits64) ( bSig<<1 ) ) retur    
3006             goto invalid;                        
3007         }                                        
3008         return packFloatx80( zSign, 0x7FFF, L    
3009     }                                            
3010     if ( bExp == 0x7FFF ) {                      
3011         if ( (bits64) ( bSig<<1 ) ) return pr    
3012         return packFloatx80( zSign, 0, 0 );      
3013     }                                            
3014     if ( bExp == 0 ) {                           
3015         if ( bSig == 0 ) {                       
3016             if ( ( aExp | aSig ) == 0 ) {        
3017  invalid:                                        
3018                 roundData->exception |= float    
3019                 z.low = floatx80_default_nan_    
3020                 z.high = floatx80_default_nan    
3021                 z.__padding = 0;                 
3022                 return z;                        
3023             }                                    
3024             roundData->exception |= float_fla    
3025             return packFloatx80( zSign, 0x7FF    
3026         }                                        
3027         normalizeFloatx80Subnormal( bSig, &bE    
3028     }                                            
3029     if ( aExp == 0 ) {                           
3030         if ( aSig == 0 ) return packFloatx80(    
3031         normalizeFloatx80Subnormal( aSig, &aE    
3032     }                                            
3033     zExp = aExp - bExp + 0x3FFE;                 
3034     rem1 = 0;                                    
3035     if ( bSig <= aSig ) {                        
3036         shift128Right( aSig, 0, 1, &aSig, &re    
3037         ++zExp;                                  
3038     }                                            
3039     zSig0 = estimateDiv128To64( aSig, rem1, b    
3040     mul64To128( bSig, zSig0, &term0, &term1 )    
3041     sub128( aSig, rem1, term0, term1, &rem0,     
3042     while ( (sbits64) rem0 < 0 ) {               
3043         --zSig0;                                 
3044         add128( rem0, rem1, 0, bSig, &rem0, &    
3045     }                                            
3046     zSig1 = estimateDiv128To64( rem1, 0, bSig    
3047     if ( (bits64) ( zSig1<<1 ) <= 8 ) {          
3048         mul64To128( bSig, zSig1, &term1, &ter    
3049         sub128( rem1, 0, term1, term2, &rem1,    
3050         while ( (sbits64) rem1 < 0 ) {           
3051             --zSig1;                             
3052             add128( rem1, rem2, 0, bSig, &rem    
3053         }                                        
3054         zSig1 |= ( ( rem1 | rem2 ) != 0 );       
3055     }                                            
3056     return                                       
3057         roundAndPackFloatx80(                    
3058             roundData, zSign, zExp, zSig0, zS    
3059                                                  
3060 }                                                
3061                                                  
3062 /*                                               
3063 ---------------------------------------------    
3064 Returns the remainder of the extended double-    
3065 `a' with respect to the corresponding value `    
3066 according to the IEC/IEEE Standard for Binary    
3067 ---------------------------------------------    
3068 */                                               
3069 floatx80 floatx80_rem( struct roundingData *r    
3070 {                                                
3071     flag aSign, bSign, zSign;                    
3072     int32 aExp, bExp, expDiff;                   
3073     bits64 aSig0, aSig1, bSig;                   
3074     bits64 q, term0, term1, alternateASig0, a    
3075     floatx80 z;                                  
3076                                                  
3077     aSig0 = extractFloatx80Frac( a );            
3078     aExp = extractFloatx80Exp( a );              
3079     aSign = extractFloatx80Sign( a );            
3080     bSig = extractFloatx80Frac( b );             
3081     bExp = extractFloatx80Exp( b );              
3082     bSign = extractFloatx80Sign( b );            
3083     if ( aExp == 0x7FFF ) {                      
3084         if (    (bits64) ( aSig0<<1 )            
3085              || ( ( bExp == 0x7FFF ) && (bits    
3086             return propagateFloatx80NaN( a, b    
3087         }                                        
3088         goto invalid;                            
3089     }                                            
3090     if ( bExp == 0x7FFF ) {                      
3091         if ( (bits64) ( bSig<<1 ) ) return pr    
3092         return a;                                
3093     }                                            
3094     if ( bExp == 0 ) {                           
3095         if ( bSig == 0 ) {                       
3096  invalid:                                        
3097             roundData->exception |= float_fla    
3098             z.low = floatx80_default_nan_low;    
3099             z.high = floatx80_default_nan_hig    
3100             z.__padding = 0;                     
3101             return z;                            
3102         }                                        
3103         normalizeFloatx80Subnormal( bSig, &bE    
3104     }                                            
3105     if ( aExp == 0 ) {                           
3106         if ( (bits64) ( aSig0<<1 ) == 0 ) ret    
3107         normalizeFloatx80Subnormal( aSig0, &a    
3108     }                                            
3109     bSig |= LIT64( 0x8000000000000000 );         
3110     zSign = aSign;                               
3111     expDiff = aExp - bExp;                       
3112     aSig1 = 0;                                   
3113     if ( expDiff < 0 ) {                         
3114         if ( expDiff < -1 ) return a;            
3115         shift128Right( aSig0, 0, 1, &aSig0, &    
3116         expDiff = 0;                             
3117     }                                            
3118     q = ( bSig <= aSig0 );                       
3119     if ( q ) aSig0 -= bSig;                      
3120     expDiff -= 64;                               
3121     while ( 0 < expDiff ) {                      
3122         q = estimateDiv128To64( aSig0, aSig1,    
3123         q = ( 2 < q ) ? q - 2 : 0;               
3124         mul64To128( bSig, q, &term0, &term1 )    
3125         sub128( aSig0, aSig1, term0, term1, &    
3126         shortShift128Left( aSig0, aSig1, 62,     
3127         expDiff -= 62;                           
3128     }                                            
3129     expDiff += 64;                               
3130     if ( 0 < expDiff ) {                         
3131         q = estimateDiv128To64( aSig0, aSig1,    
3132         q = ( 2 < q ) ? q - 2 : 0;               
3133         q >>= 64 - expDiff;                      
3134         mul64To128( bSig, q<<( 64 - expDiff )    
3135         sub128( aSig0, aSig1, term0, term1, &    
3136         shortShift128Left( 0, bSig, 64 - expD    
3137         while ( le128( term0, term1, aSig0, a    
3138             ++q;                                 
3139             sub128( aSig0, aSig1, term0, term    
3140         }                                        
3141     }                                            
3142     else {                                       
3143         term1 = 0;                               
3144         term0 = bSig;                            
3145     }                                            
3146     sub128( term0, term1, aSig0, aSig1, &alte    
3147     if (    lt128( alternateASig0, alternateA    
3148          || (    eq128( alternateASig0, alter    
3149               && ( q & 1 ) )                     
3150        ) {                                       
3151         aSig0 = alternateASig0;                  
3152         aSig1 = alternateASig1;                  
3153         zSign = ! zSign;                         
3154     }                                            
3155                                                  
3156     return                                       
3157         normalizeRoundAndPackFloatx80(           
3158             roundData, zSign, bExp + expDiff,    
3159                                                  
3160 }                                                
3161                                                  
3162 /*                                               
3163 ---------------------------------------------    
3164 Returns the square root of the extended doubl    
3165 value `a'.  The operation is performed accord    
3166 for Binary Floating-point Arithmetic.            
3167 ---------------------------------------------    
3168 */                                               
3169 floatx80 floatx80_sqrt( struct roundingData *    
3170 {                                                
3171     flag aSign;                                  
3172     int32 aExp, zExp;                            
3173     bits64 aSig0, aSig1, zSig0, zSig1;           
3174     bits64 rem0, rem1, rem2, rem3, term0, ter    
3175     bits64 shiftedRem0, shiftedRem1;             
3176     floatx80 z;                                  
3177                                                  
3178     aSig0 = extractFloatx80Frac( a );            
3179     aExp = extractFloatx80Exp( a );              
3180     aSign = extractFloatx80Sign( a );            
3181     if ( aExp == 0x7FFF ) {                      
3182         if ( (bits64) ( aSig0<<1 ) ) return p    
3183         if ( ! aSign ) return a;                 
3184         goto invalid;                            
3185     }                                            
3186     if ( aSign ) {                               
3187         if ( ( aExp | aSig0 ) == 0 ) return a    
3188  invalid:                                        
3189         roundData->exception |= float_flag_in    
3190         z.low = floatx80_default_nan_low;        
3191         z.high = floatx80_default_nan_high;      
3192         z.__padding = 0;                         
3193         return z;                                
3194     }                                            
3195     if ( aExp == 0 ) {                           
3196         if ( aSig0 == 0 ) return packFloatx80    
3197         normalizeFloatx80Subnormal( aSig0, &a    
3198     }                                            
3199     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;    
3200     zSig0 = estimateSqrt32( aExp, aSig0>>32 )    
3201     zSig0 <<= 31;                                
3202     aSig1 = 0;                                   
3203     shift128Right( aSig0, 0, ( aExp & 1 ) + 2    
3204     zSig0 = estimateDiv128To64( aSig0, aSig1,    
3205     if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64    
3206     shortShift128Left( aSig0, aSig1, 2, &aSig    
3207     mul64To128( zSig0, zSig0, &term0, &term1     
3208     sub128( aSig0, aSig1, term0, term1, &rem0    
3209     while ( (sbits64) rem0 < 0 ) {               
3210         --zSig0;                                 
3211         shortShift128Left( 0, zSig0, 1, &term    
3212         term1 |= 1;                              
3213         add128( rem0, rem1, term0, term1, &re    
3214     }                                            
3215     shortShift128Left( rem0, rem1, 63, &shift    
3216     zSig1 = estimateDiv128To64( shiftedRem0,     
3217     if ( (bits64) ( zSig1<<1 ) <= 10 ) {         
3218         if ( zSig1 == 0 ) zSig1 = 1;             
3219         mul64To128( zSig0, zSig1, &term1, &te    
3220         shortShift128Left( term1, term2, 1, &    
3221         sub128( rem1, 0, term1, term2, &rem1,    
3222         mul64To128( zSig1, zSig1, &term2, &te    
3223         sub192( rem1, rem2, 0, 0, term2, term    
3224         while ( (sbits64) rem1 < 0 ) {           
3225             --zSig1;                             
3226             shortShift192Left( 0, zSig0, zSig    
3227             term3 |= 1;                          
3228             add192(                              
3229                 rem1, rem2, rem3, term1, term    
3230         }                                        
3231         zSig1 |= ( ( rem1 | rem2 | rem3 ) !=     
3232     }                                            
3233     return                                       
3234         roundAndPackFloatx80(                    
3235             roundData, 0, zExp, zSig0, zSig1     
3236                                                  
3237 }                                                
3238                                                  
3239 /*                                               
3240 ---------------------------------------------    
3241 Returns 1 if the extended double-precision fl    
3242 equal to the corresponding value `b', and 0 o    
3243 performed according to the IEC/IEEE Standard     
3244 Arithmetic.                                      
3245 ---------------------------------------------    
3246 */                                               
3247 flag floatx80_eq( floatx80 a, floatx80 b )       
3248 {                                                
3249                                                  
3250     if (    (    ( extractFloatx80Exp( a ) ==    
3251               && (bits64) ( extractFloatx80Fr    
3252          || (    ( extractFloatx80Exp( b ) ==    
3253               && (bits64) ( extractFloatx80Fr    
3254        ) {                                       
3255         if (    floatx80_is_signaling_nan( a     
3256              || floatx80_is_signaling_nan( b     
3257             float_raise( float_flag_invalid )    
3258         }                                        
3259         return 0;                                
3260     }                                            
3261     return                                       
3262            ( a.low == b.low )                    
3263         && (    ( a.high == b.high )             
3264              || (    ( a.low == 0 )              
3265                   && ( (bits16) ( ( a.high |     
3266            );                                    
3267                                                  
3268 }                                                
3269                                                  
3270 /*                                               
3271 ---------------------------------------------    
3272 Returns 1 if the extended double-precision fl    
3273 less than or equal to the corresponding value    
3274 comparison is performed according to the IEC/    
3275 Floating-point Arithmetic.                       
3276 ---------------------------------------------    
3277 */                                               
3278 flag floatx80_le( floatx80 a, floatx80 b )       
3279 {                                                
3280     flag aSign, bSign;                           
3281                                                  
3282     if (    (    ( extractFloatx80Exp( a ) ==    
3283               && (bits64) ( extractFloatx80Fr    
3284          || (    ( extractFloatx80Exp( b ) ==    
3285               && (bits64) ( extractFloatx80Fr    
3286        ) {                                       
3287         float_raise( float_flag_invalid );       
3288         return 0;                                
3289     }                                            
3290     aSign = extractFloatx80Sign( a );            
3291     bSign = extractFloatx80Sign( b );            
3292     if ( aSign != bSign ) {                      
3293         return                                   
3294                aSign                             
3295             || (    ( ( (bits16) ( ( a.high |    
3296                  == 0 );                         
3297     }                                            
3298     return                                       
3299           aSign ? le128( b.high, b.low, a.hig    
3300         : le128( a.high, a.low, b.high, b.low    
3301                                                  
3302 }                                                
3303                                                  
3304 /*                                               
3305 ---------------------------------------------    
3306 Returns 1 if the extended double-precision fl    
3307 less than the corresponding value `b', and 0     
3308 is performed according to the IEC/IEEE Standa    
3309 Arithmetic.                                      
3310 ---------------------------------------------    
3311 */                                               
3312 flag floatx80_lt( floatx80 a, floatx80 b )       
3313 {                                                
3314     flag aSign, bSign;                           
3315                                                  
3316     if (    (    ( extractFloatx80Exp( a ) ==    
3317               && (bits64) ( extractFloatx80Fr    
3318          || (    ( extractFloatx80Exp( b ) ==    
3319               && (bits64) ( extractFloatx80Fr    
3320        ) {                                       
3321         float_raise( float_flag_invalid );       
3322         return 0;                                
3323     }                                            
3324     aSign = extractFloatx80Sign( a );            
3325     bSign = extractFloatx80Sign( b );            
3326     if ( aSign != bSign ) {                      
3327         return                                   
3328                aSign                             
3329             && (    ( ( (bits16) ( ( a.high |    
3330                  != 0 );                         
3331     }                                            
3332     return                                       
3333           aSign ? lt128( b.high, b.low, a.hig    
3334         : lt128( a.high, a.low, b.high, b.low    
3335                                                  
3336 }                                                
3337                                                  
3338 /*                                               
3339 ---------------------------------------------    
3340 Returns 1 if the extended double-precision fl    
3341 to the corresponding value `b', and 0 otherwi    
3342 raised if either operand is a NaN.  Otherwise    
3343 according to the IEC/IEEE Standard for Binary    
3344 ---------------------------------------------    
3345 */                                               
3346 flag floatx80_eq_signaling( floatx80 a, float    
3347 {                                                
3348                                                  
3349     if (    (    ( extractFloatx80Exp( a ) ==    
3350               && (bits64) ( extractFloatx80Fr    
3351          || (    ( extractFloatx80Exp( b ) ==    
3352               && (bits64) ( extractFloatx80Fr    
3353        ) {                                       
3354         float_raise( float_flag_invalid );       
3355         return 0;                                
3356     }                                            
3357     return                                       
3358            ( a.low == b.low )                    
3359         && (    ( a.high == b.high )             
3360              || (    ( a.low == 0 )              
3361                   && ( (bits16) ( ( a.high |     
3362            );                                    
3363                                                  
3364 }                                                
3365                                                  
3366 /*                                               
3367 ---------------------------------------------    
3368 Returns 1 if the extended double-precision fl    
3369 than or equal to the corresponding value `b',    
3370 do not cause an exception.  Otherwise, the co    
3371 to the IEC/IEEE Standard for Binary Floating-    
3372 ---------------------------------------------    
3373 */                                               
3374 flag floatx80_le_quiet( floatx80 a, floatx80     
3375 {                                                
3376     flag aSign, bSign;                           
3377                                                  
3378     if (    (    ( extractFloatx80Exp( a ) ==    
3379               && (bits64) ( extractFloatx80Fr    
3380          || (    ( extractFloatx80Exp( b ) ==    
3381               && (bits64) ( extractFloatx80Fr    
3382        ) {                                       
3383         /* Do nothing, even if NaN as we're q    
3384         return 0;                                
3385     }                                            
3386     aSign = extractFloatx80Sign( a );            
3387     bSign = extractFloatx80Sign( b );            
3388     if ( aSign != bSign ) {                      
3389         return                                   
3390                aSign                             
3391             || (    ( ( (bits16) ( ( a.high |    
3392                  == 0 );                         
3393     }                                            
3394     return                                       
3395           aSign ? le128( b.high, b.low, a.hig    
3396         : le128( a.high, a.low, b.high, b.low    
3397                                                  
3398 }                                                
3399                                                  
3400 /*                                               
3401 ---------------------------------------------    
3402 Returns 1 if the extended double-precision fl    
3403 than the corresponding value `b', and 0 other    
3404 an exception.  Otherwise, the comparison is p    
3405 IEC/IEEE Standard for Binary Floating-point A    
3406 ---------------------------------------------    
3407 */                                               
3408 flag floatx80_lt_quiet( floatx80 a, floatx80     
3409 {                                                
3410     flag aSign, bSign;                           
3411                                                  
3412     if (    (    ( extractFloatx80Exp( a ) ==    
3413               && (bits64) ( extractFloatx80Fr    
3414          || (    ( extractFloatx80Exp( b ) ==    
3415               && (bits64) ( extractFloatx80Fr    
3416        ) {                                       
3417         /* Do nothing, even if NaN as we're q    
3418         return 0;                                
3419     }                                            
3420     aSign = extractFloatx80Sign( a );            
3421     bSign = extractFloatx80Sign( b );            
3422     if ( aSign != bSign ) {                      
3423         return                                   
3424                aSign                             
3425             && (    ( ( (bits16) ( ( a.high |    
3426                  != 0 );                         
3427     }                                            
3428     return                                       
3429           aSign ? lt128( b.high, b.low, a.hig    
3430         : lt128( a.high, a.low, b.high, b.low    
3431                                                  
3432 }                                                
3433                                                  
3434 #endif                                           
3435                                                  
3436
~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.
TOMOYO Linux Cross Reference Linux/arch/arm/nwfpe/softfloat.c

Diff markup

Differences between /arch/arm/nwfpe/softfloat.c (Version linux-6.12-rc7) and /arch/m68k/nwfpe/softfloat.c (Version linux-5.2.21)

TOMOYO Linux Cross Reference
Linux/arch/arm/nwfpe/softfloat.c