1 2 /* 3 ============================================== 4 5 This C source fragment is part of the SoftFloa 6 Arithmetic Package, Release 2. 7 8 Written by John R. Hauser. This work was made 9 International Computer Science Institute, loca 10 Street, Berkeley, California 94704. Funding w 11 National Science Foundation under grant MIP-93 12 of this code was written as part of a project 13 processor in collaboration with the University 14 overseen by Profs. Nelson Morgan and John Wawr 15 is available through the web page 16 http://www.jhauser.us/arithmetic/SoftFloat-2b/ 17 18 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. 19 has been made to avoid it, THIS SOFTWARE MAY C 20 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF TH 21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAK 22 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISI 23 24 Derivative works are acceptable, even for comm 25 (1) they include prominent notice that the wor 26 include prominent notice akin to these three p 27 this code that are retained. 28 29 ============================================== 30 */ 31 32 /* 33 ---------------------------------------------- 34 Shifts `a' right by the number of bits given i 35 bits are shifted off, they are ``jammed'' into 36 the result by setting the least significant bi 37 can be arbitrarily large; in particular, if `c 38 result will be either 0 or 1, depending on whe 39 The result is stored in the location pointed t 40 ---------------------------------------------- 41 */ 42 INLINE void shift32RightJamming( bits32 a, int 43 { 44 bits32 z; 45 if ( count == 0 ) { 46 z = a; 47 } 48 else if ( count < 32 ) { 49 z = ( a>>count ) | ( ( a<<( ( - count 50 } 51 else { 52 z = ( a != 0 ); 53 } 54 *zPtr = z; 55 } 56 57 /* 58 ---------------------------------------------- 59 Shifts `a' right by the number of bits given i 60 bits are shifted off, they are ``jammed'' into 61 the result by setting the least significant bi 62 can be arbitrarily large; in particular, if `c 63 result will be either 0 or 1, depending on whe 64 The result is stored in the location pointed t 65 ---------------------------------------------- 66 */ 67 INLINE void shift64RightJamming( bits64 a, int 68 { 69 bits64 z; 70 71 __asm__("@shift64RightJamming -- start"); 72 if ( count == 0 ) { 73 z = a; 74 } 75 else if ( count < 64 ) { 76 z = ( a>>count ) | ( ( a<<( ( - count 77 } 78 else { 79 z = ( a != 0 ); 80 } 81 __asm__("@shift64RightJamming -- end"); 82 *zPtr = z; 83 } 84 85 /* 86 ---------------------------------------------- 87 Shifts the 128-bit value formed by concatenati 88 _plus_ the number of bits given in `count'. T 89 64 nonzero bits; this is stored at the locatio 90 bits shifted off form a second 64-bit result a 91 shifted off is the most-significant bit of the 92 63 bits of the extra result are all zero if an 93 bits shifted off were all zero. This extra re 94 pointed to by `z1Ptr'. The value of `count' c 95 (This routine makes more sense if `a0' and 96 fixed-point value with binary point between `a 97 value is shifted right by the number of bits g 98 integer part of the result is returned at the 99 `z0Ptr'. The fractional part of the result ma 100 described above, and is returned at the locati 101 ---------------------------------------------- 102 */ 103 INLINE void 104 shift64ExtraRightJamming( 105 bits64 a0, bits64 a1, int16 count, bits64 106 { 107 bits64 z0, z1; 108 int8 negCount = ( - count ) & 63; 109 110 if ( count == 0 ) { 111 z1 = a1; 112 z0 = a0; 113 } 114 else if ( count < 64 ) { 115 z1 = ( a0<<negCount ) | ( a1 != 0 ); 116 z0 = a0>>count; 117 } 118 else { 119 if ( count == 64 ) { 120 z1 = a0 | ( a1 != 0 ); 121 } 122 else { 123 z1 = ( ( a0 | a1 ) != 0 ); 124 } 125 z0 = 0; 126 } 127 *z1Ptr = z1; 128 *z0Ptr = z0; 129 130 } 131 132 /* 133 ---------------------------------------------- 134 Shifts the 128-bit value formed by concatenati 135 number of bits given in `count'. Any bits shi 136 of `count' can be arbitrarily large; in partic 137 than 128, the result will be 0. The result is 138 which are stored at the locations pointed to b 139 ---------------------------------------------- 140 */ 141 INLINE void 142 shift128Right( 143 bits64 a0, bits64 a1, int16 count, bits64 144 { 145 bits64 z0, z1; 146 int8 negCount = ( - count ) & 63; 147 148 if ( count == 0 ) { 149 z1 = a1; 150 z0 = a0; 151 } 152 else if ( count < 64 ) { 153 z1 = ( a0<<negCount ) | ( a1>>count ); 154 z0 = a0>>count; 155 } 156 else { 157 z1 = ( count < 64 ) ? ( a0>>( count & 158 z0 = 0; 159 } 160 *z1Ptr = z1; 161 *z0Ptr = z0; 162 163 } 164 165 /* 166 ---------------------------------------------- 167 Shifts the 128-bit value formed by concatenati 168 number of bits given in `count'. If any nonze 169 are ``jammed'' into the least significant bit 170 least significant bit to 1. The value of `cou 171 in particular, if `count' is greater than 128, 172 or 1, depending on whether the concatenation o 173 nonzero. The result is broken into two 64-bit 174 the locations pointed to by `z0Ptr' and `z1Ptr 175 ---------------------------------------------- 176 */ 177 INLINE void 178 shift128RightJamming( 179 bits64 a0, bits64 a1, int16 count, bits64 180 { 181 bits64 z0, z1; 182 int8 negCount = ( - count ) & 63; 183 184 if ( count == 0 ) { 185 z1 = a1; 186 z0 = a0; 187 } 188 else if ( count < 64 ) { 189 z1 = ( a0<<negCount ) | ( a1>>count ) 190 z0 = a0>>count; 191 } 192 else { 193 if ( count == 64 ) { 194 z1 = a0 | ( a1 != 0 ); 195 } 196 else if ( count < 128 ) { 197 z1 = ( a0>>( count & 63 ) ) | ( ( 198 } 199 else { 200 z1 = ( ( a0 | a1 ) != 0 ); 201 } 202 z0 = 0; 203 } 204 *z1Ptr = z1; 205 *z0Ptr = z0; 206 207 } 208 209 /* 210 ---------------------------------------------- 211 Shifts the 192-bit value formed by concatenati 212 by 64 _plus_ the number of bits given in `coun 213 at most 128 nonzero bits; these are broken int 214 stored at the locations pointed to by `z0Ptr' 215 off form a third 64-bit result as follows: Th 216 the most-significant bit of the extra result, 217 extra result are all zero if and only if _all_ 218 were all zero. This extra result is stored in 219 `z2Ptr'. The value of `count' can be arbitrar 220 (This routine makes more sense if `a0', `a 221 to form a fixed-point value with binary point 222 fixed-point value is shifted right by the numb 223 and the integer part of the result is returned 224 by `z0Ptr' and `z1Ptr'. The fractional part o 225 corrupted as described above, and is returned 226 `z2Ptr'.) 227 ---------------------------------------------- 228 */ 229 INLINE void 230 shift128ExtraRightJamming( 231 bits64 a0, 232 bits64 a1, 233 bits64 a2, 234 int16 count, 235 bits64 *z0Ptr, 236 bits64 *z1Ptr, 237 bits64 *z2Ptr 238 ) 239 { 240 bits64 z0, z1, z2; 241 int8 negCount = ( - count ) & 63; 242 243 if ( count == 0 ) { 244 z2 = a2; 245 z1 = a1; 246 z0 = a0; 247 } 248 else { 249 if ( count < 64 ) { 250 z2 = a1<<negCount; 251 z1 = ( a0<<negCount ) | ( a1>>coun 252 z0 = a0>>count; 253 } 254 else { 255 if ( count == 64 ) { 256 z2 = a1; 257 z1 = a0; 258 } 259 else { 260 a2 |= a1; 261 if ( count < 128 ) { 262 z2 = a0<<negCount; 263 z1 = a0>>( count & 63 ); 264 } 265 else { 266 z2 = ( count == 128 ) ? a0 267 z1 = 0; 268 } 269 } 270 z0 = 0; 271 } 272 z2 |= ( a2 != 0 ); 273 } 274 *z2Ptr = z2; 275 *z1Ptr = z1; 276 *z0Ptr = z0; 277 278 } 279 280 /* 281 ---------------------------------------------- 282 Shifts the 128-bit value formed by concatenati 283 number of bits given in `count'. Any bits shi 284 of `count' must be less than 64. The result i 285 pieces which are stored at the locations point 286 ---------------------------------------------- 287 */ 288 INLINE void 289 shortShift128Left( 290 bits64 a0, bits64 a1, int16 count, bits64 291 { 292 293 *z1Ptr = a1<<count; 294 *z0Ptr = 295 ( count == 0 ) ? a0 : ( a0<<count ) | 296 297 } 298 299 /* 300 ---------------------------------------------- 301 Shifts the 192-bit value formed by concatenati 302 by the number of bits given in `count'. Any b 303 The value of `count' must be less than 64. Th 304 64-bit pieces which are stored at the location 305 `z1Ptr', and `z2Ptr'. 306 ---------------------------------------------- 307 */ 308 INLINE void 309 shortShift192Left( 310 bits64 a0, 311 bits64 a1, 312 bits64 a2, 313 int16 count, 314 bits64 *z0Ptr, 315 bits64 *z1Ptr, 316 bits64 *z2Ptr 317 ) 318 { 319 bits64 z0, z1, z2; 320 int8 negCount; 321 322 z2 = a2<<count; 323 z1 = a1<<count; 324 z0 = a0<<count; 325 if ( 0 < count ) { 326 negCount = ( ( - count ) & 63 ); 327 z1 |= a2>>negCount; 328 z0 |= a1>>negCount; 329 } 330 *z2Ptr = z2; 331 *z1Ptr = z1; 332 *z0Ptr = z0; 333 334 } 335 336 /* 337 ---------------------------------------------- 338 Adds the 128-bit value formed by concatenating 339 value formed by concatenating `b0' and `b1'. 340 any carry out is lost. The result is broken i 341 are stored at the locations pointed to by `z0P 342 ---------------------------------------------- 343 */ 344 INLINE void 345 add128( 346 bits64 a0, bits64 a1, bits64 b0, bits64 b 347 { 348 bits64 z1; 349 350 z1 = a1 + b1; 351 *z1Ptr = z1; 352 *z0Ptr = a0 + b0 + ( z1 < a1 ); 353 354 } 355 356 /* 357 ---------------------------------------------- 358 Adds the 192-bit value formed by concatenating 359 192-bit value formed by concatenating `b0', `b 360 modulo 2^192, so any carry out is lost. The r 361 64-bit pieces which are stored at the location 362 `z1Ptr', and `z2Ptr'. 363 ---------------------------------------------- 364 */ 365 INLINE void 366 add192( 367 bits64 a0, 368 bits64 a1, 369 bits64 a2, 370 bits64 b0, 371 bits64 b1, 372 bits64 b2, 373 bits64 *z0Ptr, 374 bits64 *z1Ptr, 375 bits64 *z2Ptr 376 ) 377 { 378 bits64 z0, z1, z2; 379 int8 carry0, carry1; 380 381 z2 = a2 + b2; 382 carry1 = ( z2 < a2 ); 383 z1 = a1 + b1; 384 carry0 = ( z1 < a1 ); 385 z0 = a0 + b0; 386 z1 += carry1; 387 z0 += ( z1 < carry1 ); 388 z0 += carry0; 389 *z2Ptr = z2; 390 *z1Ptr = z1; 391 *z0Ptr = z0; 392 393 } 394 395 /* 396 ---------------------------------------------- 397 Subtracts the 128-bit value formed by concaten 398 128-bit value formed by concatenating `a0' and 399 2^128, so any borrow out (carry out) is lost. 400 64-bit pieces which are stored at the location 401 `z1Ptr'. 402 ---------------------------------------------- 403 */ 404 INLINE void 405 sub128( 406 bits64 a0, bits64 a1, bits64 b0, bits64 b 407 { 408 409 *z1Ptr = a1 - b1; 410 *z0Ptr = a0 - b0 - ( a1 < b1 ); 411 412 } 413 414 /* 415 ---------------------------------------------- 416 Subtracts the 192-bit value formed by concaten 417 from the 192-bit value formed by concatenating 418 Subtraction is modulo 2^192, so any borrow out 419 result is broken into three 64-bit pieces whic 420 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 421 ---------------------------------------------- 422 */ 423 INLINE void 424 sub192( 425 bits64 a0, 426 bits64 a1, 427 bits64 a2, 428 bits64 b0, 429 bits64 b1, 430 bits64 b2, 431 bits64 *z0Ptr, 432 bits64 *z1Ptr, 433 bits64 *z2Ptr 434 ) 435 { 436 bits64 z0, z1, z2; 437 int8 borrow0, borrow1; 438 439 z2 = a2 - b2; 440 borrow1 = ( a2 < b2 ); 441 z1 = a1 - b1; 442 borrow0 = ( a1 < b1 ); 443 z0 = a0 - b0; 444 z0 -= ( z1 < borrow1 ); 445 z1 -= borrow1; 446 z0 -= borrow0; 447 *z2Ptr = z2; 448 *z1Ptr = z1; 449 *z0Ptr = z0; 450 451 } 452 453 /* 454 ---------------------------------------------- 455 Multiplies `a' by `b' to obtain a 128-bit prod 456 into two 64-bit pieces which are stored at the 457 `z0Ptr' and `z1Ptr'. 458 ---------------------------------------------- 459 */ 460 INLINE void mul64To128( bits64 a, bits64 b, bi 461 { 462 bits32 aHigh, aLow, bHigh, bLow; 463 bits64 z0, zMiddleA, zMiddleB, z1; 464 465 aLow = a; 466 aHigh = a>>32; 467 bLow = b; 468 bHigh = b>>32; 469 z1 = ( (bits64) aLow ) * bLow; 470 zMiddleA = ( (bits64) aLow ) * bHigh; 471 zMiddleB = ( (bits64) aHigh ) * bLow; 472 z0 = ( (bits64) aHigh ) * bHigh; 473 zMiddleA += zMiddleB; 474 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) 475 zMiddleA <<= 32; 476 z1 += zMiddleA; 477 z0 += ( z1 < zMiddleA ); 478 *z1Ptr = z1; 479 *z0Ptr = z0; 480 481 } 482 483 /* 484 ---------------------------------------------- 485 Multiplies the 128-bit value formed by concate 486 obtain a 192-bit product. The product is brok 487 which are stored at the locations pointed to b 488 `z2Ptr'. 489 ---------------------------------------------- 490 */ 491 INLINE void 492 mul128By64To192( 493 bits64 a0, 494 bits64 a1, 495 bits64 b, 496 bits64 *z0Ptr, 497 bits64 *z1Ptr, 498 bits64 *z2Ptr 499 ) 500 { 501 bits64 z0, z1, z2, more1; 502 503 mul64To128( a1, b, &z1, &z2 ); 504 mul64To128( a0, b, &z0, &more1 ); 505 add128( z0, more1, 0, z1, &z0, &z1 ); 506 *z2Ptr = z2; 507 *z1Ptr = z1; 508 *z0Ptr = z0; 509 510 } 511 512 /* 513 ---------------------------------------------- 514 Multiplies the 128-bit value formed by concate 515 128-bit value formed by concatenating `b0' and 516 product. The product is broken into four 64-b 517 the locations pointed to by `z0Ptr', `z1Ptr', 518 ---------------------------------------------- 519 */ 520 INLINE void 521 mul128To256( 522 bits64 a0, 523 bits64 a1, 524 bits64 b0, 525 bits64 b1, 526 bits64 *z0Ptr, 527 bits64 *z1Ptr, 528 bits64 *z2Ptr, 529 bits64 *z3Ptr 530 ) 531 { 532 bits64 z0, z1, z2, z3; 533 bits64 more1, more2; 534 535 mul64To128( a1, b1, &z2, &z3 ); 536 mul64To128( a1, b0, &z1, &more2 ); 537 add128( z1, more2, 0, z2, &z1, &z2 ); 538 mul64To128( a0, b0, &z0, &more1 ); 539 add128( z0, more1, 0, z1, &z0, &z1 ); 540 mul64To128( a0, b1, &more1, &more2 ); 541 add128( more1, more2, 0, z2, &more1, &z2 ) 542 add128( z0, z1, 0, more1, &z0, &z1 ); 543 *z3Ptr = z3; 544 *z2Ptr = z2; 545 *z1Ptr = z1; 546 *z0Ptr = z0; 547 548 } 549 550 /* 551 ---------------------------------------------- 552 Returns an approximation to the 64-bit integer 553 `b' into the 128-bit value formed by concatena 554 divisor `b' must be at least 2^63. If q is th 555 toward zero, the approximation returned lies b 556 If the exact quotient q is larger than 64 bits 557 unsigned integer is returned. 558 ---------------------------------------------- 559 */ 560 static bits64 estimateDiv128To64( bits64 a0, b 561 { 562 bits64 b0, b1; 563 bits64 rem0, rem1, term0, term1; 564 bits64 z; 565 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFF 566 b0 = b>>32; /* hence b0 is 32 bits wide n 567 if ( b0<<32 <= a0 ) { 568 z = LIT64( 0xFFFFFFFF00000000 ); 569 } else { 570 z = a0; 571 do_div( z, b0 ); 572 z <<= 32; 573 } 574 mul64To128( b, z, &term0, &term1 ); 575 sub128( a0, a1, term0, term1, &rem0, &rem1 576 while ( ( (sbits64) rem0 ) < 0 ) { 577 z -= LIT64( 0x100000000 ); 578 b1 = b<<32; 579 add128( rem0, rem1, b0, b1, &rem0, &re 580 } 581 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 582 if ( b0<<32 <= rem0 ) { 583 z |= 0xFFFFFFFF; 584 } else { 585 do_div( rem0, b0 ); 586 z |= rem0; 587 } 588 return z; 589 590 } 591 592 /* 593 ---------------------------------------------- 594 Returns an approximation to the square root of 595 by `a'. Considered as an integer, `a' must be 596 `aExp' (the least significant bit) is 1, the i 597 2^31*sqrt(`a'/2^31), where `a' is considered a 598 is 0, the integer returned approximates 2^31*s 599 case, the approximation returned lies strictly 600 value. 601 ---------------------------------------------- 602 */ 603 static bits32 estimateSqrt32( int16 aExp, bits 604 { 605 static const bits16 sqrtOddAdjustments[] = 606 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D 607 0x039C, 0x0468, 0x0545, 0x0631, 0x072B 608 }; 609 static const bits16 sqrtEvenAdjustments[] 610 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A 611 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068 612 }; 613 int8 index; 614 bits32 z; 615 bits64 A; 616 617 index = ( a>>27 ) & 15; 618 if ( aExp & 1 ) { 619 z = 0x4000 + ( a>>17 ) - sqrtOddAdjust 620 z = ( ( a / z )<<14 ) + ( z<<15 ); 621 a >>= 1; 622 } 623 else { 624 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjus 625 z = a / z + z; 626 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( 627 if ( z <= a ) return (bits32) ( ( (sbi 628 } 629 A = ( (bits64) a )<<31; 630 do_div( A, z ); 631 return ( (bits32) A ) + ( z>>1 ); 632 633 } 634 635 /* 636 ---------------------------------------------- 637 Returns the number of leading 0 bits before th 638 of `a'. If `a' is zero, 32 is returned. 639 ---------------------------------------------- 640 */ 641 static int8 countLeadingZeros32( bits32 a ) 642 { 643 static const int8 countLeadingZerosHigh[] 644 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 645 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 646 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 647 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 648 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 649 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 650 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 651 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 655 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 657 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 658 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 660 }; 661 int8 shiftCount; 662 663 shiftCount = 0; 664 if ( a < 0x10000 ) { 665 shiftCount += 16; 666 a <<= 16; 667 } 668 if ( a < 0x1000000 ) { 669 shiftCount += 8; 670 a <<= 8; 671 } 672 shiftCount += countLeadingZerosHigh[ a>>24 673 return shiftCount; 674 675 } 676 677 /* 678 ---------------------------------------------- 679 Returns the number of leading 0 bits before th 680 of `a'. If `a' is zero, 64 is returned. 681 ---------------------------------------------- 682 */ 683 static int8 countLeadingZeros64( bits64 a ) 684 { 685 int8 shiftCount; 686 687 shiftCount = 0; 688 if ( a < ( (bits64) 1 )<<32 ) { 689 shiftCount += 32; 690 } 691 else { 692 a >>= 32; 693 } 694 shiftCount += countLeadingZeros32( a ); 695 return shiftCount; 696 697 } 698 699 /* 700 ---------------------------------------------- 701 Returns 1 if the 128-bit value formed by conca 702 is equal to the 128-bit value formed by concat 703 Otherwise, returns 0. 704 ---------------------------------------------- 705 */ 706 INLINE flag eq128( bits64 a0, bits64 a1, bits6 707 { 708 709 return ( a0 == b0 ) && ( a1 == b1 ); 710 711 } 712 713 /* 714 ---------------------------------------------- 715 Returns 1 if the 128-bit value formed by conca 716 than or equal to the 128-bit value formed by c 717 Otherwise, returns 0. 718 ---------------------------------------------- 719 */ 720 INLINE flag le128( bits64 a0, bits64 a1, bits6 721 { 722 723 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( 724 725 } 726 727 /* 728 ---------------------------------------------- 729 Returns 1 if the 128-bit value formed by conca 730 than the 128-bit value formed by concatenating 731 returns 0. 732 ---------------------------------------------- 733 */ 734 INLINE flag lt128( bits64 a0, bits64 a1, bits6 735 { 736 737 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( 738 739 } 740 741 /* 742 ---------------------------------------------- 743 Returns 1 if the 128-bit value formed by conca 744 not equal to the 128-bit value formed by conca 745 Otherwise, returns 0. 746 ---------------------------------------------- 747 */ 748 INLINE flag ne128( bits64 a0, bits64 a1, bits6 749 { 750 751 return ( a0 != b0 ) || ( a1 != b1 ); 752 753 } 754
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.