/* SPDX-License-Identifier: GPL-2.0 */
	.file	"reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@suburbia.net               |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | From C, call as:                                                          |
 |  int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
 |                                                                           |
 |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
 |    one was raised, or -1 on internal error.                               |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 | NOTE(review): no fpu_reg_round_sqrt label is defined in this file as      |
 | seen here.  Also, the code below takes the control word from PARAM4 and   |
 | the sign from PARAM5, so the three-argument prototype quoted above looks  |
 | incomplete -- confirm against the C declaration of FPU_round.             |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 |  %cx  A control word in the same format as the FPU control word.          |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  The code in this module has become quite complex, but it should handle   |
 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 |  computations.                                                            |
 |  There are a few rare cases where the results are not set identically to  |
 |  a real FPU. These require a bit more thought because at this stage the   |
 |  results of the code here appear to be more consistent...                 |
 |  This may be changed in a future version.                                 |
 +---------------------------------------------------------------------------*/


#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
/* (the macros carry the `$' immediate prefix themselves) */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack: */
/*	Both bytes live in the 4-byte slot which fpu_reg_round reserves with
	its `pushl %ebx'.  They are %esp-relative, so they are only valid
	while nothing else has been pushed on top of that slot. */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/* Entry point when called from C */
/*
 * Builds an %ebp frame, saves %esi, %edi and %ebx, then loads the register
 * interface expected by fpu_reg_round:
 *	%edi      = PARAM1 (pointer to the FPU_REG)
 *	%eax:%ebx = SIGH(%edi):SIGL(%edi), the 64 bit significand
 *	%edx      = PARAM2, the significand extension
 * and falls straight through into fpu_reg_round.
 */
SYM_FUNC_START(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi
	movl	SIGH(%edi),%eax
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx	/* %ecx = control word */

#ifndef NON_REENTRANT_FPU
	/* The value pushed is irrelevant; this only reserves the slot
	   used by FPU_bits_lost / FPU_denormal. */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep the complete control word in %esi; %ecx is reduced to the
	   precision-control field and later reused as scratch. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */

/* Without PARANOID, a precision-control value not matched above falls
   through into the 24 bit code below. */


/* Round etc to 24 bit precision */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx	/* %ecx = rounding-control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Increment only if anything at all lies below the 24th bit: the
	   low 8 bits of %eax, all of %ebx, or the extension %edx. */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	/* The carry out of this add is what LCheck_Round_Overflow tests;
	   the intervening jmp does not alter the flags. */
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/* Round etc to 53 bit precision */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx	/* %ecx = rounding-control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Increment only if anything lies below the 53rd bit: the low
	   11 bits of %ebx or the extension %edx. */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax			/* propagate the carry into the ms word */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/* Round etc to 64 bit precision */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx	/* %ecx = rounding-control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx		/* any extension bits at all? */
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx		/* any extension bits at all? */
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	/* Compare the extension with exactly one half of an ls bit */
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* below half */

	jne	LDo_64_round_up		/* above half */

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

/* All three round-up paths arrive here with CF holding the carry out of
   the significand increment. */
LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	/* rcrl rotates the set carry back in as the ms bit of %eax */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

L_Re_normalise:
	/* A non-zero FPU_denormal means L_Make_denorm was entered
	   (either DENORMAL or UNMASKED_UNDERFLOW was recorded). */
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

/* From here %edx carries the tag which is returned in %eax. */
L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax	/* return value = tag */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Discard the local-storage slot reserved at fpu_reg_round; %ebx
	   is reloaded by the next pop anyway. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

/* Common epilogue: undoes the %ebx/%edi/%esi pushes and the %ebp frame
   which the header requires every caller to have set up. */
fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 */
L_precision_lost_up:
	/* %edx (tag) and %eax (ms significand word) are saved around
	   the call */
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 */
L_precision_lost_down:
	/* %edx (tag) and %eax (ms significand word) are saved around
	   the call */
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	/* With stack-based locals the FPU_* macros are off by 4 bytes
	   while %ecx is on the stack; none of the shift paths below touch
	   them before the matching popl. */
	pushl	%ecx		/* Save */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx	/* %cx = shift count, EXP_UNDER+1 - exponent */

	cmpw	$64,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* fold the old extension in as a sticky bit */
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl		/* the whole-word move below supplies 32 */
	orl	%edx,%edx
	setne	%ch		/* %ch = 1 if the old extension was non-zero */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	/* NOTE(review): %ch was already or-ed into %bl before the shift,
	   so this repeat appears redundant -- confirm before removing. */
	orb	%ch,%bl
	orb	%cl,%bl
	/* Move everything down by one 32 bit word */
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	/* %eax becomes the extension; the old %ebx and %edx survive only
	   as a sticky bit or-ed into its ls bit. */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx		/* non-zero extension below one half */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


Unmasked_underflow:
	/* No de-normalising shift is done; rounding proceeds on the
	   significand as it is. */
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/* Undo the de-normalisation. */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signaled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	/* ZF still reflects the `orl %eax,%eax' above */
	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	/* movl leaves the flags alone, so the jz still acts on the testb */
	movl	TAG_Special,%edx
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax		/* drop the EX_Underflow argument */
	popl	%eax		/* restore the ms significand word */
	movl	TAG_Special,%edx
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax		/* drop the EX_Underflow argument */
	popl	%eax		/* restore the ms significand word */

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	/* The precision flag was set above, so the FPU_bits_lost tests at
	   L_deNormalised are bypassed. */
	jmp	L_Store_significand


/* The operations resulted in a number too large to represent. */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	/* NOTE(review): %eax is not reloaded after the call, so presumably
	   arith_overflow's return value is what this routine returns --
	   confirm. */
	jmp	fpu_reg_round_signed_special_exit


Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax		/* drop the EX_Underflow argument */
	popl	%eax		/* restore the ms significand word */
	jmp	L_Normalised


/*
 * Internal-error exits (PARANOID builds only).  Each one raises
 * EX_INTERNAL or-ed with a site-specific code, discards the argument into
 * %ebx (fpu_Arith_exit reloads %ebx from the stack), and returns -1.
 */
#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

/* Only referenced from the commented-out entry check in fpu_reg_round. */
L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax	/* -1 == internal error */
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */

SYM_FUNC_END(FPU_round)
/*
 * Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
 * TOMOYO® is a registered trademark of NTT DATA CORPORATION.
 */