|
|	round.sa 3.4 7/29/91
|
|	handle rounding and normalization tasks
|
|
|
|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

|
|	round --- round result according to precision/mode
|
|	a0 points to the input operand in the internal extended format
|	d1(high word) contains rounding precision:
|		ext = $0000xxxx
|		sgl = $0001xxxx
|		dbl = $0002xxxx
|	d1(low word) contains rounding mode, used as the index into
|	mode_tab (the last two digits below are binary):
|		RN  = $xxxx0000		(index 0, rnd_near)
|		RZ  = $xxxx0001		(index 1, rnd_zero)
|		RM  = $xxxx0010		(index 2, rnd_mnus)
|		RP  = $xxxx0011		(index 3, rnd_plus)
|	d0{31:29} contains the g,r,s bits (extended)
|
|	On return the value pointed to by a0 has been rounded in place.
|	a0 is preserved.  d0 and a1 are used as scratch, and d1 comes
|	back with its two halves swapped (precision in the low word).
|	The result is not typed - the tag field is invalid.  The
|	result is still in the internal extended format.
|
|	The INEX bit of USER_FPSR will be set if the rounded result was
|	inexact (i.e. if any of the g-r-s bits were set).
|

	.global	round
round:
| If g=r=s=0 then result is exact and round is done, else set
| the inex flag in status reg and continue.
|
	bsrs	ext_grs			|this subroutine looks at the
|					:rounding precision and returns
|					;the matching g-r-s bits in d0
	tstl	%d0			|if grs are all zero the result is
	bne	rnd_cont		|exact; otherwise go round it

	swap	%d1			|set up d1.w = rounding precision
	bra	truncate		|exact: only clear bits below prec.

rnd_cont:
|
| Use rounding mode as an index into a jump table for these modes.
|
	orl	#inx2a_mask,USER_FPSR(%a6) |inexact: set inx2a_mask bits
	lea	mode_tab,%a1
	movel	(%a1,%d1.w*4),%a1	|fetch handler for mode in d1.w
	jmp	(%a1)
|
| Jump table indexed by rounding mode in d1.w.  All following assumes
| grs != 0.
|
mode_tab:
	.long	rnd_near
	.long	rnd_zero
	.long	rnd_mnus
	.long	rnd_plus
|
|	ROUND PLUS INFINITY
|
|	If sign of fp number = 0 (positive), then add 1 to l.
|
rnd_plus:
	swap	%d1			|set up d1.w = rounding precision
	tstb	LOCAL_SGN(%a0)		|check for sign
	bmi	truncate		|if negative then truncate
	movel	#0xffffffff,%d0		|force g,r,s to be all f's so the
|					;add routine keeps the l-bit
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1	|add routine for this precision
	jmp	(%a1)
|
|	ROUND MINUS INFINITY
|
|	If sign of fp number = 1 (negative), then add 1 to l.
|
rnd_mnus:
	swap	%d1			|set up d1.w = rounding precision
	tstb	LOCAL_SGN(%a0)		|check for sign
	bpl	truncate		|if positive then truncate
	movel	#0xffffffff,%d0		|force g,r,s to be all f's so the
|					;add routine keeps the l-bit
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1	|add routine for this precision
	jmp	(%a1)
|
|	ROUND ZERO
|
|	Always truncate.
rnd_zero:
	swap	%d1			|set up d1.w = rounding precision
	bra	truncate
|
|
|	ROUND NEAREST
|
|	If (g=1), then add 1 to l and if (r=s=0), then clear l
|	Note that this will round to even in case of a tie.
|
rnd_near:
	swap	%d1			|set up d1.w = rounding precision
	asll	#1,%d0			|shift g-bit to c-bit; d0 is now
|					;zero only if r=s=0
	bcc	truncate		|if (g=0) then just truncate
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1	|add routine for this precision
	jmp	(%a1)

|
|	ext_grs --- extract guard, round and sticky bits
|
| Input:	d1 =		PREC:ROUND
| Output:	d0{31:29}=	guard, round, sticky
|
| The ext_grs extract the guard/round/sticky bits according to the
| selected rounding precision. It is called by the round subroutine
| only.
| All registers except d0 are kept intact. d0 becomes an
| updated guard,round,sticky in d0{31:29}
|
| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
|	 prior to usage, and needs to restore d1 to original.
|
ext_grs:
	swap	%d1			|have d1.w hold the round precision
	cmpiw	#0,%d1
	bnes	sgl_or_dbl
	bras	end_ext_grs		|ext precision: d0 is used as is

sgl_or_dbl:
	moveml	%d2/%d3,-(%a7)		|make some temp registers
	cmpiw	#1,%d1
	bnes	grs_dbl			|anything but 1 is handled as dbl
grs_sgl:
	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|g-r = LOCAL_HI bits 7:6
	movel	#30,%d2
	lsll	%d2,%d3			|shift g-r bits to MSB of d3
	movel	LOCAL_HI(%a0),%d2
	andil	#0x0000003f,%d2		|s bit is the or of LOCAL_HI
	bnes	st_stky			|bits 5:0 ...
	tstl	LOCAL_LO(%a0)		|... of the whole lower mantissa
	bnes	st_stky			|if any are set, set sticky
	tstl	%d0			|... and of the original g,r,s
	bnes	st_stky			|if any are set, set sticky
	bras	end_sd			|if all are clear, s stays 0
grs_dbl:
	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|g-r = LOCAL_LO bits 10:9
	movel	#30,%d2
	lsll	%d2,%d3			|shift g-r bits to the MSB of d3
	movel	LOCAL_LO(%a0),%d2
	andil	#0x000001ff,%d2		|s bit is the or of LOCAL_LO
	bnes	st_stky			|bits 8:0 ...
	tstl	%d0			|... and of the original g,r,s
	bnes	st_stky			|if any are set, set sticky
	bras	end_sd			|if clear, s stays 0
st_stky:
	bset	#rnd_stky_bit,%d3
end_sd:
	movel	%d3,%d0			|return grs to d0
	moveml	(%a7)+,%d2/%d3		|restore scratch registers
end_ext_grs:
	swap	%d1			|restore d1 to original
	rts

|*******************  Local Equates
	.set	ad_1_sgl,0x00000100	|  constant to add 1 to l-bit in sgl prec (LOCAL_HI bit 8)
	.set	ad_1_dbl,0x00000800	|  constant to add 1 to l-bit in dbl prec (LOCAL_LO bit 11)


|Jump table for adding 1 to the l-bit indexed by rnd prec

add_to_l:
	.long	add_ext
	.long	add_sgl
	.long	add_dbl
	.long	add_dbl
|
|	ADD SINGLE
|
add_sgl:
	addl	#ad_1_sgl,LOCAL_HI(%a0)
	bccs	scc_clr			|no mantissa overflow
	roxrw	LOCAL_HI(%a0)		|shift the carry back in at the
	roxrw	LOCAL_HI+2(%a0)		|top of the ms mantissa (1 bit right)
	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent
scc_clr:
	tstl	%d0			|d0 = 0 only for a round-nearest tie
	bnes	sgl_done
	andiw	#0xfe00,LOCAL_HI+2(%a0)	|tie: clear the l-bit (bit 8)
sgl_done:
	andil	#0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
	clrl	LOCAL_LO(%a0)		|clear ls mantissa
	rts

|
|	ADD EXTENDED
|
add_ext:
	addql	#1,LOCAL_LO(%a0)	|add 1 to l-bit
	bccs	xcc_clr			|test for carry out
	addql	#1,LOCAL_HI(%a0)	|propagate carry
	bccs	xcc_clr
	roxrw	LOCAL_HI(%a0)		|mantissa overflowed: rotate the
	roxrw	LOCAL_HI+2(%a0)		|carry back in, 1 bit right across
	roxrw	LOCAL_LO(%a0)		|all four mantissa words
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent
xcc_clr:
	tstl	%d0			|d0 = 0 only for a round-nearest tie
	bnes	add_ext_done
	andib	#0xfe,LOCAL_LO+3(%a0)	|tie: clear the l-bit (bit 0)
add_ext_done:
	rts
|
|	ADD DOUBLE
|
add_dbl:
	addl	#ad_1_dbl,LOCAL_LO(%a0)
	bccs	dcc_clr
	addql	#1,LOCAL_HI(%a0)	|propagate carry
	bccs	dcc_clr
	roxrw	LOCAL_HI(%a0)		|mantissa overflowed: rotate the
	roxrw	LOCAL_HI+2(%a0)		|carry back in, 1 bit right across
	roxrw	LOCAL_LO(%a0)		|all four mantissa words
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|incr exponent
dcc_clr:
	tstl	%d0			|d0 = 0 only for a round-nearest tie
	bnes	dbl_done
	andiw	#0xf000,LOCAL_LO+2(%a0)	|tie: clear the l-bit (bit 11)

dbl_done:
	andil	#0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
	rts

error:
	rts
|
| Truncate all other bits
|
| Jump table indexed by rounding precision in d1.w; ext has nothing
| to clear, sgl and dbl reuse the tails of the add routines.
|
trunct:
	.long	end_rnd
	.long	sgl_done
	.long	dbl_done
	.long	dbl_done

truncate:
	lea	trunct,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)

end_rnd:
	rts

|
|	NORMALIZE
|
| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
| is done by shifting the mantissa left while decrementing the
| exponent.
|
| NRM_SET shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1).
|
| NRM_ZERO shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1) unless this would mean the exponent
| would go less than 0.  In that case the number becomes a denorm - the
| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
| normalized.
|
| Note that both routines have been optimized and therefore do not
| have the easy to follow decrement/shift loop.
|
|	NRM_ZERO
|
|	Distance to first 1 bit in mantissa = X
|	Distance to 0 from exponent = Y
|	If X < Y
|	Then
|	  nrm_set
|	Else
|	  shift mantissa by Y
|	  set exponent = 0
|
|input:
|	FP_SCR1 = exponent, ms mantissa part, ls mantissa part
|output:
|	L_SCR1{4} = fpte15 or ete15 bit
|
| NOTE(review): the code below works on the operand at a0 (LOCAL_EX,
| LOCAL_HI, LOCAL_LO) and never references FP_SCR1 or L_SCR1, so the
| input/output description above looks stale - confirm.
| d0 and d1 are used as scratch; d2/d3/d5/d6 are saved and restored.
|
	.global	nrm_zero
nrm_zero:
	movew	LOCAL_EX(%a0),%d0
	cmpw	#64,%d0		|see if exp >= 64
	bmis	d0_less
	bsr	nrm_set		|exp >= 64 so exp won't go past 0
	rts
d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1
	movel	LOCAL_LO(%a0),%d2

	bfffo	%d1{#0:#32},%d3	|get the distance to the first 1
|				;in ms mant
	beqs	ms_clr		|branch if no bits were set
	cmpw	%d3,%d0		|if X>Y
	bmis	greater		|then exp will go past 0 (neg) if
|				;it is just shifted
	bsr	nrm_set		|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
| NOTE(review): register shift counts are taken modulo 64, so when this
| is entered from ms_clr with d0 > 32 every shift below has a count
| of 32 or more and the stored mantissa appears to end up all zero
| instead of ls mant << (d0-32) - confirm whether that case can occur.
|
greater:
	movel	%d2,%d6		|save ls mant in d6
	lsll	%d0,%d2		|shift ls mant by exp
	lsll	%d0,%d1		|shift ms mant by exp
	movel	#32,%d5
	subl	%d0,%d5		|make op a denorm by shifting bits
	lsrl	%d5,%d6		|by the number in the exp, then
|				;set exp = 0.
	orl	%d6,%d1		|shift the ls mant bits into the ms mant
	movel	#0,%d0		|same as if decremented exp to 0
|				;while shifting
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
ms_clr:
	bfffo	%d2{#0:#32},%d3	|check if any bits set in ls mant
	beqs	all_clr		|branch if none set
	addw	#32,%d3		|X = 32 + distance within ls mant
	cmpw	%d3,%d0		|if X>Y
	bmis	greater		|then branch
	bsr	nrm_set		|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
all_clr:
	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
|	NRM_SET
|
| Normalizes the operand at a0 in place: shifts the mantissa left
| until its first 1 reaches bit 31 of LOCAL_HI and subtracts the shift
| count from LOCAL_EX.  d0 and d1 are used as scratch; d6/d7 are saved
| and restored.
|
	.global	nrm_set
nrm_set:
	movel	%d7,-(%a7)
	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant (to d7)
	beqs	lower		|branch if ms mant is all 0's

	movel	%d6,-(%a7)

	subw	%d7,LOCAL_EX(%a0)	|sub shift count from exponent
	movel	LOCAL_HI(%a0),%d0	|d0 has ms mant
	movel	LOCAL_LO(%a0),%d1 |d1 has ls mant

	lsll	%d7,%d0		|shift first 1 to msb of ms mant
	movel	%d1,%d6		|copy ls mant into d6
	lsll	%d7,%d6		|shift ls mant by count
	movel	%d6,LOCAL_LO(%a0)	|store ls mant into memory
	moveql	#32,%d6
	subl	%d7,%d6		|d6 = 32 - count
	lsrl	%d6,%d1		|shift off all bits but those that will
|				;be shifted into ms mant
	orl	%d1,%d0		|shift the ls mant bits into the ms mant
	movel	%d0,LOCAL_HI(%a0)	|store ms mant into memory
	moveml	(%a7)+,%d7/%d6	|restore registers
	rts

|
| We get here if ms mant was = 0, and we assume ls mant has bits
| set (otherwise this would have been tagged a zero not a denorm).
|
lower:
	movew	LOCAL_EX(%a0),%d0	|d0 has exponent
	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant
	subw	#32,%d0		|account for ms mant being all zeros
	bfffo	%d1{#0:#32},%d7	|find first 1 in ls mant to d7
	subw	%d7,%d0		|subtract shift count from exp
	lsll	%d7,%d1		|shift first 1 to msb of the word
	movew	%d0,LOCAL_EX(%a0)	|store exponent
	movel	%d1,LOCAL_HI(%a0)	|shifted ls mant becomes ms mant
	clrl	LOCAL_LO(%a0)	|clear ls mant
	movel	(%a7)+,%d7
	rts
|
|	denorm --- denormalize an intermediate result
|
|	Used by underflow.
|
|	Input:
|	a0	 points to the operand to be denormalized
|		 (in the internal extended format)
|
|	d0:	 rounding precision
|	Output:
|	a0	 points to the denormalized result
|		 (in the internal extended format)
|
|	d0	is guard,round,sticky
|
| d0 comes into this routine with the rounding precision.  d1
| is then loaded with the denormalized exponent threshold for the
| rounding precision.
|

	.global	denorm
denorm:
	btstb	#6,LOCAL_EX(%a0)	|exponent bit 14 set?
	beqs	no_sgn_ext
	bsetb	#7,LOCAL_EX(%a0)	|then set bit 15 too (sign extend)
no_sgn_ext:

	cmpib	#0,%d0		|if 0 then extended precision
	bnes	not_ext		|else branch

	clrl	%d1		|load d1 with ext threshold (0)
	clrl	%d0		|clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check for inex
	beq	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

not_ext:
	cmpil	#1,%d0		|if 1 then single precision
	beqs	load_sgl	|else must be double precision

load_dbl:
	movew	#dbl_thresh,%d1	|put copy of threshold in d1
	movel	%d1,%d0		|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0		|if diff >= 67
	bpls	chk_stky	|then branch (all bits would be
|				; shifted off in denorm routine)
	clrl	%d0		|else clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check flag
	beqs	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

load_sgl:
	movew	#sgl_thresh,%d1	|put copy of threshold in d1
	movel	%d1,%d0		|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0		|if diff >= 67
	bpls	chk_stky	|then branch (all bits would be
|				; shifted off in denorm routine)
	clrl	%d0		|else clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check flag
	beqs	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

|
| Everything would be shifted out: result is a zero mantissa with the
| threshold exponent; any nonzero mantissa bit makes it inexact.
| NOTE(review): when the mantissa is already zero, d0 is left holding
| the exponent difference computed above rather than a g,r,s value.
|
chk_stky:
	tstl	LOCAL_HI(%a0)	|check for any bits set in ms mant
	bnes	set_stky
	tstl	LOCAL_LO(%a0)	|check for any bits set in ls mant
	bnes	set_stky
	bras	clr_mant
set_stky:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inx2a_mask bits (inexact)
	movel	#0x20000000,%d0	|set sticky bit in return value
clr_mant:
	movew	%d1,LOCAL_EX(%a0)	|exponent = threshold
	movel	#0,LOCAL_HI(%a0)	|set d1 = 0 (ms mantissa)
	movel	#0,LOCAL_LO(%a0)	|and ls mantissa
	rts
dnrm_inex:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inx2a_mask bits (inexact)
no_inex:
	rts

|
|	dnrm_lp --- normalize exponent/mantissa to specified threshold
|
| Input:
|	a0		points to the operand to be denormalized
|	d0{31:29}	initial guard,round,sticky
|	d1{15:0}	denormalization threshold
| Output:
|	a0		points to the denormalized operand
|	d0{31:29}	final guard,round,sticky
|	d1.b		inexact flag:  all ones means inexact result
|
| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
| so that bfext can be used to extract the new g,r,s bits from one
| contiguous field.  Dnrm_lp can be called with a0 pointing to ETEMP
| or WBTEMP, where there is no LOCAL_GRS scratch word following it on
| the frame.
|
| The bit-field extracts below read across LOCAL_EX/LOCAL_HI/LOCAL_LO
| and so assume these are consecutive longwords (see fpsp.h - confirm).
|
	.global	dnrm_lp
dnrm_lp:
	movel	%d2,-(%sp)		|save d2 for temp use
	btstb	#E3,E_BYTE(%a6)		|E3 bit of E_BYTE set?
	beqs	not_E3			|no: keep caller's d0
	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2	|yes: take the 3-bit g,r,s
	movel	#29,%d0			|field from WBTEMP_GRS instead
	lsll	%d0,%d2			|shift g,r,s to bits 31:29
	movel	%d2,%d0
not_E3:
	movel	(%sp)+,%d2		|restore d2
	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)
	movel	%d1,%d0			|copy the denorm threshold
	subw	LOCAL_EX(%a0),%d1	|d1 = threshold - exponent
	bles	no_lp			|d1 <= 0
	cmpw	#32,%d1
	blts	case_1			|0 < d1 < 32
	cmpw	#64,%d1
	blts	case_2			|32 <= d1 < 64
	bra	case_3			|d1 >= 64
|
| No normalization necessary
|
no_lp:
	clrb	%d1			|set no inex2 reported
	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|return the initial g,r,s
	rts
|
| case (0<d1<32)
|
case_1:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)		|exponent = denorm threshold
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_HI
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO
	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S
	movel	%d2,LOCAL_HI(%a0)		|store new LOCAL_HI
	movel	%d1,LOCAL_LO(%a0)		|store new LOCAL_LO
	clrb	%d1				|assume exact
	bftst	%d0{#2:#30}			|anything below g,r ?
	beqs	c1nstky
	bsetl	#rnd_stky_bit,%d0		|yes: set sticky
	st	%d1				|and flag inexact
c1nstky:
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|fetch original g,r,s
	andil	#0xe0000000,%d2		|clear all but G,R,S
	tstl	%d2			|test if original G,R,S are clear
	beqs	grs_clear
	orl	#0x20000000,%d0		|set sticky bit in d0
grs_clear:
	andil	#0xe0000000,%d0		|clear all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| case (32<=d1<64)
|
case_2:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)		|exponent = denorm threshold
	subw	#32,%d1			|d1 now between 0 and 31
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_LO
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S
	bftst	%d1{#2:#30}
	bnes	c2_sstky		|bra if sticky bit to be set
	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}
	bnes	c2_sstky		|bra if sticky bit to be set
	movel	%d1,%d0
	clrb	%d1			|exact so far
	bras	end_c2
c2_sstky:
	movel	%d1,%d0
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|flag inexact
end_c2:
	clrl	LOCAL_HI(%a0)		|store LOCAL_HI = 0
	movel	%d2,LOCAL_LO(%a0)		|store LOCAL_LO
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|fetch original g,r,s
	andil	#0xe0000000,%d2		|clear all but G,R,S
	tstl	%d2			|test if original G,R,S are clear
	beqs	clear_grs
	orl	#0x20000000,%d0		|set sticky bit in d0
clear_grs:
	andil	#0xe0000000,%d0		|get rid of all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| d1 >= 64 Force the exponent to be the denorm threshold with the
| correct sign.
|
case_3:
	movew	%d0,LOCAL_EX(%a0)
	tstw	LOCAL_SGN(%a0)
	bges	c3con
c3neg:
	orl	#0x80000000,LOCAL_EX(%a0)	|set top bit of LOCAL_EX long
c3con:
	cmpw	#64,%d1
	beqs	sixty_four
	cmpw	#65,%d1
	beqs	sixty_five
|
| Shift value is out of range.  Set d1 for inex2 flag and
| return a zero with the given threshold.
|
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	movel	#0x20000000,%d0		|sticky only
	st	%d1
	rts

sixty_four:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#2:#30},%d1		|d1 = bits that fall into sticky
	andil	#0xc0000000,%d0		|top two bits become g,r
	bras	c3com

sixty_five:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#1:#31},%d1		|d1 = bits that fall into sticky
	andil	#0x80000000,%d0
	lsrl	#1,%d0			|shift high bit into R bit

c3com:
	tstl	%d1			|any bit shifted past g,r ?
	bnes	c3ssticky
	tstl	LOCAL_LO(%a0)
	bnes	c3ssticky
	tstb	FP_SCR2+LOCAL_GRS(%a6)	|or in the original g,r,s byte ?
	bnes	c3ssticky
	clrb	%d1			|no: exact
	bras	c3end

c3ssticky:
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|flag inexact
c3end:
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	rts

	|end
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.