1 /* 2 * linux/arch/arm/vfp/vfpsingle.c 3 * 4 * This code is derived in part from John R. Housers softfloat library, which 5 * carries the following notice: 6 * 7 * =========================================================================== 8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point 9 * Arithmetic Package, Release 2. 10 * 11 * Written by John R. Hauser. This work was made possible in part by the 12 * International Computer Science Institute, located at Suite 600, 1947 Center 13 * Street, Berkeley, California 94704. Funding was partially provided by the 14 * National Science Foundation under grant MIP-9311980. The original version 15 * of this code was written as part of a project to build a fixed-point vector 16 * processor in collaboration with the University of California at Berkeley, 17 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information 18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 19 * arithmetic/softfloat.html'. 20 * 21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 23 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 26 * 27 * Derivative works are acceptable, even for commercial purposes, so long as 28 * (1) they include prominent notice that the work is derivative, and (2) they 29 * include prominent notice akin to these three paragraphs for those parts of 30 * this code that are retained. 31 * =========================================================================== 32 */ 33 #include <linux/kernel.h> 34 #include <linux/bitops.h> 35 36 #include <asm/div64.h> 37 #include <asm/vfp.h> 38 39 #include "vfpinstr.h" 40 #include "vfp.h" 41 42 static struct vfp_single vfp_single_default_qnan = { 43 .exponent = 255, 44 .sign = 0, 45 .significand = VFP_SINGLE_SIGNIFICAND_QNAN, 46 }; 47 48 static void vfp_single_dump(const char *str, struct vfp_single *s) 49 { 50 pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n", 51 str, s->sign != 0, s->exponent, s->significand); 52 } 53 54 static void vfp_single_normalise_denormal(struct vfp_single *vs) 55 { 56 int bits = 31 - fls(vs->significand); 57 58 vfp_single_dump("normalise_denormal: in", vs); 59 60 if (bits) { 61 vs->exponent -= bits - 1; 62 vs->significand <<= bits; 63 } 64 65 vfp_single_dump("normalise_denormal: out", vs); 66 } 67 68 #ifndef DEBUG 69 #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) 70 u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions) 71 #else 72 u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) 73 #endif 74 { 75 u32 significand, incr, rmode; 76 int exponent, shift, underflow; 77 78 vfp_single_dump("pack: in", vs); 79 80 /* 81 * Infinities and NaNs are a special case. 82 */ 83 if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) 84 goto pack; 85 86 /* 87 * Special-case zero. 88 */ 89 if (vs->significand == 0) { 90 vs->exponent = 0; 91 goto pack; 92 } 93 94 exponent = vs->exponent; 95 significand = vs->significand; 96 97 /* 98 * Normalise first. Note that we shift the significand up to 99 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least 100 * significant bit. 101 */ 102 shift = 32 - fls(significand); 103 if (shift < 32 && shift) { 104 exponent -= shift; 105 significand <<= shift; 106 } 107 108 #ifdef DEBUG 109 vs->exponent = exponent; 110 vs->significand = significand; 111 vfp_single_dump("pack: normalised", vs); 112 #endif 113 114 /* 115 * Tiny number? 116 */ 117 underflow = exponent < 0; 118 if (underflow) { 119 significand = vfp_shiftright32jamming(significand, -exponent); 120 exponent = 0; 121 #ifdef DEBUG 122 vs->exponent = exponent; 123 vs->significand = significand; 124 vfp_single_dump("pack: tiny number", vs); 125 #endif 126 if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) 127 underflow = 0; 128 } 129 130 /* 131 * Select rounding increment. 132 */ 133 incr = 0; 134 rmode = fpscr & FPSCR_RMODE_MASK; 135 136 if (rmode == FPSCR_ROUND_NEAREST) { 137 incr = 1 << VFP_SINGLE_LOW_BITS; 138 if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) 139 incr -= 1; 140 } else if (rmode == FPSCR_ROUND_TOZERO) { 141 incr = 0; 142 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) 143 incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; 144 145 pr_debug("VFP: rounding increment = 0x%08x\n", incr); 146 147 /* 148 * Is our rounding going to overflow? 149 */ 150 if ((significand + incr) < significand) { 151 exponent += 1; 152 significand = (significand >> 1) | (significand & 1); 153 incr >>= 1; 154 #ifdef DEBUG 155 vs->exponent = exponent; 156 vs->significand = significand; 157 vfp_single_dump("pack: overflow", vs); 158 #endif 159 } 160 161 /* 162 * If any of the low bits (which will be shifted out of the 163 * number) are non-zero, the result is inexact. 164 */ 165 if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)) 166 exceptions |= FPSCR_IXC; 167 168 /* 169 * Do our rounding. 170 */ 171 significand += incr; 172 173 /* 174 * Infinity? 175 */ 176 if (exponent >= 254) { 177 exceptions |= FPSCR_OFC | FPSCR_IXC; 178 if (incr == 0) { 179 vs->exponent = 253; 180 vs->significand = 0x7fffffff; 181 } else { 182 vs->exponent = 255; /* infinity */ 183 vs->significand = 0; 184 } 185 } else { 186 if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0) 187 exponent = 0; 188 if (exponent || significand > 0x80000000) 189 underflow = 0; 190 if (underflow) 191 exceptions |= FPSCR_UFC; 192 vs->exponent = exponent; 193 vs->significand = significand >> 1; 194 } 195 196 pack: 197 vfp_single_dump("pack: final", vs); 198 { 199 s32 d = vfp_single_pack(vs); 200 #ifdef DEBUG 201 pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func, 202 sd, d, exceptions); 203 #endif 204 vfp_put_float(d, sd); 205 } 206 207 return exceptions; 208 } 209 210 /* 211 * Propagate the NaN, setting exceptions if it is signalling. 212 * 'n' is always a NaN. 'm' may be a number, NaN or infinity. 213 */ 214 static u32 215 vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn, 216 struct vfp_single *vsm, u32 fpscr) 217 { 218 struct vfp_single *nan; 219 int tn, tm = 0; 220 221 tn = vfp_single_type(vsn); 222 223 if (vsm) 224 tm = vfp_single_type(vsm); 225 226 if (fpscr & FPSCR_DEFAULT_NAN) 227 /* 228 * Default NaN mode - always returns a quiet NaN 229 */ 230 nan = &vfp_single_default_qnan; 231 else { 232 /* 233 * Contemporary mode - select the first signalling 234 * NAN, or if neither are signalling, the first 235 * quiet NAN. 236 */ 237 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) 238 nan = vsn; 239 else 240 nan = vsm; 241 /* 242 * Make the NaN quiet. 243 */ 244 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; 245 } 246 247 *vsd = *nan; 248 249 /* 250 * If one was a signalling NAN, raise invalid operation. 251 */ 252 return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; 253 } 254 255 256 /* 257 * Extended operations 258 */ 259 static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr) 260 { 261 vfp_put_float(vfp_single_packed_abs(m), sd); 262 return 0; 263 } 264 265 static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr) 266 { 267 vfp_put_float(m, sd); 268 return 0; 269 } 270 271 static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr) 272 { 273 vfp_put_float(vfp_single_packed_negate(m), sd); 274 return 0; 275 } 276 277 static const u16 sqrt_oddadjust[] = { 278 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0, 279 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67 280 }; 281 282 static const u16 sqrt_evenadjust[] = { 283 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e, 284 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002 285 }; 286 287 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand) 288 { 289 int index; 290 u32 z, a; 291 292 if ((significand & 0xc0000000) != 0x40000000) { 293 pr_warn("VFP: estimate_sqrt: invalid significand\n"); 294 } 295 296 a = significand << 1; 297 index = (a >> 27) & 15; 298 if (exponent & 1) { 299 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index]; 300 z = ((a / z) << 14) + (z << 15); 301 a >>= 1; 302 } else { 303 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index]; 304 z = a / z + z; 305 z = (z >= 0x20000) ? 0xffff8000 : (z << 15); 306 if (z <= a) 307 return (s32)a >> 1; 308 } 309 { 310 u64 v = (u64)a << 31; 311 do_div(v, z); 312 return v + (z >> 1); 313 } 314 } 315 316 static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr) 317 { 318 struct vfp_single vsm, vsd; 319 int ret, tm; 320 321 vfp_single_unpack(&vsm, m); 322 tm = vfp_single_type(&vsm); 323 if (tm & (VFP_NAN|VFP_INFINITY)) { 324 struct vfp_single *vsp = &vsd; 325 326 if (tm & VFP_NAN) 327 ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr); 328 else if (vsm.sign == 0) { 329 sqrt_copy: 330 vsp = &vsm; 331 ret = 0; 332 } else { 333 sqrt_invalid: 334 vsp = &vfp_single_default_qnan; 335 ret = FPSCR_IOC; 336 } 337 vfp_put_float(vfp_single_pack(vsp), sd); 338 return ret; 339 } 340 341 /* 342 * sqrt(+/- 0) == +/- 0 343 */ 344 if (tm & VFP_ZERO) 345 goto sqrt_copy; 346 347 /* 348 * Normalise a denormalised number 349 */ 350 if (tm & VFP_DENORMAL) 351 vfp_single_normalise_denormal(&vsm); 352 353 /* 354 * sqrt(<0) = invalid 355 */ 356 if (vsm.sign) 357 goto sqrt_invalid; 358 359 vfp_single_dump("sqrt", &vsm); 360 361 /* 362 * Estimate the square root. 363 */ 364 vsd.sign = 0; 365 vsd.exponent = ((vsm.exponent - 127) >> 1) + 127; 366 vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2; 367 368 vfp_single_dump("sqrt estimate", &vsd); 369 370 /* 371 * And now adjust. 372 */ 373 if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) { 374 if (vsd.significand < 2) { 375 vsd.significand = 0xffffffff; 376 } else { 377 u64 term; 378 s64 rem; 379 vsm.significand <<= !(vsm.exponent & 1); 380 term = (u64)vsd.significand * vsd.significand; 381 rem = ((u64)vsm.significand << 32) - term; 382 383 pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem); 384 385 while (rem < 0) { 386 vsd.significand -= 1; 387 rem += ((u64)vsd.significand << 1) | 1; 388 } 389 vsd.significand |= rem != 0; 390 } 391 } 392 vsd.significand = vfp_shiftright32jamming(vsd.significand, 1); 393 394 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt"); 395 } 396 397 /* 398 * Equal := ZC 399 * Less than := N 400 * Greater than := C 401 * Unordered := CV 402 */ 403 static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr) 404 { 405 s32 d; 406 u32 ret = 0; 407 408 d = vfp_get_float(sd); 409 if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { 410 ret |= FPSCR_C | FPSCR_V; 411 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) 412 /* 413 * Signalling NaN, or signalling on quiet NaN 414 */ 415 ret |= FPSCR_IOC; 416 } 417 418 if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { 419 ret |= FPSCR_C | FPSCR_V; 420 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) 421 /* 422 * Signalling NaN, or signalling on quiet NaN 423 */ 424 ret |= FPSCR_IOC; 425 } 426 427 if (ret == 0) { 428 if (d == m || vfp_single_packed_abs(d | m) == 0) { 429 /* 430 * equal 431 */ 432 ret |= FPSCR_Z | FPSCR_C; 433 } else if (vfp_single_packed_sign(d ^ m)) { 434 /* 435 * different signs 436 */ 437 if (vfp_single_packed_sign(d)) 438 /* 439 * d is negative, so d < m 440 */ 441 ret |= FPSCR_N; 442 else 443 /* 444 * d is positive, so d > m 445 */ 446 ret |= FPSCR_C; 447 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { 448 /* 449 * d < m 450 */ 451 ret |= FPSCR_N; 452 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { 453 /* 454 * d > m 455 */ 456 ret |= FPSCR_C; 457 } 458 } 459 return ret; 460 } 461 462 static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr) 463 { 464 return vfp_compare(sd, 0, m, fpscr); 465 } 466 467 static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr) 468 { 469 return vfp_compare(sd, 1, m, fpscr); 470 } 471 472 static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr) 473 { 474 return vfp_compare(sd, 0, 0, fpscr); 475 } 476 477 static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr) 478 { 479 return vfp_compare(sd, 1, 0, fpscr); 480 } 481 482 static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr) 483 { 484 struct vfp_single vsm; 485 struct vfp_double vdd; 486 int tm; 487 u32 exceptions = 0; 488 489 vfp_single_unpack(&vsm, m); 490 491 tm = vfp_single_type(&vsm); 492 493 /* 494 * If we have a signalling NaN, signal invalid operation. 495 */ 496 if (tm == VFP_SNAN) 497 exceptions = FPSCR_IOC; 498 499 if (tm & VFP_DENORMAL) 500 vfp_single_normalise_denormal(&vsm); 501 502 vdd.sign = vsm.sign; 503 vdd.significand = (u64)vsm.significand << 32; 504 505 /* 506 * If we have an infinity or NaN, the exponent must be 2047. 507 */ 508 if (tm & (VFP_INFINITY|VFP_NAN)) { 509 vdd.exponent = 2047; 510 if (tm == VFP_QNAN) 511 vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; 512 goto pack_nan; 513 } else if (tm & VFP_ZERO) 514 vdd.exponent = 0; 515 else 516 vdd.exponent = vsm.exponent + (1023 - 127); 517 518 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd"); 519 520 pack_nan: 521 vfp_put_double(vfp_double_pack(&vdd), dd); 522 return exceptions; 523 } 524 525 static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr) 526 { 527 struct vfp_single vs; 528 529 vs.sign = 0; 530 vs.exponent = 127 + 31 - 1; 531 vs.significand = (u32)m; 532 533 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito"); 534 } 535 536 static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr) 537 { 538 struct vfp_single vs; 539 540 vs.sign = (m & 0x80000000) >> 16; 541 vs.exponent = 127 + 31 - 1; 542 vs.significand = vs.sign ? -m : m; 543 544 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito"); 545 } 546 547 static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr) 548 { 549 struct vfp_single vsm; 550 u32 d, exceptions = 0; 551 int rmode = fpscr & FPSCR_RMODE_MASK; 552 int tm; 553 554 vfp_single_unpack(&vsm, m); 555 vfp_single_dump("VSM", &vsm); 556 557 /* 558 * Do we have a denormalised number? 559 */ 560 tm = vfp_single_type(&vsm); 561 if (tm & VFP_DENORMAL) 562 exceptions |= FPSCR_IDC; 563 564 if (tm & VFP_NAN) 565 vsm.sign = 0; 566 567 if (vsm.exponent >= 127 + 32) { 568 d = vsm.sign ? 0 : 0xffffffff; 569 exceptions = FPSCR_IOC; 570 } else if (vsm.exponent >= 127 - 1) { 571 int shift = 127 + 31 - vsm.exponent; 572 u32 rem, incr = 0; 573 574 /* 575 * 2^0 <= m < 2^32-2^8 576 */ 577 d = (vsm.significand << 1) >> shift; 578 rem = vsm.significand << (33 - shift); 579 580 if (rmode == FPSCR_ROUND_NEAREST) { 581 incr = 0x80000000; 582 if ((d & 1) == 0) 583 incr -= 1; 584 } else if (rmode == FPSCR_ROUND_TOZERO) { 585 incr = 0; 586 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { 587 incr = ~0; 588 } 589 590 if ((rem + incr) < rem) { 591 if (d < 0xffffffff) 592 d += 1; 593 else 594 exceptions |= FPSCR_IOC; 595 } 596 597 if (d && vsm.sign) { 598 d = 0; 599 exceptions |= FPSCR_IOC; 600 } else if (rem) 601 exceptions |= FPSCR_IXC; 602 } else { 603 d = 0; 604 if (vsm.exponent | vsm.significand) { 605 exceptions |= FPSCR_IXC; 606 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) 607 d = 1; 608 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) { 609 d = 0; 610 exceptions |= FPSCR_IOC; 611 } 612 } 613 } 614 615 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 616 617 vfp_put_float(d, sd); 618 619 return exceptions; 620 } 621 622 static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr) 623 { 624 return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO); 625 } 626 627 static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr) 628 { 629 struct vfp_single vsm; 630 u32 d, exceptions = 0; 631 int rmode = fpscr & FPSCR_RMODE_MASK; 632 int tm; 633 634 vfp_single_unpack(&vsm, m); 635 vfp_single_dump("VSM", &vsm); 636 637 /* 638 * Do we have a denormalised number? 639 */ 640 tm = vfp_single_type(&vsm); 641 if (vfp_single_type(&vsm) & VFP_DENORMAL) 642 exceptions |= FPSCR_IDC; 643 644 if (tm & VFP_NAN) { 645 d = 0; 646 exceptions |= FPSCR_IOC; 647 } else if (vsm.exponent >= 127 + 32) { 648 /* 649 * m >= 2^31-2^7: invalid 650 */ 651 d = 0x7fffffff; 652 if (vsm.sign) 653 d = ~d; 654 exceptions |= FPSCR_IOC; 655 } else if (vsm.exponent >= 127 - 1) { 656 int shift = 127 + 31 - vsm.exponent; 657 u32 rem, incr = 0; 658 659 /* 2^0 <= m <= 2^31-2^7 */ 660 d = (vsm.significand << 1) >> shift; 661 rem = vsm.significand << (33 - shift); 662 663 if (rmode == FPSCR_ROUND_NEAREST) { 664 incr = 0x80000000; 665 if ((d & 1) == 0) 666 incr -= 1; 667 } else if (rmode == FPSCR_ROUND_TOZERO) { 668 incr = 0; 669 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { 670 incr = ~0; 671 } 672 673 if ((rem + incr) < rem && d < 0xffffffff) 674 d += 1; 675 if (d > 0x7fffffff + (vsm.sign != 0)) { 676 d = 0x7fffffff + (vsm.sign != 0); 677 exceptions |= FPSCR_IOC; 678 } else if (rem) 679 exceptions |= FPSCR_IXC; 680 681 if (vsm.sign) 682 d = -d; 683 } else { 684 d = 0; 685 if (vsm.exponent | vsm.significand) { 686 exceptions |= FPSCR_IXC; 687 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) 688 d = 1; 689 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) 690 d = -1; 691 } 692 } 693 694 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 695 696 vfp_put_float((s32)d, sd); 697 698 return exceptions; 699 } 700 701 static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr) 702 { 703 return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO); 704 } 705 706 static struct op fops_ext[32] = { 707 [FEXT_TO_IDX(FEXT_FCPY)] = { vfp_single_fcpy, 0 }, 708 [FEXT_TO_IDX(FEXT_FABS)] = { vfp_single_fabs, 0 }, 709 [FEXT_TO_IDX(FEXT_FNEG)] = { vfp_single_fneg, 0 }, 710 [FEXT_TO_IDX(FEXT_FSQRT)] = { vfp_single_fsqrt, 0 }, 711 [FEXT_TO_IDX(FEXT_FCMP)] = { vfp_single_fcmp, OP_SCALAR }, 712 [FEXT_TO_IDX(FEXT_FCMPE)] = { vfp_single_fcmpe, OP_SCALAR }, 713 [FEXT_TO_IDX(FEXT_FCMPZ)] = { vfp_single_fcmpz, OP_SCALAR }, 714 [FEXT_TO_IDX(FEXT_FCMPEZ)] = { vfp_single_fcmpez, OP_SCALAR }, 715 [FEXT_TO_IDX(FEXT_FCVT)] = { vfp_single_fcvtd, OP_SCALAR|OP_DD }, 716 [FEXT_TO_IDX(FEXT_FUITO)] = { vfp_single_fuito, OP_SCALAR }, 717 [FEXT_TO_IDX(FEXT_FSITO)] = { vfp_single_fsito, OP_SCALAR }, 718 [FEXT_TO_IDX(FEXT_FTOUI)] = { vfp_single_ftoui, OP_SCALAR }, 719 [FEXT_TO_IDX(FEXT_FTOUIZ)] = { vfp_single_ftouiz, OP_SCALAR }, 720 [FEXT_TO_IDX(FEXT_FTOSI)] = { vfp_single_ftosi, OP_SCALAR }, 721 [FEXT_TO_IDX(FEXT_FTOSIZ)] = { vfp_single_ftosiz, OP_SCALAR }, 722 }; 723 724 725 726 727 728 static u32 729 vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn, 730 struct vfp_single *vsm, u32 fpscr) 731 { 732 struct vfp_single *vsp; 733 u32 exceptions = 0; 734 int tn, tm; 735 736 tn = vfp_single_type(vsn); 737 tm = vfp_single_type(vsm); 738 739 if (tn & tm & VFP_INFINITY) { 740 /* 741 * Two infinities. Are they different signs? 742 */ 743 if (vsn->sign ^ vsm->sign) { 744 /* 745 * different signs -> invalid 746 */ 747 exceptions = FPSCR_IOC; 748 vsp = &vfp_single_default_qnan; 749 } else { 750 /* 751 * same signs -> valid 752 */ 753 vsp = vsn; 754 } 755 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { 756 /* 757 * One infinity and one number -> infinity 758 */ 759 vsp = vsn; 760 } else { 761 /* 762 * 'n' is a NaN of some type 763 */ 764 return vfp_propagate_nan(vsd, vsn, vsm, fpscr); 765 } 766 *vsd = *vsp; 767 return exceptions; 768 } 769 770 static u32 771 vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn, 772 struct vfp_single *vsm, u32 fpscr) 773 { 774 u32 exp_diff, m_sig; 775 776 if (vsn->significand & 0x80000000 || 777 vsm->significand & 0x80000000) { 778 pr_info("VFP: bad FP values in %s\n", __func__); 779 vfp_single_dump("VSN", vsn); 780 vfp_single_dump("VSM", vsm); 781 } 782 783 /* 784 * Ensure that 'n' is the largest magnitude number. Note that 785 * if 'n' and 'm' have equal exponents, we do not swap them. 786 * This ensures that NaN propagation works correctly. 787 */ 788 if (vsn->exponent < vsm->exponent) { 789 struct vfp_single *t = vsn; 790 vsn = vsm; 791 vsm = t; 792 } 793 794 /* 795 * Is 'n' an infinity or a NaN? Note that 'm' may be a number, 796 * infinity or a NaN here. 797 */ 798 if (vsn->exponent == 255) 799 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr); 800 801 /* 802 * We have two proper numbers, where 'vsn' is the larger magnitude. 803 * 804 * Copy 'n' to 'd' before doing the arithmetic. 805 */ 806 *vsd = *vsn; 807 808 /* 809 * Align both numbers. 810 */ 811 exp_diff = vsn->exponent - vsm->exponent; 812 m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff); 813 814 /* 815 * If the signs are different, we are really subtracting. 816 */ 817 if (vsn->sign ^ vsm->sign) { 818 m_sig = vsn->significand - m_sig; 819 if ((s32)m_sig < 0) { 820 vsd->sign = vfp_sign_negate(vsd->sign); 821 m_sig = -m_sig; 822 } else if (m_sig == 0) { 823 vsd->sign = (fpscr & FPSCR_RMODE_MASK) == 824 FPSCR_ROUND_MINUSINF ? 0x8000 : 0; 825 } 826 } else { 827 m_sig = vsn->significand + m_sig; 828 } 829 vsd->significand = m_sig; 830 831 return 0; 832 } 833 834 static u32 835 vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr) 836 { 837 vfp_single_dump("VSN", vsn); 838 vfp_single_dump("VSM", vsm); 839 840 /* 841 * Ensure that 'n' is the largest magnitude number. Note that 842 * if 'n' and 'm' have equal exponents, we do not swap them. 843 * This ensures that NaN propagation works correctly. 844 */ 845 if (vsn->exponent < vsm->exponent) { 846 struct vfp_single *t = vsn; 847 vsn = vsm; 848 vsm = t; 849 pr_debug("VFP: swapping M <-> N\n"); 850 } 851 852 vsd->sign = vsn->sign ^ vsm->sign; 853 854 /* 855 * If 'n' is an infinity or NaN, handle it. 'm' may be anything. 856 */ 857 if (vsn->exponent == 255) { 858 if (vsn->significand || (vsm->exponent == 255 && vsm->significand)) 859 return vfp_propagate_nan(vsd, vsn, vsm, fpscr); 860 if ((vsm->exponent | vsm->significand) == 0) { 861 *vsd = vfp_single_default_qnan; 862 return FPSCR_IOC; 863 } 864 vsd->exponent = vsn->exponent; 865 vsd->significand = 0; 866 return 0; 867 } 868 869 /* 870 * If 'm' is zero, the result is always zero. In this case, 871 * 'n' may be zero or a number, but it doesn't matter which. 872 */ 873 if ((vsm->exponent | vsm->significand) == 0) { 874 vsd->exponent = 0; 875 vsd->significand = 0; 876 return 0; 877 } 878 879 /* 880 * We add 2 to the destination exponent for the same reason as 881 * the addition case - though this time we have +1 from each 882 * input operand. 883 */ 884 vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2; 885 vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand); 886 887 vfp_single_dump("VSD", vsd); 888 return 0; 889 } 890 891 #define NEG_MULTIPLY (1 << 0) 892 #define NEG_SUBTRACT (1 << 1) 893 894 static u32 895 vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) 896 { 897 struct vfp_single vsd, vsp, vsn, vsm; 898 u32 exceptions; 899 s32 v; 900 901 v = vfp_get_float(sn); 902 pr_debug("VFP: s%u = %08x\n", sn, v); 903 vfp_single_unpack(&vsn, v); 904 if (vsn.exponent == 0 && vsn.significand) 905 vfp_single_normalise_denormal(&vsn); 906 907 vfp_single_unpack(&vsm, m); 908 if (vsm.exponent == 0 && vsm.significand) 909 vfp_single_normalise_denormal(&vsm); 910 911 exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); 912 if (negate & NEG_MULTIPLY) 913 vsp.sign = vfp_sign_negate(vsp.sign); 914 915 v = vfp_get_float(sd); 916 pr_debug("VFP: s%u = %08x\n", sd, v); 917 vfp_single_unpack(&vsn, v); 918 if (vsn.exponent == 0 && vsn.significand) 919 vfp_single_normalise_denormal(&vsn); 920 if (negate & NEG_SUBTRACT) 921 vsn.sign = vfp_sign_negate(vsn.sign); 922 923 exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); 924 925 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func); 926 } 927 928 /* 929 * Standard operations 930 */ 931 932 /* 933 * sd = sd + (sn * sm) 934 */ 935 static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr) 936 { 937 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac"); 938 } 939 940 /* 941 * sd = sd - (sn * sm) 942 */ 943 static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr) 944 { 945 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); 946 } 947 948 /* 949 * sd = -sd + (sn * sm) 950 */ 951 static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr) 952 { 953 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); 954 } 955 956 /* 957 * sd = -sd - (sn * sm) 958 */ 959 static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr) 960 { 961 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); 962 } 963 964 /* 965 * sd = sn * sm 966 */ 967 static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr) 968 { 969 struct vfp_single vsd, vsn, vsm; 970 u32 exceptions; 971 s32 n = vfp_get_float(sn); 972 973 pr_debug("VFP: s%u = %08x\n", sn, n); 974 975 vfp_single_unpack(&vsn, n); 976 if (vsn.exponent == 0 && vsn.significand) 977 vfp_single_normalise_denormal(&vsn); 978 979 vfp_single_unpack(&vsm, m); 980 if (vsm.exponent == 0 && vsm.significand) 981 vfp_single_normalise_denormal(&vsm); 982 983 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); 984 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul"); 985 } 986 987 /* 988 * sd = -(sn * sm) 989 */ 990 static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr) 991 { 992 struct vfp_single vsd, vsn, vsm; 993 u32 exceptions; 994 s32 n = vfp_get_float(sn); 995 996 pr_debug("VFP: s%u = %08x\n", sn, n); 997 998 vfp_single_unpack(&vsn, n); 999 if (vsn.exponent == 0 && vsn.significand) 1000 vfp_single_normalise_denormal(&vsn); 1001 1002 vfp_single_unpack(&vsm, m); 1003 if (vsm.exponent == 0 && vsm.significand) 1004 vfp_single_normalise_denormal(&vsm); 1005 1006 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); 1007 vsd.sign = vfp_sign_negate(vsd.sign); 1008 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul"); 1009 } 1010 1011 /* 1012 * sd = sn + sm 1013 */ 1014 static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr) 1015 { 1016 struct vfp_single vsd, vsn, vsm; 1017 u32 exceptions; 1018 s32 n = vfp_get_float(sn); 1019 1020 pr_debug("VFP: s%u = %08x\n", sn, n); 1021 1022 /* 1023 * Unpack and normalise denormals. 1024 */ 1025 vfp_single_unpack(&vsn, n); 1026 if (vsn.exponent == 0 && vsn.significand) 1027 vfp_single_normalise_denormal(&vsn); 1028 1029 vfp_single_unpack(&vsm, m); 1030 if (vsm.exponent == 0 && vsm.significand) 1031 vfp_single_normalise_denormal(&vsm); 1032 1033 exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr); 1034 1035 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd"); 1036 } 1037 1038 /* 1039 * sd = sn - sm 1040 */ 1041 static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr) 1042 { 1043 /* 1044 * Subtraction is addition with one sign inverted. 1045 */ 1046 return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr); 1047 } 1048 1049 /* 1050 * sd = sn / sm 1051 */ 1052 static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr) 1053 { 1054 struct vfp_single vsd, vsn, vsm; 1055 u32 exceptions = 0; 1056 s32 n = vfp_get_float(sn); 1057 int tm, tn; 1058 1059 pr_debug("VFP: s%u = %08x\n", sn, n); 1060 1061 vfp_single_unpack(&vsn, n); 1062 vfp_single_unpack(&vsm, m); 1063 1064 vsd.sign = vsn.sign ^ vsm.sign; 1065 1066 tn = vfp_single_type(&vsn); 1067 tm = vfp_single_type(&vsm); 1068 1069 /* 1070 * Is n a NAN? 1071 */ 1072 if (tn & VFP_NAN) 1073 goto vsn_nan; 1074 1075 /* 1076 * Is m a NAN? 1077 */ 1078 if (tm & VFP_NAN) 1079 goto vsm_nan; 1080 1081 /* 1082 * If n and m are infinity, the result is invalid 1083 * If n and m are zero, the result is invalid 1084 */ 1085 if (tm & tn & (VFP_INFINITY|VFP_ZERO)) 1086 goto invalid; 1087 1088 /* 1089 * If n is infinity, the result is infinity 1090 */ 1091 if (tn & VFP_INFINITY) 1092 goto infinity; 1093 1094 /* 1095 * If m is zero, raise div0 exception 1096 */ 1097 if (tm & VFP_ZERO) 1098 goto divzero; 1099 1100 /* 1101 * If m is infinity, or n is zero, the result is zero 1102 */ 1103 if (tm & VFP_INFINITY || tn & VFP_ZERO) 1104 goto zero; 1105 1106 if (tn & VFP_DENORMAL) 1107 vfp_single_normalise_denormal(&vsn); 1108 if (tm & VFP_DENORMAL) 1109 vfp_single_normalise_denormal(&vsm); 1110 1111 /* 1112 * Ok, we have two numbers, we can perform division. 1113 */ 1114 vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1; 1115 vsm.significand <<= 1; 1116 if (vsm.significand <= (2 * vsn.significand)) { 1117 vsn.significand >>= 1; 1118 vsd.exponent++; 1119 } 1120 { 1121 u64 significand = (u64)vsn.significand << 32; 1122 do_div(significand, vsm.significand); 1123 vsd.significand = significand; 1124 } 1125 if ((vsd.significand & 0x3f) == 0) 1126 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32); 1127 1128 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv"); 1129 1130 vsn_nan: 1131 exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr); 1132 pack: 1133 vfp_put_float(vfp_single_pack(&vsd), sd); 1134 return exceptions; 1135 1136 vsm_nan: 1137 exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr); 1138 goto pack; 1139 1140 zero: 1141 vsd.exponent = 0; 1142 vsd.significand = 0; 1143 goto pack; 1144 1145 divzero: 1146 exceptions = FPSCR_DZC; 1147 infinity: 1148 vsd.exponent = 255; 1149 vsd.significand = 0; 1150 goto pack; 1151 1152 invalid: 1153 vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd); 1154 return FPSCR_IOC; 1155 } 1156 1157 static struct op fops[16] = { 1158 [FOP_TO_IDX(FOP_FMAC)] = { vfp_single_fmac, 0 }, 1159 [FOP_TO_IDX(FOP_FNMAC)] = { vfp_single_fnmac, 0 }, 1160 [FOP_TO_IDX(FOP_FMSC)] = { vfp_single_fmsc, 0 }, 1161 [FOP_TO_IDX(FOP_FNMSC)] = { vfp_single_fnmsc, 0 }, 1162 [FOP_TO_IDX(FOP_FMUL)] = { vfp_single_fmul, 0 }, 1163 [FOP_TO_IDX(FOP_FNMUL)] = { vfp_single_fnmul, 0 }, 1164 [FOP_TO_IDX(FOP_FADD)] = { vfp_single_fadd, 0 }, 1165 [FOP_TO_IDX(FOP_FSUB)] = { vfp_single_fsub, 0 }, 1166 [FOP_TO_IDX(FOP_FDIV)] = { vfp_single_fdiv, 0 }, 1167 }; 1168 1169 #define FREG_BANK(x) ((x) & 0x18) 1170 #define FREG_IDX(x) ((x) & 7) 1171 1172 u32 vfp_single_cpdo(u32 inst, u32 fpscr) 1173 { 1174 u32 op = inst & FOP_MASK; 1175 u32 exceptions = 0; 1176 unsigned int dest; 1177 unsigned int sn = vfp_get_sn(inst); 1178 unsigned int sm = vfp_get_sm(inst); 1179 unsigned int vecitr, veclen, vecstride; 1180 struct op *fop; 1181 1182 vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK); 1183 1184 fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)]; 1185 1186 /* 1187 * fcvtsd takes a dN register number as destination, not sN. 1188 * Technically, if bit 0 of dd is set, this is an invalid 1189 * instruction. However, we ignore this for efficiency. 1190 * It also only operates on scalars. 1191 */ 1192 if (fop->flags & OP_DD) 1193 dest = vfp_get_dd(inst); 1194 else 1195 dest = vfp_get_sd(inst); 1196 1197 /* 1198 * If destination bank is zero, vector length is always '1'. 1199 * ARM DDI0100F C5.1.3, C5.3.2. 1200 */ 1201 if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0) 1202 veclen = 0; 1203 else 1204 veclen = fpscr & FPSCR_LENGTH_MASK; 1205 1206 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, 1207 (veclen >> FPSCR_LENGTH_BIT) + 1); 1208 1209 if (!fop->fn) 1210 goto invalid; 1211 1212 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { 1213 s32 m = vfp_get_float(sm); 1214 u32 except; 1215 char type; 1216 1217 type = fop->flags & OP_DD ? 'd' : 's'; 1218 if (op == FOP_EXT) 1219 pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n", 1220 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn, 1221 sm, m); 1222 else 1223 pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n", 1224 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn, 1225 FOP_TO_IDX(op), sm, m); 1226 1227 except = fop->fn(dest, sn, m, fpscr); 1228 pr_debug("VFP: itr%d: exceptions=%08x\n", 1229 vecitr >> FPSCR_LENGTH_BIT, except); 1230 1231 exceptions |= except; 1232 1233 /* 1234 * CHECK: It appears to be undefined whether we stop when 1235 * we encounter an exception. We continue. 1236 */ 1237 dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7); 1238 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7); 1239 if (FREG_BANK(sm) != 0) 1240 sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7); 1241 } 1242 return exceptions; 1243 1244 invalid: 1245 return (u32)-1; 1246 } 1247
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.