1 | 2 | satan.sa 3.3 12/19/90 3 | 4 | The entry point satan computes the arc 5 | input value. satand does the same exce 6 | denormalized number. 7 | 8 | Input: Double-extended value in memory 9 | register a0. 10 | 11 | Output: Arctan(X) returned in floating 12 | 13 | Accuracy and Monotonicity: The returne 14 | 64 significant bit, i.e. withi 15 | result is subsequently rounded 16 | result is provably monotonic i 17 | 18 | Speed: The program satan takes approxi 19 | argument X such that 1/16 < |X 20 | the program will run no worse 21 | 22 | Algorithm: 23 | Step 1. If |X| >= 16 or |X| < 1/16, go 24 | 25 | Step 2. Let X = sgn * 2**k * 1.xxxxxxx 26 | Define F = sgn * 2**k * 1.xxxx 27 | of X with a bit-1 attached at 28 | to be u = (X-F) / (1 + X*F). 29 | 30 | Step 3. Approximate arctan(u) by a pol 31 | 32 | Step 4. Return arctan(F) + poly, arcta 33 | calculated beforehand. Exit. 34 | 35 | Step 5. If |X| >= 16, go to Step 7. 36 | 37 | Step 6. Approximate arctan(X) by an od 38 | 39 | Step 7. Define X' = -1/X. Approximate 40 | Arctan(X) = sign(X)*Pi/2 + arc 41 | 42 43 | Copyright (C) Motorola, Inc. 1 44 | All Rights Reserved 45 | 46 | For details on the license for this fi 47 | file, README, in this same directory. 48 49 |satan idnt 2,1 | Motorola 040 Floating Po 50 51 |section 8 52 53 #include "fpsp.h" 54 55 BOUNDS1: .long 0x3FFB8000,0x4002FFFF 56 57 ONE: .long 0x3F800000 58 59 .long 0x00000000 60 61 ATANA3: .long 0xBFF6687E,0x314987D8 62 ATANA2: .long 0x4002AC69,0x34A26DB3 63 64 ATANA1: .long 0xBFC2476F,0x4E1DA28E 65 ATANB6: .long 0x3FB34444,0x7F876989 66 67 ATANB5: .long 0xBFB744EE,0x7FAF45DB 68 ATANB4: .long 0x3FBC71C6,0x46940220 69 70 ATANB3: .long 0xBFC24924,0x921872F9 71 ATANB2: .long 0x3FC99999,0x99998FA9 72 73 ATANB1: .long 0xBFD55555,0x55555555 74 ATANC5: .long 0xBFB70BF3,0x98539E6A 75 76 ATANC4: .long 0x3FBC7187,0x962D1D7D 77 ATANC3: .long 0xBFC24924,0x827107B8 78 79 ATANC2: .long 0x3FC99999,0x9996263E 80 ATANC1: .long 0xBFD55555,0x55555536 81 82 PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235 83 NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235 84 PTINY: .long 0x00010000,0x80000000,0x00000000 85 NTINY: .long 0x80010000,0x80000000,0x00000000 86 87 ATANTBL: 88 .long 0x3FFB0000,0x83D152C5,0x060B7A 89 .long 0x3FFB0000,0x8BC85445,0x65498B 90 .long 0x3FFB0000,0x93BE4060,0x17626B 91 .long 0x3FFB0000,0x9BB3078D,0x35AEC2 92 .long 0x3FFB0000,0xA3A69A52,0x5DDCE7 93 .long 0x3FFB0000,0xAB98E943,0x627656 94 .long 0x3FFB0000,0xB389E502,0xF9C598 95 .long 0x3FFB0000,0xBB797E43,0x6B09E6 96 .long 0x3FFB0000,0xC367A5C7,0x39E5F4 97 .long 0x3FFB0000,0xCB544C61,0xCFF7D5 98 .long 0x3FFB0000,0xD33F62F8,0x248853 99 .long 0x3FFB0000,0xDB28DA81,0x62404C 100 .long 0x3FFB0000,0xE310A407,0x8AD34F 101 .long 0x3FFB0000,0xEAF6B0A8,0x188EE1 102 .long 0x3FFB0000,0xF2DAF194,0x9DBE79 103 .long 0x3FFB0000,0xFABD5813,0x61D47E 104 .long 0x3FFC0000,0x8346AC21,0x0959EC 105 .long 0x3FFC0000,0x8B232A08,0x304282 106 .long 0x3FFC0000,0x92FB70B8,0xD29AE2 107 .long 0x3FFC0000,0x9ACF476F,0x5CCD1C 108 .long 0x3FFC0000,0xA29E7630,0x4954F2 109 .long 0x3FFC0000,0xAA68C5D0,0x8AB852 110 .long 0x3FFC0000,0xB22DFFFD,0x9D539F 111 .long 0x3FFC0000,0xB9EDEF45,0x3E900E 112 .long 0x3FFC0000,0xC1A85F1C,0xC75E3E 113 .long 0x3FFC0000,0xC95D1BE8,0x28138D 114 .long 0x3FFC0000,0xD10BF300,0x840D2D 115 .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E 116 .long 0x3FFC0000,0xE0572A6B,0xB42335 117 .long 0x3FFC0000,0xE7F32A70,0xEA9CAA 118 .long 0x3FFC0000,0xEF888432,0x64ECEF 119 .long 0x3FFC0000,0xF7170A28,0xECC066 120 .long 0x3FFD0000,0x812FD288,0x332DAD 121 .long 0x3FFD0000,0x88A8D1B1,0x218E4D 122 .long 0x3FFD0000,0x9012AB3F,0x23E4AE 123 .long 0x3FFD0000,0x976CC3D4,0x11E7F1 124 .long 0x3FFD0000,0x9EB68949,0x3889A2 125 .long 0x3FFD0000,0xA5EF72C3,0x448736 126 .long 0x3FFD0000,0xAD1700BA,0xF07A72 127 .long 0x3FFD0000,0xB42CBCFA,0xFD37EF 128 .long 0x3FFD0000,0xBB303A94,0x0BA80F 129 .long 0x3FFD0000,0xC22115C6,0xFCAEBB 130 .long 0x3FFD0000,0xC8FEF3E6,0x863312 131 .long 0x3FFD0000,0xCFC98330,0xB4000C 132 .long 0x3FFD0000,0xD6807AA1,0x102C5B 133 .long 0x3FFD0000,0xDD2399BC,0x31252A 134 .long 0x3FFD0000,0xE3B2A855,0x6B8FC5 135 .long 0x3FFD0000,0xEA2D764F,0x643159 136 .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A2 137 .long 0x3FFE0000,0x801CE39E,0x0D205C 138 .long 0x3FFE0000,0x8630A2DA,0xDA1ED0 139 .long 0x3FFE0000,0x8C1AD445,0xF3E09B 140 .long 0x3FFE0000,0x91DB8F16,0x64F350 141 .long 0x3FFE0000,0x97731420,0x365E53 142 .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CD 143 .long 0x3FFE0000,0xA22832DB,0xCADAAE 144 .long 0x3FFE0000,0xA746F2DD,0xB76022 145 .long 0x3FFE0000,0xAC3EC0FB,0x997DD6 146 .long 0x3FFE0000,0xB110688A,0xEBDC6F 147 .long 0x3FFE0000,0xB5BCC490,0x59ECC4 148 .long 0x3FFE0000,0xBA44BC7D,0xD47078 149 .long 0x3FFE0000,0xBEA94144,0xFD049A 150 .long 0x3FFE0000,0xC2EB4ABB,0x661628 151 .long 0x3FFE0000,0xC70BD54C,0xE602EE 152 .long 0x3FFE0000,0xCD000549,0xADEC71 153 .long 0x3FFE0000,0xD48457D2,0xD8EA4E 154 .long 0x3FFE0000,0xDB948DA7,0x12DECE 155 .long 0x3FFE0000,0xE23855F9,0x69E809 156 .long 0x3FFE0000,0xE8771129,0xC43532 157 .long 0x3FFE0000,0xEE57C16E,0x0D379C 158 .long 0x3FFE0000,0xF3E10211,0xA87C37 159 .long 0x3FFE0000,0xF919039D,0x758B8D 160 .long 0x3FFE0000,0xFE058B8F,0x64935F 161 .long 0x3FFF0000,0x8155FB49,0x7B685D 162 .long 0x3FFF0000,0x83889E35,0x49D108 163 .long 0x3FFF0000,0x859CFA76,0x511D72 164 .long 0x3FFF0000,0x87952ECF,0xFF8131 165 .long 0x3FFF0000,0x89732FD1,0x955764 166 .long 0x3FFF0000,0x8B38CAD1,0x01932A 167 .long 0x3FFF0000,0x8CE7A8D8,0x301EE6 168 .long 0x3FFF0000,0x8F46A39E,0x2EAE52 169 .long 0x3FFF0000,0x922DA7D7,0x918884 170 .long 0x3FFF0000,0x94D19FCB,0xDEDF52 171 .long 0x3FFF0000,0x973AB944,0x19D2A0 172 .long 0x3FFF0000,0x996FF00E,0x08E10B 173 .long 0x3FFF0000,0x9B773F95,0x12321D 174 .long 0x3FFF0000,0x9D55CC32,0x0F9356 175 .long 0x3FFF0000,0x9F100575,0x006CC5 176 .long 0x3FFF0000,0xA0A9C290,0xD97CC0 177 .long 0x3FFF0000,0xA22659EB,0xEBC063 178 .long 0x3FFF0000,0xA388B4AF,0xF6EF0E 179 .long 0x3FFF0000,0xA4D35F10,0x61D292 180 .long 0x3FFF0000,0xA60895DC,0xFBE318 181 .long 0x3FFF0000,0xA72A51DC,0x7367BE 182 .long 0x3FFF0000,0xA83A5153,0x095616 183 .long 0x3FFF0000,0xA93A2007,0x753954 184 .long 0x3FFF0000,0xAA9E7245,0x023B26 185 .long 0x3FFF0000,0xAC4C84BA,0x6FE4D5 186 .long 0x3FFF0000,0xADCE4A4A,0x606B97 187 .long 0x3FFF0000,0xAF2A2DCD,0x8D263C 188 .long 0x3FFF0000,0xB0656F81,0xF22265 189 .long 0x3FFF0000,0xB1846515,0x0F7149 190 .long 0x3FFF0000,0xB28AAA15,0x6F9ADA 191 .long 0x3FFF0000,0xB37B44FF,0x3766B8 192 .long 0x3FFF0000,0xB458C3DC,0xE96304 193 .long 0x3FFF0000,0xB525529D,0x562246 194 .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88 195 .long 0x3FFF0000,0xB692CADA,0x7ACA1A 196 .long 0x3FFF0000,0xB736AEA7,0xA69258 197 .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B 198 .long 0x3FFF0000,0xB85ECC66,0xCB2198 199 .long 0x3FFF0000,0xB8E4FD5A,0x20A593 200 .long 0x3FFF0000,0xB99F41F6,0x4AFF9B 201 .long 0x3FFF0000,0xBA7F1E17,0x842BBE 202 .long 0x3FFF0000,0xBB471285,0x7637E1 203 .long 0x3FFF0000,0xBBFABE8A,0x4788DF 204 .long 0x3FFF0000,0xBC9D0FAD,0x2B689D 205 .long 0x3FFF0000,0xBD306A39,0x471ECD 206 .long 0x3FFF0000,0xBDB6C731,0x856AF1 207 .long 0x3FFF0000,0xBE31CAC5,0x02E80D 208 .long 0x3FFF0000,0xBEA2D55C,0xE33194 209 .long 0x3FFF0000,0xBF0B10B7,0xC03128 210 .long 0x3FFF0000,0xBF6B7A18,0xDACB77 211 .long 0x3FFF0000,0xBFC4EA46,0x63FA18 212 .long 0x3FFF0000,0xC0181BDE,0x8B89A4 213 .long 0x3FFF0000,0xC065B066,0xCFBF64 214 .long 0x3FFF0000,0xC0AE345F,0x56340A 215 .long 0x3FFF0000,0xC0F22291,0x9CB9E6 216 217 .set X,FP_SCR1 218 .set XDCARE,X+2 219 .set XFRAC,X+4 220 .set XFRACLO,X+8 221 222 .set ATANF,FP_SCR2 223 .set ATANFHI,ATANF+4 224 .set ATANFLO,ATANF+8 225 226 227 | xref t_frcinx 228 |xref t_extdnrm 229 230 .global satand 231 satand: 232 |--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED AR 233 234 bra t_extdnrm 235 236 .global satan 237 satan: 238 |--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, 239 240 fmovex (%a0),%fp0 | ...L 241 242 movel (%a0),%d0 243 movew 4(%a0),%d0 244 fmovex %fp0,X(%a6) 245 andil #0x7FFFFFFF,%d0 246 247 cmpil #0x3FFB8000,%d0 248 bges ATANOK1 249 bra ATANSM 250 251 ATANOK1: 252 cmpil #0x4002FFFF,%d0 253 bles ATANMAIN 254 bra ATANBIG 255 256 257 |--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE 258 |--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] 259 |--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN 260 |--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN 261 |--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CL 262 |--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE A 263 |--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE 264 |--FETCH F AND SAVING OF REGISTERS CAN BE ALL 265 |--DIVIDE. IN THE END THIS METHOD IS MUCH FAST 266 |--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME 267 |--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONA 268 |--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMI 269 |--WILL INVOLVE A VERY LONG POLYNOMIAL. 270 271 |--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1 272 |--WE CHOSE F TO BE +-2^K * 1.BBBB1 273 |--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 274 |--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3 275 |--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINC 276 |-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F| 277 278 ATANMAIN: 279 280 movew #0x0000,XDCARE(%a6) 281 andil #0xF8000000,XFRAC(%a6) 282 oril #0x04000000,XFRAC(%a6) 283 movel #0x00000000,XFRACLO(%a 284 285 fmovex %fp0,%fp1 286 fmulx X(%a6),%fp1 287 fsubx X(%a6),%fp0 288 fadds #0x3F800000,%fp1 289 fdivx %fp1,%fp0 290 291 |--WHILE THE DIVISION IS TAKING ITS TIME, WE F 292 |--CREATE ATAN(F) AND STORE IT IN ATANF, AND 293 |--SAVE REGISTERS FP2. 294 295 movel %d2,-(%a7) | ...S 296 movel %d0,%d2 | ...T 297 andil #0x00007800,%d0 | ...4 298 andil #0x7FFF0000,%d2 | ...E 299 subil #0x3FFB0000,%d2 | ...K 300 asrl #1,%d2 301 addl %d2,%d0 | ...T 302 asrl #7,%d0 | ...I 303 lea ATANTBL,%a1 304 addal %d0,%a1 | ...A 305 movel (%a1)+,ATANF(%a6) 306 movel (%a1)+,ATANFHI(%a6) 307 movel (%a1)+,ATANFLO(%a6) 308 movel X(%a6),%d0 309 andil #0x80000000,%d0 | ...S 310 orl %d0,ATANF(%a6) | ...A 311 movel (%a7)+,%d2 | ...R 312 313 |--THAT'S ALL I HAVE TO DO FOR NOW, 314 |--BUT ALAS, THE DIVIDE IS STILL CRANKING! 315 316 |--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN( 317 |--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U 318 |--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEV 319 |--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V 320 |--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = 321 |--THE REASON FOR THIS REARRANGEMENT IS TO MAK 322 |--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD 323 324 325 fmovex %fp0,%fp1 326 fmulx %fp1,%fp1 327 fmoved ATANA3,%fp2 328 faddx %fp1,%fp2 329 fmulx %fp1,%fp2 330 fmulx %fp0,%fp1 331 faddd ATANA2,%fp2 | ...A 332 fmuld ATANA1,%fp1 | ...A 333 fmulx %fp2,%fp1 334 335 faddx %fp1,%fp0 336 fmovel %d1,%FPCR 337 faddx ATANF(%a6),%fp0 | ...A 338 bra t_frcinx 339 340 ATANBORS: 341 |--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED 342 |--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. 343 cmpil #0x3FFF8000,%d0 344 bgt ATANBIG | ...I.E. |X| 345 346 ATANSM: 347 |--|X| <= 1/16 348 |--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHER 349 |--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y* 350 |--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B 351 |--WHERE Y = X*X, AND Z = Y*Y. 352 353 cmpil #0x3FD78000,%d0 354 blt ATANTINY 355 |--COMPUTE POLYNOMIAL 356 fmulx %fp0,%fp0 | ...F 357 358 359 movew #0x0000,XDCARE(%a6) 360 361 fmovex %fp0,%fp1 362 fmulx %fp1,%fp1 363 364 fmoved ATANB6,%fp2 365 fmoved ATANB5,%fp3 366 367 fmulx %fp1,%fp2 368 fmulx %fp1,%fp3 369 370 faddd ATANB4,%fp2 | ...B 371 faddd ATANB3,%fp3 | ...B 372 373 fmulx %fp1,%fp2 374 fmulx %fp3,%fp1 375 376 faddd ATANB2,%fp2 | ...B 377 faddd ATANB1,%fp1 | ...B 378 379 fmulx %fp0,%fp2 380 fmulx X(%a6),%fp0 381 382 faddx %fp2,%fp1 383 384 385 fmulx %fp1,%fp0 | ...X 386 387 fmovel %d1,%FPCR 388 faddx X(%a6),%fp0 389 390 bra t_frcinx 391 392 ATANTINY: 393 |--|X| < 2^(-40), ATAN(X) = X 394 movew #0x0000,XDCARE(%a6) 395 396 fmovel %d1,%FPCR 397 fmovex X(%a6),%fp0 |last 398 399 bra t_frcinx 400 401 ATANBIG: 402 |--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 403 |--RETURN SIGN(X)*PI/2 + ATAN(-1/X). 404 cmpil #0x40638000,%d0 405 bgt ATANHUGE 406 407 |--APPROXIMATE ATAN(-1/X) BY 408 |--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' 409 |--THIS CAN BE RE-WRITTEN AS 410 |--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] 411 412 fmoves #0xBF800000,%fp1 413 fdivx %fp0,%fp1 414 415 416 |--DIVIDE IS STILL CRANKING 417 418 fmovex %fp1,%fp0 419 fmulx %fp0,%fp0 420 fmovex %fp1,X(%a6) 421 422 fmovex %fp0,%fp1 423 fmulx %fp1,%fp1 424 425 fmoved ATANC5,%fp3 426 fmoved ATANC4,%fp2 427 428 fmulx %fp1,%fp3 429 fmulx %fp1,%fp2 430 431 faddd ATANC3,%fp3 | ...C 432 faddd ATANC2,%fp2 | ...C 433 434 fmulx %fp3,%fp1 435 fmulx %fp0,%fp2 436 437 faddd ATANC1,%fp1 | ...C 438 fmulx X(%a6),%fp0 439 440 faddx %fp2,%fp1 441 442 443 fmulx %fp1,%fp0 444 | ... 445 faddx X(%a6),%fp0 446 447 fmovel %d1,%FPCR 448 449 btstb #7,(%a0) 450 beqs pos_big 451 452 neg_big: 453 faddx NPIBY2,%fp0 454 bra t_frcinx 455 456 pos_big: 457 faddx PPIBY2,%fp0 458 bra t_frcinx 459 460 ATANHUGE: 461 |--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIB 462 btstb #7,(%a0) 463 beqs pos_huge 464 465 neg_huge: 466 fmovex NPIBY2,%fp0 467 fmovel %d1,%fpcr 468 fsubx NTINY,%fp0 469 bra t_frcinx 470 471 pos_huge: 472 fmovex PPIBY2,%fp0 473 fmovel %d1,%fpcr 474 fsubx PTINY,%fp0 475 bra t_frcinx 476 477 |end
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.