1 | 2 | stan.sa 3.3 7/29/91 3 | 4 | The entry point stan computes the tang 5 | an input argument; 6 | stand does the same except for denorma 7 | 8 | Input: Double-extended number X in loc 9 | by address register a0. 10 | 11 | Output: The value tan(X) returned in f 12 | 13 | Accuracy and Monotonicity: The returne 14 | 64 significant bit, i.e. withi 15 | result is subsequently rounded 16 | result is provably monotonic i 17 | 18 | Speed: The program sTAN takes approxim 19 | input argument X such that |X| 20 | situation. 21 | 22 | Algorithm: 23 | 24 | 1. If |X| >= 15Pi or |X| < 2**(-40), g 25 | 26 | 2. Decompose X as X = N(Pi/2) + r wher 27 | k = N mod 2, so in particular, 28 | 29 | 3. If k is odd, go to 5. 30 | 31 | 4. (k is even) Tan(X) = tan(r) and tan 32 | rational function U/V where 33 | U = r + r*s*(P1 + s*(P2 + s*P3 34 | V = 1 + s*(Q1 + s*(Q2 + s*(Q3 35 | Exit. 36 | 37 | 4. (k is odd) Tan(X) = -cot(r). Since 38 | rational function U/V where 39 | U = r + r*s*(P1 + s*(P2 + s*P3 40 | V = 1 + s*(Q1 + s*(Q2 + s*(Q3 41 | -Cot(r) = -V/U. Exit. 42 | 43 | 6. If |X| > 1, go to 8. 44 | 45 | 7. (|X|<2**(-40)) Tan(X) = X. Exit. 46 | 47 | 8. Overwrite X by X := X rem 2Pi. Now 48 | 49 50 | Copyright (C) Motorola, Inc. 1 51 | All Rights Reserved 52 | 53 | For details on the license for this fi 54 | file, README, in this same directory. 55 56 |STAN idnt 2,1 | Motorola 040 Floating Po 57 58 |section 8 59 60 #include "fpsp.h" 61 62 BOUNDS1: .long 0x3FD78000,0x4004BC7E 63 TWOBYPI: .long 0x3FE45F30,0x6DC9C883 64 65 TANQ4: .long 0x3EA0B759,0xF50F8688 66 TANP3: .long 0xBEF2BAA5,0xA8924F04 67 68 TANQ3: .long 0xBF346F59,0xB39BA65F,0x00000000 69 70 TANP2: .long 0x3FF60000,0xE073D3FC,0x199C4A00 71 72 TANQ2: .long 0x3FF90000,0xD23CD684,0x15D95FA1 73 74 TANP1: .long 0xBFFC0000,0x8895A6C5,0xFB423BCA 75 76 TANQ1: .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE 77 78 INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E4415 79 80 TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000 81 TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000 82 83 |--N*PI/2, -32 <= N <= 32, IN A LEADING TERM I 84 |--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, T 85 |--MOST 69 BITS LONG. 86 .global PITBL 87 PITBL: 88 .long 0xC0040000,0xC90FDAA2,0x2168C235,0x21 89 .long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0 90 .long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1 91 .long 0xC0040000,0xB6365E22,0xEE46F000,0x21 92 .long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1 93 .long 0xC0040000,0xA9A56078,0xCC3063DD,0x21 94 .long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21 95 .long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1 96 .long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21 97 .long 0xC0040000,0x90836524,0x88034B96,0x20 98 .long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1 99 .long 0xC0040000,0x83F2677A,0x65ECBF73,0x21 100 .long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20 101 .long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21 102 .long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1 103 .long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9F 104 .long 0xC0030000,0xC90FDAA2,0x2168C235,0x21 105 .long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1 106 .long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0 107 .long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20 108 .long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21 109 .long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1 110 .long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F 111 .long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0 112 .long 0xC0020000,0xC90FDAA2,0x2168C235,0x20 113 .long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0 114 .long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20 115 .long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F 116 .long 0xC0010000,0xC90FDAA2,0x2168C235,0x20 117 .long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20 118 .long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F 119 .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F 120 .long 0x00000000,0x00000000,0x00000000,0x00 121 .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F 122 .long 0x40000000,0xC90FDAA2,0x2168C235,0x9F 123 .long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0 124 .long 0x40010000,0xC90FDAA2,0x2168C235,0xA0 125 .long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F 126 .long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0 127 .long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20 128 .long 0x40020000,0xC90FDAA2,0x2168C235,0xA0 129 .long 0x40020000,0xE231D5F6,0x6595DA7B,0x20 130 .long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F 131 .long 0x40030000,0x8A3AE64F,0x76F80584,0x21 132 .long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1 133 .long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0 134 .long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20 135 .long 0x40030000,0xBC7EDCF7,0xFF523611,0x21 136 .long 0x40030000,0xC90FDAA2,0x2168C235,0xA1 137 .long 0x40030000,0xD5A0D84C,0x437F4E58,0x1F 138 .long 0x40030000,0xE231D5F6,0x6595DA7B,0x21 139 .long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1 140 .long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0 141 .long 0x40040000,0x83F2677A,0x65ECBF73,0xA1 142 .long 0x40040000,0x8A3AE64F,0x76F80584,0x21 143 .long 0x40040000,0x90836524,0x88034B96,0xA0 144 .long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1 145 .long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21 146 .long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1 147 .long 0x40040000,0xA9A56078,0xCC3063DD,0xA1 148 .long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21 149 .long 0x40040000,0xB6365E22,0xEE46F000,0xA1 150 .long 0x40040000,0xBC7EDCF7,0xFF523611,0x21 151 .long 0x40040000,0xC2C75BCD,0x105D7C23,0x20 152 .long 0x40040000,0xC90FDAA2,0x2168C235,0xA1 153 154 .set INARG,FP_SCR4 155 156 .set TWOTO63,L_SCR1 157 .set ENDFLAG,L_SCR2 158 .set N,L_SCR3 159 160 | xref t_frcinx 161 |xref t_extdnrm 162 163 .global stand 164 stand: 165 |--TAN(X) = X FOR DENORMALIZED X 166 167 bra t_extdnrm 168 169 .global stan 170 stan: 171 fmovex (%a0),%fp0 | ...L 172 173 movel (%a0),%d0 174 movew 4(%a0),%d0 175 andil #0x7FFFFFFF,%d0 176 177 cmpil #0x3FD78000,%d0 178 bges TANOK1 179 bra TANSM 180 TANOK1: 181 cmpil #0x4004BC7E,%d0 182 blts TANMAIN 183 bra REDUCEX 184 185 186 TANMAIN: 187 |--THIS IS THE USUAL CASE, |X| <= 15 PI. 188 |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOO 189 fmovex %fp0,%fp1 190 fmuld TWOBYPI,%fp1 | ...X 191 192 |--HIDE THE NEXT TWO INSTRUCTIONS 193 leal PITBL+0x200,%a1 | ...T 194 195 |--FP1 IS NOW READY 196 fmovel %fp1,%d0 197 198 asll #4,%d0 199 addal %d0,%a1 | ...A 200 201 fsubx (%a1)+,%fp0 | ...X 202 |--HIDE THE NEXT ONE 203 204 fsubs (%a1),%fp0 | ...F 205 206 rorl #5,%d0 207 andil #0x80000000,%d0 | ...D 208 209 TANCONT: 210 211 cmpil #0,%d0 212 blt NODD 213 214 fmovex %fp0,%fp1 215 fmulx %fp1,%fp1 216 217 fmoved TANQ4,%fp3 218 fmoved TANP3,%fp2 219 220 fmulx %fp1,%fp3 221 fmulx %fp1,%fp2 222 223 faddd TANQ3,%fp3 | ...Q 224 faddx TANP2,%fp2 | ...P 225 226 fmulx %fp1,%fp3 227 fmulx %fp1,%fp2 228 229 faddx TANQ2,%fp3 | ...Q 230 faddx TANP1,%fp2 | ...P 231 232 fmulx %fp1,%fp3 233 fmulx %fp1,%fp2 234 235 faddx TANQ1,%fp3 | ...Q 236 fmulx %fp0,%fp2 237 238 fmulx %fp3,%fp1 239 240 241 faddx %fp2,%fp0 242 243 244 fadds #0x3F800000,%fp1 245 246 fmovel %d1,%fpcr 247 fdivx %fp1,%fp0 248 249 bra t_frcinx 250 251 NODD: 252 fmovex %fp0,%fp1 253 fmulx %fp0,%fp0 254 255 fmoved TANQ4,%fp3 256 fmoved TANP3,%fp2 257 258 fmulx %fp0,%fp3 259 fmulx %fp0,%fp2 260 261 faddd TANQ3,%fp3 | ...Q 262 faddx TANP2,%fp2 | ...P 263 264 fmulx %fp0,%fp3 265 fmulx %fp0,%fp2 266 267 faddx TANQ2,%fp3 | ...Q 268 faddx TANP1,%fp2 | ...P 269 270 fmulx %fp0,%fp3 271 fmulx %fp0,%fp2 272 273 faddx TANQ1,%fp3 | ...Q 274 fmulx %fp1,%fp2 275 276 fmulx %fp3,%fp0 277 278 279 faddx %fp2,%fp1 280 fadds #0x3F800000,%fp0 281 282 283 fmovex %fp1,-(%sp) 284 eoril #0x80000000,(%sp) 285 286 fmovel %d1,%fpcr 287 fdivx (%sp)+,%fp0 |last 288 289 bra t_frcinx 290 291 TANBORS: 292 |--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT 293 |--IF |X| < 2**(-40), RETURN X OR 1. 294 cmpil #0x3FFF8000,%d0 295 bgts REDUCEX 296 297 TANSM: 298 299 fmovex %fp0,-(%sp) 300 fmovel %d1,%fpcr 301 fmovex (%sp)+,%fp0 |last 302 303 bra t_frcinx 304 305 306 REDUCEX: 307 |--WHEN REDUCEX IS USED, THE CODE WILL INEVITA 308 |--THIS REDUCTION METHOD, HOWEVER, IS MUCH FAS 309 |--THE REMAINDER INSTRUCTION WHICH IS NOW IN S 310 311 fmovemx %fp2-%fp5,-(%a7) | ...s 312 movel %d2,-(%a7) 313 fmoves #0x00000000,%fp1 314 315 |--If compact form of abs(arg) in d0=$7ffeffff 316 |--there is a danger of unwanted overflow in f 317 |--case, reduce argument by one remainder step 318 |--safe. 319 cmpil #0x7ffeffff,%d0 |is ar 320 bnes LOOP 321 movel #0x7ffe0000,FP_SCR2(%a6) 322 | ;creat 323 movel #0xc90fdaa2,FP_SCR2+4(%a6) 324 clrl FP_SCR2+8(%a6) 325 ftstx %fp0 |test 326 movel #0x7fdc0000,FP_SCR3(%a6) 327 | ;PI/2 328 movel #0x85a308d3,FP_SCR3+4(%a6) 329 clrl FP_SCR3+8(%a6) 330 fblt red_neg 331 orw #0x8000,FP_SCR2(%a6) |posit 332 orw #0x8000,FP_SCR3(%a6) 333 red_neg: 334 faddx FP_SCR2(%a6),%fp0 335 fmovex %fp0,%fp1 |save 336 faddx FP_SCR3(%a6),%fp0 337 fsubx %fp0,%fp1 338 faddx FP_SCR3(%a6),%fp1 339 340 |--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM 341 |--integer quotient will be stored in N 342 |--Intermediate remainder is 66-bit long; (R,r 343 344 LOOP: 345 fmovex %fp0,INARG(%a6) | ...+ 346 movew INARG(%a6),%d0 347 movel %d0,%a1 | ...s 348 andil #0x00007FFF,%d0 349 subil #0x00003FFF,%d0 | ...D 350 cmpil #28,%d0 351 bles LASTLOOP 352 CONTLOOP: 353 subil #27,%d0 | ...D0 IS L 354 movel #0,ENDFLAG(%a6) 355 bras WORK 356 LASTLOOP: 357 clrl %d0 | ...D 358 movel #1,ENDFLAG(%a6) 359 360 WORK: 361 |--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * 362 |--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. 363 364 |--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63 365 |--2**L * (PIby2_1), 2**L * (PIby2_2) 366 367 movel #0x00003FFE,%d2 | ...B 368 subl %d0,%d2 | ...B 369 370 movel #0xA2F9836E,FP_SCR1+4( 371 movel #0x4E44152A,FP_SCR1+8( 372 movew %d2,FP_SCR1(%a6) 373 374 fmovex %fp0,%fp2 375 fmulx FP_SCR1(%a6),%fp2 376 |--WE MUST NOW FIND INT(FP2). SINCE WE NEED TH 377 |--FLOATING POINT FORMAT, THE TWO FMOVE'S 378 |--WILL BE TOO INEFFICIENT. THE WAY AROUND IT 379 |--(SIGN(INARG)*2**63 + FP2) - SIGN(IN 380 |--US THE DESIRED VALUE IN FLOATING POINT. 381 382 |--HIDE SIX CYCLES OF INSTRUCTION 383 movel %a1,%d2 384 swap %d2 385 andil #0x80000000,%d2 386 oril #0x5F000000,%d2 | ...D 387 movel %d2,TWOTO63(%a6) 388 389 movel %d0,%d2 390 addil #0x00003FFF,%d2 | ...B 391 392 |--FP2 IS READY 393 fadds TWOTO63(%a6),%fp2 394 395 |--HIDE 4 CYCLES OF INSTRUCTION; creating 2**( 396 movew %d2,FP_SCR2(%a6) 397 clrw FP_SCR2+2(%a6) 398 movel #0xC90FDAA2,FP_SCR2+4( 399 clrl FP_SCR2+8(%a6) 400 401 |--FP2 IS READY 402 fsubs TWOTO63(%a6),%fp2 403 404 addil #0x00003FDD,%d0 405 movew %d0,FP_SCR3(%a6) 406 clrw FP_SCR3+2(%a6) 407 movel #0x85A308D3,FP_SCR3+4( 408 clrl FP_SCR3+8(%a6) 409 410 movel ENDFLAG(%a6),%d0 411 412 |--We are now ready to perform (R+r) - N*P1 - 413 |--P2 = 2**(L) * Piby2_2 414 fmovex %fp2,%fp4 415 fmulx FP_SCR2(%a6),%fp4 416 fmovex %fp2,%fp5 417 fmulx FP_SCR3(%a6),%fp5 418 fmovex %fp4,%fp3 419 |--we want P+p = W+w but |p| <= half ulp of 420 |--Then, we need to compute A := R-P and a 421 faddx %fp5,%fp3 422 fsubx %fp3,%fp4 423 424 fsubx %fp3,%fp0 425 faddx %fp5,%fp4 426 427 fmovex %fp0,%fp3 428 fsubx %fp4,%fp1 429 430 |--Now we need to normalize (A,a) to "new (R, 431 |--|r| <= half ulp of R. 432 faddx %fp1,%fp0 433 |--No need to calculate r if this is the last 434 cmpil #0,%d0 435 bgt RESTORE 436 437 |--Need to calculate r 438 fsubx %fp0,%fp3 439 faddx %fp3,%fp1 440 bra LOOP 441 442 RESTORE: 443 fmovel %fp2,N(%a6) 444 movel (%a7)+,%d2 445 fmovemx (%a7)+,%fp2-%fp5 446 447 448 movel N(%a6),%d0 449 rorl #1,%d0 450 451 452 bra TANCONT 453 454 |end
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.