|
|	slogn.sa 3.1 12/10/90
|
|	slogn computes the natural logarithm of an
|	input value. slognd does the same except the input value is a
|	denormalized number. slognp1 computes log(1+X), and slognp1d
|	computes log(1+X) for denormalized X.
|
|	Input: Double-extended value in memory location pointed to by address
|		register a0.
|
|	Output:	log(X) or log(1+X) returned in floating-point register Fp0.
|
|	Accuracy and Monotonicity: The returned result is within 2 ulps in
|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
|		result is subsequently rounded to double precision. The
|		result is provably monotonic in double precision.
|
|	Speed: The program slogn takes approximately 190 cycles for input
|		argument X such that |X-1| >= 1/16, which is the usual
|		situation. For those arguments, slognp1 takes approximately
|		210 cycles. For the less common arguments, the program will
|		run no worse than 10% slower.
|
|	Algorithm:
|	LOGN:
|	Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
|		u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
|
|	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
|		significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
|		2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
|
|	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
|		log(1+u) = poly.
|
|	Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
|		by k*log(2) + (log(F) + poly). The values of log(F) are calculated
|		beforehand and stored in the program.
|
|	lognp1:
|	Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
|		u where u = 2X/(2+X). Otherwise, move on to Step 2.
|
|	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
|		of the algorithm for LOGN and compute log(1+X) as
|		k*log(2) + log(F) + poly where poly approximates log(1+u),
|		u = (Y-F)/F.
|
|	Implementation Notes:
|	Note 1. There are 64 different possible values for F, thus 64 log(F)'s
|		need to be tabulated. Moreover, the values of 1/F are also
|		tabulated so that the division in (Y-F)/F can be performed by a
|		multiplication.
|
|	Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value
|		Y-F has to be calculated carefully when 1+X lies in [1/2, 3/2]
|		(see LP1CARE below).
|
|	Note 3. To fully exploit the pipeline, polynomials are usually separated
|		into two parts evaluated independently before being added up.
|

|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|slogn	idnt	2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

|--BOUNDS1/BOUNDS2 ARE (LOWER,UPPER) PAIRS FOR CMP2 AGAINST THE "COMPACT
|--FORM" OF X HELD IN D0: SIGN/EXPONENT WORD IN THE UPPER 16 BITS, TOP 16
|--MANTISSA BITS IN THE LOWER 16 BITS.
BOUNDS1:	.long 0x3FFEF07D,0x3FFF8841
BOUNDS2:	.long 0x3FFE8000,0x3FFFC000

|--LOG(2) IN EXTENDED PRECISION, MULTIPLIED BY K AT LP1CONT1
LOGOF2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

|--SINGLE-PRECISION CONSTANTS (USED WITH FADDS/FSUBS/FMOVES BELOW)
one:	.long 0x3F800000
zero:	.long 0x00000000
infty:	.long 0x7F800000
negone:	.long 0xBF800000

|--DOUBLE-PRECISION COEFFICIENTS A1..A6 OF THE LOG(1+U) POLYNOMIAL (LP1CONT1)
LOGA6:	.long 0x3FC2499A,0xB5E4040B
LOGA5:	.long 0xBFC555B5,0x848CB7DB

LOGA4:	.long 0x3FC99999,0x987D8730
LOGA3:	.long 0xBFCFFFFF,0xFF6F7E97

LOGA2:	.long 0x3FD55555,0x555555a4
LOGA1:	.long 0xBFE00000,0x00000008

|--DOUBLE-PRECISION COEFFICIENTS B1..B5 OF THE ODD POLYNOMIAL (LP1CONT2)
LOGB5:	.long 0x3F175496,0xADD7DAD6
LOGB4:	.long 0x3F3C71C2,0xFE80C7E0

LOGB3:	.long 0x3F624924,0x928BCCFF
LOGB2:	.long 0x3F899999,0x999995EC

LOGB1:	.long 0x3FB55555,0x55555555
|--TWO IS READ WITH FMOVES (SINGLE PRECISION) IN KISNEG1; ONLY THE FIRST
|--LONG WORD IS USED THERE.
TWO:	.long 0x40000000,0x00000000

|--MAGNITUDE THRESHOLD FOR SLOGNP1: INPUTS WITH |X| NOT ABOVE THIS ARE
|--RETURNED UNCHANGED (SEE SLOGNP1).
LTHOLD:	.long 0x3f990000,0x80000000,0x00000000,0x00000000

|--LOGTBL: 64 ENTRIES OF 32 BYTES EACH. EACH ENTRY IS 1/F (EXTENDED, AT
|--OFFSET 0) FOLLOWED BY LOG(F) (EXTENDED, AT OFFSET 16). THE ENTRY IS
|--SELECTED BY THE SIX MANTISSA BITS OF F BELOW THE INTEGER BIT, I.E.
|--DISPLACEMENT = (FFRAC & 0x7E000000) >> 20.
LOGTBL:
	.long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
	.long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
	.long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
	.long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
	.long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
	.long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
	.long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
	.long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
	.long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
	.long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
	.long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
	.long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
	.long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
	.long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
	.long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
	.long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
	.long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
	.long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
	.long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
	.long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
	.long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
	.long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
	.long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
	.long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
	.long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
	.long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
	.long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
	.long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
	.long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
	.long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
	.long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
	.long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
	.long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
	.long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
	.long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
	.long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
	.long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
	.long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
	.long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
	.long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
	.long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
	.long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
	.long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
	.long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
	.long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
	.long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
	.long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
	.long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
	.long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
	.long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
	.long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
	.long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
	.long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
	.long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
	.long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
	.long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
	.long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
	.long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
	.long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
	.long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
	.long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
	.long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
	.long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
	.long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
	.long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
	.long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
	.long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
	.long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
	.long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
	.long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
	.long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
	.long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
	.long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
	.long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
	.long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
	.long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
	.long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
	.long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
	.long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
	.long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
	.long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
	.long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
	.long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
	.long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
	.long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
	.long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
	.long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
	.long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
	.long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
	.long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
	.long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
	.long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
	.long  0x3FFE0000,0x94458094,0x45809446,0x00000000
	.long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
	.long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
	.long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
	.long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
	.long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
	.long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
	.long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
	.long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
	.long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
	.long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
	.long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
	.long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
	.long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
	.long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
	.long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
	.long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
	.long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
	.long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
	.long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
	.long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
	.long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
	.long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
	.long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
	.long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
	.long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
	.long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
	.long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
	.long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
	.long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
	.long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
	.long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
	.long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
	.long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
	.long  0x3FFE0000,0x80808080,0x80808081,0x00000000
	.long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000

|--SCRATCH LOCATIONS, ADDRESSED OFF A6. L_SCR1 AND FP_SCR1..FP_SCR4 ARE
|--NOT DEFINED IN THIS FILE (PRESUMABLY THEY COME FROM fpsp.h).
	.set	ADJK,L_SCR1		| ...exponent adjustment for denorm input

	.set	X,FP_SCR1		| ...working copy of the argument
	.set	XDCARE,X+2		| ...word after the sign/exponent word of X
	.set	XFRAC,X+4		| ...upper mantissa long word of X

	.set	F,FP_SCR2		| ...F as an extended value
	.set	FFRAC,F+4		| ...upper mantissa long word of F

	.set	KLOG2,FP_SCR3		| ...K*LOG(2)

	.set	SAVEU,FP_SCR4		| ...U for the near-1 polynomial

	| xref	t_frcinx
	|xref	t_extdnrm
	|xref	t_operr
	|xref	t_dz

	.global	slognd
slognd:
|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
|--IN:  A0 POINTS TO THE EXTENDED-PRECISION INPUT.
|--OUT: FALLS INTO LOGBGN WITH THE NORMALIZED VALUE IN X(A6) AND FP0,
|--     A0 POINTING TO X(A6), AND ADJK(A6) = -K (K = SHIFT COUNT).
|--D2-D7 ARE SAVED ON THE STACK AND RESTORED BEFORE LEAVING.

|--NOTE(review): THIS STORE LOOKS DEAD; BOTH PATHS BELOW OVERWRITE ADJK
|--WITH -K BEFORE BRANCHING TO LOGBGN. LEFT IN PLACE, CONFIRM BEFORE REMOVING.
	movel		#-100,ADJK(%a6)	| ...INPUT = 2^(ADJK) * FP0

|----normalize the input value by left shifting k bits (k to be determined
|----below), adjusting exponent and storing -k to ADJK
|----the value TWOTO100 is no longer needed.
|----Note that this code assumes the denormalized input is NON-ZERO.

	moveml	%d2-%d7,-(%a7)		| ...save some registers
	movel	#0x00000000,%d3		| ...D3 is exponent of smallest norm. #
	movel	4(%a0),%d4
	movel	8(%a0),%d5		| ...(D4,D5) is (Hi_X,Lo_X)
	clrl	%d2			| ...D2 used for holding K

	tstl	%d4
	bnes	HiX_not0

HiX_0:
|--UPPER MANTISSA WORD IS ZERO: MOVE THE LOWER WORD UP (A 32-BIT SHIFT)
|--AND THEN SHIFT OUT ITS LEADING ZEROS.
	movel	%d5,%d4
	clrl	%d5
	movel	#32,%d2
	clrl	%d6
	bfffo	%d4{#0:#32},%d6		| ...D6 = offset of first 1 bit
	lsll	%d6,%d4
	addl	%d6,%d2			| ...(D3,D4,D5) is normalized

	movel	%d3,X(%a6)
	movel	%d4,XFRAC(%a6)
	movel	%d5,XFRAC+4(%a6)
	negl	%d2
	movel	%d2,ADJK(%a6)		| ...ADJK = -K
	fmovex	X(%a6),%fp0
	moveml	(%a7)+,%d2-%d7		| ...restore registers
	lea	X(%a6),%a0		| ...LOGBGN reads its input through A0
	bras	LOGBGN			| ...begin regular log(X)


HiX_not0:
|--UPPER MANTISSA WORD IS NON-ZERO: SHIFT THE 64-BIT MANTISSA LEFT BY K,
|--CARRYING THE TOP K BITS OF THE LOWER WORD INTO THE UPPER WORD.
	clrl	%d6
	bfffo	%d4{#0:#32},%d6		| ...find first 1
	movel	%d6,%d2			| ...get k
	lsll	%d6,%d4
	movel	%d5,%d7			| ...a copy of D5
	lsll	%d6,%d5
	negl	%d6
	addil	#32,%d6			| ...D6 = 32-K
	lsrl	%d6,%d7			| ...D7 = bits carried out of D5
	orl	%d7,%d4			| ...(D3,D4,D5) normalized

	movel	%d3,X(%a6)
	movel	%d4,XFRAC(%a6)
	movel	%d5,XFRAC+4(%a6)
	negl	%d2
	movel	%d2,ADJK(%a6)		| ...ADJK = -K
	fmovex	X(%a6),%fp0
	moveml	(%a7)+,%d2-%d7		| ...restore registers
	lea	X(%a6),%a0		| ...LOGBGN reads its input through A0
	bras	LOGBGN			| ...begin regular log(X)


	.global	slogn
slogn:
|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
|--IN:  A0 POINTS TO THE EXTENDED-PRECISION INPUT.
|--OUT: RESULT IN FP0; EXITS THROUGH t_frcinx, OR t_operr FOR X < 0.

	fmovex		(%a0),%fp0	| ...LOAD INPUT
	movel		#0x00000000,ADJK(%a6)

LOGBGN:
|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
|--A FINITE, NON-ZERO, NORMALIZED NUMBER.

|--BUILD THE COMPACT FORM OF X IN D0: SIGN/EXPONENT WORD IN THE UPPER
|--HALF, TOP 16 MANTISSA BITS IN THE LOWER HALF.
	movel	(%a0),%d0
	movew	4(%a0),%d0

	movel	(%a0),X(%a6)
	movel	4(%a0),X+4(%a6)
	movel	8(%a0),X+8(%a6)

	cmpil	#0,%d0		| ...CHECK IF X IS NEGATIVE
	blt	LOGNEG		| ...LOG OF NEGATIVE ARGUMENT IS INVALID
	cmp2l	BOUNDS1,%d0	| ...X IS POSITIVE, CHECK IF X IS NEAR 1
	bcc	LOGNEAR1	| ...BOUNDS IS ROUGHLY [15/16, 17/16]

LOGMAIN:
|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
|--(ALSO ENTERED FROM LP1REAL WITH D0 = COMPACT FORM OF ROUND(1+Z).)

|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
|--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
|--LOG(1+U) CAN BE VERY EFFICIENT.
|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.

|--GET K, Y, F, AND ADDRESS OF 1/F.
	asrl	#8,%d0
	asrl	#8,%d0		| ...SHIFTED 16 BITS, BIASED EXPO. OF X
	subil	#0x3FFF,%d0	| ...THIS IS K
	addl	ADJK(%a6),%d0	| ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
	lea	LOGTBL,%a0	| ...BASE ADDRESS OF 1/F AND LOG(F)
	fmovel	%d0,%fp1	| ...CONVERT K TO FLOATING-POINT FORMAT

|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
	movel	#0x3FFF0000,X(%a6)	| ...X IS NOW Y, I.E. 2^(-K)*X
	movel	XFRAC(%a6),FFRAC(%a6)
	andil	#0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
	oril	#0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
	movel	FFRAC(%a6),%d0	| ...READY TO GET ADDRESS OF 1/F
	andil	#0x7E000000,%d0	| ...KEEP THE SIX INDEX BITS
	asrl	#8,%d0
	asrl	#8,%d0
	asrl	#4,%d0		| ...SHIFTED 20, D0 IS THE DISPLACEMENT
	addal	%d0,%a0		| ...A0 IS THE ADDRESS FOR 1/F

	fmovex	X(%a6),%fp0
	movel	#0x3fff0000,F(%a6)
	clrl	F+8(%a6)
	fsubx	F(%a6),%fp0		| ...Y-F
	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2/FP3 WHILE FP0 IS NOT READY
|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
|--REGISTERS SAVED: FPCR, FP1, FP2

LP1CONT1:
|--A RE-ENTRY POINT FOR LOGNP1
|--EXPECTS: FP0 = Y-F, FP1 = K, A0 = ADDRESS OF 1/F, FP2/FP3 PUSHED ON STACK.
	fmulx	(%a0),%fp0	| ...FP0 IS U = (Y-F)/F
	fmulx	LOGOF2,%fp1	| ...GET K*LOG2 WHILE FP0 IS NOT READY
	fmovex	%fp0,%fp2
	fmulx	%fp2,%fp2	| ...FP2 IS V=U*U
	fmovex	%fp1,KLOG2(%a6)	| ...PUT K*LOG2 IN MEMORY, FREE FP1

|--LOG(1+U) IS APPROXIMATED BY
|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6)))))	WHICH IS
|--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]

	fmovex	%fp2,%fp3
	fmovex	%fp2,%fp1

	fmuld	LOGA6,%fp1	| ...V*A6
	fmuld	LOGA5,%fp2	| ...V*A5

	faddd	LOGA4,%fp1	| ...A4+V*A6
	faddd	LOGA3,%fp2	| ...A3+V*A5

	fmulx	%fp3,%fp1	| ...V*(A4+V*A6)
	fmulx	%fp3,%fp2	| ...V*(A3+V*A5)

	faddd	LOGA2,%fp1	| ...A2+V*(A4+V*A6)
	faddd	LOGA1,%fp2	| ...A1+V*(A3+V*A5)

	fmulx	%fp3,%fp1	| ...V*(A2+V*(A4+V*A6))
	addal	#16,%a0		| ...ADDRESS OF LOG(F)
	fmulx	%fp3,%fp2	| ...V*(A1+V*(A3+V*A5)), FP3 RELEASED

	fmulx	%fp0,%fp1	| ...U*V*(A2+V*(A4+V*A6))
	faddx	%fp2,%fp0	| ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED

	faddx	(%a0),%fp1	| ...LOG(F)+U*V*(A2+V*(A4+V*A6))
	fmovemx	(%sp)+,%fp2-%fp2/%fp3	| ...RESTORE FP2/FP3
	faddx	%fp1,%fp0	| ...FP0 IS LOG(F) + LOG(1+U)

|--FPCR IS LOADED FROM D1 JUST BEFORE THE LAST OPERATION (PRESUMABLY THE
|--CALLER'S ROUNDING MODE/PRECISION, CONFIRM AGAINST THE FPSP ENTRY CODE).
	fmovel	%d1,%fpcr
	faddx	KLOG2(%a6),%fp0	| ...FINAL ADD
	bra	t_frcinx


LOGNEAR1:
|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
	fmovex	%fp0,%fp1
	fsubs	one,%fp1	| ...FP1 IS X-1
	fadds	one,%fp0	| ...FP0 IS X+1
	faddx	%fp1,%fp1	| ...FP1 IS 2(X-1)
|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
|--IN U, U = 2(X-1)/(X+1) = FP1/FP0

LP1CONT2:
|--THIS IS A RE-ENTRY POINT FOR LOGNP1
|--EXPECTS: FP1 = NUMERATOR, FP0 = DENOMINATOR OF U.
	fdivx	%fp0,%fp1	| ...FP1 IS U
	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2/FP3
|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
|--LET V=U*U, W=V*V, CALCULATE
|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
|--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
	fmovex	%fp1,%fp0
	fmulx	%fp0,%fp0	| ...FP0 IS V
	fmovex	%fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
	fmovex	%fp0,%fp1
	fmulx	%fp1,%fp1	| ...FP1 IS W

	fmoved	LOGB5,%fp3
	fmoved	LOGB4,%fp2

	fmulx	%fp1,%fp3	| ...W*B5
	fmulx	%fp1,%fp2	| ...W*B4

	faddd	LOGB3,%fp3	| ...B3+W*B5
	faddd	LOGB2,%fp2	| ...B2+W*B4

	fmulx	%fp3,%fp1	| ...W*(B3+W*B5), FP3 RELEASED

	fmulx	%fp0,%fp2	| ...V*(B2+W*B4)

	faddd	LOGB1,%fp1	| ...B1+W*(B3+W*B5)
	fmulx	SAVEU(%a6),%fp0 | ...FP0 IS U*V

	faddx	%fp2,%fp1	| ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
	fmovemx	(%sp)+,%fp2-%fp2/%fp3 | ...FP2/FP3 RESTORED

	fmulx	%fp1,%fp0	| ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )

	fmovel	%d1,%fpcr
	faddx	SAVEU(%a6),%fp0	| ...FINAL ADD: U + U*V*(...)
	bra	t_frcinx

LOGNEG:
|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
	bra	t_operr

	.global	slognp1d
slognp1d:
|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
| Simply return the denorm

	bra	t_extdnrm

	.global	slognp1
slognp1:
|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
|--IN:  A0 POINTS TO THE EXTENDED-PRECISION INPUT.
|--OUT: RESULT IN FP0; EXITS THROUGH t_frcinx, t_dz (1+X = 0) OR
|--     t_operr (1+X < 0).

	fmovex	(%a0),%fp0	| ...LOAD INPUT
	fabsx	%fp0		|test magnitude
	fcmpx	LTHOLD,%fp0	|compare with min threshold
	fbgt	LP1REAL		|if greater, continue
|--|X| IS NOT ABOVE LTHOLD: THE INPUT ITSELF IS RETURNED AS THE RESULT
	fmovel	#0,%fpsr	|clr N flag from compare
	fmovel	%d1,%fpcr
	fmovex	(%a0),%fp0	|return signed argument
	bra	t_frcinx

LP1REAL:
	fmovex	(%a0),%fp0	| ...LOAD INPUT
	movel	#0x00000000,ADJK(%a6)
	fmovex	%fp0,%fp1	| ...FP1 IS INPUT Z
	fadds	one,%fp0	| ...X := ROUND(1+Z)
	fmovex	%fp0,X(%a6)
	movew	XFRAC(%a6),XDCARE(%a6)	| ...BUILD COMPACT FORM OF X IN PLACE
	movel	X(%a6),%d0		| ...D0 IS X IN COMPACT FORM
	cmpil	#0,%d0
	ble	LP1NEG0	| ...LOG OF ZERO OR -VE
	cmp2l	BOUNDS2,%d0
	bcs	LOGMAIN	| ...BOUNDS2 IS [1/2,3/2]
|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).

LP1NEAR1:
|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
	cmp2l	BOUNDS1,%d0
	bcss	LP1CARE

LP1ONE16:
|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
	faddx	%fp1,%fp1	| ...FP1 IS 2Z
	fadds	one,%fp0	| ...FP0 IS 1+X
|--U = FP1/FP0
	bra	LP1CONT2

LP1CARE:
|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
|--THERE ARE ONLY TWO CASES.
|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
|--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.

	movel	XFRAC(%a6),FFRAC(%a6)
	andil	#0xFE000000,FFRAC(%a6)
	oril	#0x01000000,FFRAC(%a6)	| ...F OBTAINED
	cmpil	#0x3FFF8000,%d0	| ...SEE IF 1+Z > 1
	bges	KISZERO

KISNEG1:
	fmoves	TWO,%fp0
	movel	#0x3fff0000,F(%a6)
	clrl	F+8(%a6)
	fsubx	F(%a6),%fp0	| ...2-F
	movel	FFRAC(%a6),%d0
	andil	#0x7E000000,%d0
	asrl	#8,%d0
	asrl	#8,%d0
	asrl	#4,%d0		| ...D0 CONTAINS DISPLACEMENT FOR 1/F
	faddx	%fp1,%fp1	| ...GET 2Z
	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2/FP3
	faddx	%fp1,%fp0	| ...FP0 IS Y-F = (2-F)+2Z
	lea	LOGTBL,%a0	| ...A0 IS ADDRESS OF 1/F
	addal	%d0,%a0
	fmoves	negone,%fp1	| ...FP1 IS K = -1
	bra	LP1CONT1

KISZERO:
	fmoves	one,%fp0
	movel	#0x3fff0000,F(%a6)
	clrl	F+8(%a6)
	fsubx	F(%a6),%fp0	| ...1-F
	movel	FFRAC(%a6),%d0
	andil	#0x7E000000,%d0
	asrl	#8,%d0
	asrl	#8,%d0
	asrl	#4,%d0		| ...D0 CONTAINS DISPLACEMENT FOR 1/F
	faddx	%fp1,%fp0	| ...FP0 IS Y-F
	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...FP2/FP3 SAVED
	lea	LOGTBL,%a0
	addal	%d0,%a0		| ...A0 IS ADDRESS OF 1/F
	fmoves	zero,%fp1	| ...FP1 IS K = 0
	bra	LP1CONT1

LP1NEG0:
|--FPCR SAVED. D0 IS X IN COMPACT FORM.
	cmpil	#0,%d0
	blts	LP1NEG
LP1ZERO:
|--1+Z IS ZERO: LOAD -1 INTO FP0 AND EXIT THROUGH THE DIVIDE-BY-ZERO HANDLER
	fmoves	negone,%fp0

	fmovel	%d1,%fpcr
	bra	t_dz

LP1NEG:
|--1+Z IS NEGATIVE: LOAD 0 INTO FP0 AND EXIT THROUGH THE OPERAND-ERROR HANDLER
	fmoves	zero,%fp0

	fmovel	%d1,%fpcr
	bra	t_operr

	|end
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.