~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/m68k/fpsp040/slogn.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 |
  2 |       slogn.sa 3.1 12/10/90
  3 |
  4 |       slogn computes the natural logarithm of an
  5 |       input value. slognd does the same except the input value is a
  6 |       denormalized number. slognp1 computes log(1+X), and slognp1d
  7 |       computes log(1+X) for denormalized X.
  8 |
  9 |       Input: Double-extended value in memory location pointed to by address
 10 |               register a0.
 11 |
 12 |       Output: log(X) or log(1+X) returned in floating-point register Fp0.
 13 |
 14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
 15 |               64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
 16 |               result is subsequently rounded to double precision. The
 17 |               result is provably monotonic in double precision.
 18 |
 19 |       Speed: The program slogn takes approximately 190 cycles for input
 20 |               argument X such that |X-1| >= 1/16, which is the usual
 21 |               situation. For those arguments, slognp1 takes approximately
 22 |                210 cycles. For the less common arguments, the program will
 23 |                run no worse than 10% slower.
 24 |
 25 |       Algorithm:
 26 |       LOGN:
 27 |       Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
 28 |               u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
 29 |
 30 |       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
 31 |               significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
 32 |               2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
 33 |
 34 |       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
 35 |               log(1+u) = poly.
 36 |
 37 |       Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
 38 |               by k*log(2) + (log(F) + poly). The values of log(F) are calculated
 39 |               beforehand and stored in the program.
 40 |
 41 |       lognp1:
 42 |       Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
 43 |               u where u = 2X/(2+X). Otherwise, move on to Step 2.
 44 |
 45 |       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
 46 |               of the algorithm for LOGN and compute log(1+X) as
 47 |               k*log(2) + log(F) + poly where poly approximates log(1+u),
 48 |               u = (Y-F)/F.
 49 |
 50 |       Implementation Notes:
 51 |       Note 1. There are 64 different possible values for F, thus 64 log(F)'s
 52 |               need to be tabulated. Moreover, the values of 1/F are also
 53 |               tabulated so that the division in (Y-F)/F can be performed by a
 54 |               multiplication.
 55 |
 56 |       Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value
 57 |               Y-F has to be calculated carefully when 1/2 <= 1+X < 3/2.
 58 |
 59 |       Note 3. To fully exploit the pipeline, polynomials are usually separated
 60 |               into two parts evaluated independently before being added up.
 61 |
 62 
 63 |               Copyright (C) Motorola, Inc. 1990
 64 |                       All Rights Reserved
 65 |
 66 |       For details on the license for this file, please see the
 67 |       file, README, in this same directory.
 68 
 69 |slogn  idnt    2,1 | Motorola 040 Floating Point Software Package
 70 
 71         |section        8
 72 
 73 #include "fpsp.h"
 74 
 75 BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841   | ...cmp2l bounds, about [exp(-1/16), exp(1/16)]
 76 BOUNDS2:  .long 0x3FFE8000,0x3FFFC000   | ...cmp2l bounds, [1/2, 3/2]
    |--Both bound pairs are in "compact" form: the sign/exponent word in the
    |--upper 16 bits and the top 16 fraction bits in the lower 16 bits, the
    |--same packing LOGBGN and LP1REAL build in D0 before the cmp2l.
 77 
 78 LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000   | ...log(2), extended precision (used via fmulx)
 79 
    |--Single-precision constants (used via fadds/fsubs/fmoves).
 80 one:    .long 0x3F800000        | ...1.0
 81 zero:   .long 0x00000000        | ...0.0
 82 infty:  .long 0x7F800000        | ...+infinity; not referenced by the code visible in this file
 83 negone: .long 0xBF800000        | ...-1.0
 84 
    |--Double-precision coefficients A1..A6 of the LP1CONT1 polynomial
    |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))), V = U*U (used via fmuld/faddd).
 85 LOGA6:  .long 0x3FC2499A,0xB5E4040B
 86 LOGA5:  .long 0xBFC555B5,0x848CB7DB
 87 
 88 LOGA4:  .long 0x3FC99999,0x987D8730
 89 LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
 90 
 91 LOGA2:  .long 0x3FD55555,0x555555a4
 92 LOGA1:  .long 0xBFE00000,0x00000008
 93 
    |--Double-precision coefficients B1..B5 of the LP1CONT2 polynomial
    |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))), V = U*U (used via fmoved/faddd).
 94 LOGB5:  .long 0x3F175496,0xADD7DAD6
 95 LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
 96 
 97 LOGB3:  .long 0x3F624924,0x928BCCFF
 98 LOGB2:  .long 0x3F899999,0x999995EC
 99 
100 LOGB1:  .long 0x3FB55555,0x55555555
101 TWO:    .long 0x40000000,0x00000000     | ...2.0; KISNEG1 reads only the first long, as a single (fmoves)
102 
103 LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000   | ...2^(-102), extended; slognp1 returns X itself when |X| is not above this
104 
105 LOGTBL:
    |--Lookup table of 64 entries, 32 bytes each, one entry per value of
    |--F = 1.xxxxxx1 (base 2). Each entry holds two extended-precision
    |--numbers, each padded with a zero long to 16 bytes:
    |--    offset  0:  1/F
    |--    offset 16:  log(F)
    |--LOGMAIN, KISNEG1 and KISZERO compute the entry offset as
    |--(high fraction long of F AND 0x7E000000) shifted right by 20, i.e.
    |--the six "x" bits times 32. LP1CONT1 multiplies by (A0) to get
    |--(Y-F)/F and later adds 16 to A0 to reach log(F).
106         .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
107         .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
108         .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
109         .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
110         .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
111         .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
112         .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
113         .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
114         .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
115         .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
116         .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
117         .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
118         .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
119         .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
120         .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
121         .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
122         .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
123         .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
124         .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
125         .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
126         .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
127         .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
128         .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
129         .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
130         .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
131         .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
132         .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
133         .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
134         .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
135         .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
136         .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
137         .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
138         .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
139         .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
140         .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
141         .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
142         .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
143         .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
144         .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
145         .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
146         .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
147         .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
148         .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
149         .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
150         .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
151         .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
152         .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
153         .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
154         .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
155         .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
156         .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
157         .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
158         .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
159         .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
160         .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
161         .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
162         .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
163         .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
164         .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
165         .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
166         .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
167         .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
168         .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
169         .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
170         .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
171         .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
172         .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
173         .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
174         .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
175         .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
176         .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
177         .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
178         .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
179         .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
180         .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
181         .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
182         .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
183         .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
184         .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
185         .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
186         .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
187         .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
188         .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
189         .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
190         .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
191         .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
192         .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
193         .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
194         .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
195         .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
196         .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
197         .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
198         .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
199         .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
200         .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
201         .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
202         .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
203         .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
204         .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
205         .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
206         .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
207         .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
208         .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
209         .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
210         .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
211         .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
212         .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
213         .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
214         .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
215         .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
216         .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
217         .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
218         .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
219         .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
220         .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
221         .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
222         .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
223         .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
224         .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
225         .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
226         .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
227         .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
228         .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
229         .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
230         .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
231         .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
232         .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
233         .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
234 
235         .set    ADJK,L_SCR1             | ...exponent adjustment: input = 2^(ADJK) * FP0
236 
    |--The symbols below are offsets used as disp(%a6). L_SCR1 and the
    |--FP_SCRn names are not defined in this file (presumably fpsp.h -- confirm).
237         .set    X,FP_SCR1               | ...working copy of the argument (extended)
238         .set    XDCARE,X+2              | ...word after the sign/exponent word of X
239         .set    XFRAC,X+4               | ...high long of the fraction of X
240 
241         .set    F,FP_SCR2               | ...F = 1.xxxxxx1 built from Y (extended)
242         .set    FFRAC,F+4               | ...high long of the fraction of F
243 
244         .set    KLOG2,FP_SCR3           | ...holds K*log(2) until the final add
245 
246         .set    SAVEU,FP_SCR4           | ...holds U while LP1CONT2 reuses FP1
247 
    |--External branch targets used by this file, kept as commented-out xrefs.
248         | xref  t_frcinx
249         |xref   t_extdnrm
250         |xref   t_operr
251         |xref   t_dz
252 
253         .global slognd
254 slognd:
255 |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
    |--In:   A0 points to the extended-precision denormalized input.
    |--Out:  does not return here; branches to LOGBGN with the normalized
    |--      value in X(%a6) and FP0, A0 pointing to X(%a6), and ADJK(%a6)
    |--      holding -k, where k is the left-shift count that was applied.
    |--Uses: D2-D7 as scratch (saved on entry, restored before the branch).
    |--NOTE(review): the sign/exponent long of X is written from D3 (zero),
    |--so the sign of the input is not carried over; this presumably relies
    |--on the caller rejecting negative denormals -- confirm.
256 
    |--This initial value is overwritten by the store of -k on both paths below.
257         movel           #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
258 
259 |----normalize the input value by left shifting k bits (k to be determined
260 |----below), adjusting exponent and storing -k to  ADJK
261 |----the value TWOTO100 is no longer needed.
262 |----Note that this code assumes the denormalized input is NON-ZERO.
263 
264         moveml  %d2-%d7,-(%a7)          | ...save some registers
265         movel   #0x00000000,%d3         | ...D3 is exponent of smallest norm. #
266         movel   4(%a0),%d4
267         movel   8(%a0),%d5              | ...(D4,D5) is (Hi_X,Lo_X)
268         clrl    %d2                     | ...D2 used for holding K
269 
270         tstl    %d4
271         bnes    HiX_not0
272 
273 HiX_0:
    |--High fraction long is zero: move the low long up (a shift of 32),
    |--then shift out its leading zeros. K = 32 + leading-zero count.
274         movel   %d5,%d4
275         clrl    %d5
276         movel   #32,%d2
277         clrl    %d6
278         bfffo      %d4{#0:#32},%d6      | ...D6 = offset of first 1 in D4
279         lsll      %d6,%d4
280         addl    %d6,%d2                 | ...(D3,D4,D5) is normalized
281 
282         movel   %d3,X(%a6)
283         movel   %d4,XFRAC(%a6)
284         movel   %d5,XFRAC+4(%a6)
285         negl    %d2
286         movel   %d2,ADJK(%a6)           | ...ADJK = -K
287         fmovex  X(%a6),%fp0
288         moveml  (%a7)+,%d2-%d7          | ...restore registers
289         lea     X(%a6),%a0
290         bras    LOGBGN                  | ...begin regular log(X)
291 
292 
293 HiX_not0:
    |--High fraction long is non-zero: shift the 64-bit fraction (D4,D5)
    |--left by the leading-zero count of D4. The bits leaving the top of
    |--D5 are recovered from the copy in D7 and ORed into D4.
294         clrl    %d6
295         bfffo   %d4{#0:#32},%d6         | ...find first 1
296         movel   %d6,%d2                 | ...get k
297         lsll    %d6,%d4
298         movel   %d5,%d7                 | ...a copy of D5
299         lsll    %d6,%d5
300         negl    %d6
301         addil   #32,%d6                 | ...D6 = 32 - k
302         lsrl    %d6,%d7                 | ...top k bits of the old D5
303         orl     %d7,%d4                 | ...(D3,D4,D5) normalized
304 
305         movel   %d3,X(%a6)
306         movel   %d4,XFRAC(%a6)
307         movel   %d5,XFRAC+4(%a6)
308         negl    %d2
309         movel   %d2,ADJK(%a6)           | ...ADJK = -K
310         fmovex  X(%a6),%fp0
311         moveml  (%a7)+,%d2-%d7          | ...restore registers
312         lea     X(%a6),%a0
313         bras    LOGBGN                  | ...begin regular log(X)
314 
315 
316         .global slogn
317 slogn:
318 |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
    |--In:   A0 points to the extended-precision input X.
    |--      D1 is written to FPCR just before the final add; presumably it
    |--      holds the caller's saved FPCR -- confirm against the caller.
    |--Out:  result in FP0; exits by branching to t_frcinx (or to LOGNEG
    |--      for a negative argument).
    |--Uses: D0, A0, FP1, and the X/F/KLOG2/ADJK scratch areas off A6.
    |--      FP2/FP3 are pushed and popped around the polynomial.
319 
320         fmovex          (%a0),%fp0      | ...LOAD INPUT
321         movel           #0x00000000,ADJK(%a6)
322 
323 LOGBGN:
324 |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
325 |--A FINITE, NON-ZERO, NORMALIZED NUMBER.
326 
    |--Build the "compact" form of X in D0: sign/exponent word in the upper
    |--half, top 16 fraction bits in the lower half.
327         movel   (%a0),%d0
328         movew   4(%a0),%d0
329 
    |--Copy the 12-byte operand into the X scratch area.
330         movel   (%a0),X(%a6)
331         movel   4(%a0),X+4(%a6)
332         movel   8(%a0),X+8(%a6)
333 
334         cmpil   #0,%d0          | ...CHECK IF X IS NEGATIVE
335         blt     LOGNEG          | ...LOG OF NEGATIVE ARGUMENT IS INVALID
336         cmp2l   BOUNDS1,%d0     | ...X IS POSITIVE, CHECK IF X IS NEAR 1
337         bcc     LOGNEAR1        | ...BOUNDS IS ROUGHLY [15/16, 17/16]
338 
339 LOGMAIN:
340 |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
    |--Also entered from LP1REAL (log(1+Z)) with X(%a6) = ROUND(1+Z), D0 its
    |--compact form and ADJK = 0. A0 is reloaded below before it is used.
341 
342 |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
343 |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
344 |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
345 |--                      = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
346 |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
347 |--LOG(1+U) CAN BE VERY EFFICIENT.
348 |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
349 |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
350 
351 |--GET K, Y, F, AND ADDRESS OF 1/F.
352         asrl    #8,%d0
353         asrl    #8,%d0          | ...SHIFTED 16 BITS, BIASED EXPO. OF X
354         subil   #0x3FFF,%d0     | ...THIS IS K
355         addl    ADJK(%a6),%d0   | ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
356         lea     LOGTBL,%a0      | ...BASE ADDRESS OF 1/F AND LOG(F)
357         fmovel  %d0,%fp1                | ...CONVERT K TO FLOATING-POINT FORMAT
358 
359 |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
360         movel   #0x3FFF0000,X(%a6)      | ...X IS NOW Y, I.E. 2^(-K)*X
361         movel   XFRAC(%a6),FFRAC(%a6)
362         andil   #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
363         oril    #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
364         movel   FFRAC(%a6),%d0  | ...READY TO GET ADDRESS OF 1/F
365         andil   #0x7E000000,%d0 | ...KEEP THE SIX "X" BITS OF F
366         asrl    #8,%d0
367         asrl    #8,%d0
368         asrl    #4,%d0          | ...SHIFTED 20, D0 IS THE DISPLACEMENT
369         addal   %d0,%a0         | ...A0 IS THE ADDRESS FOR 1/F
370 
371         fmovex  X(%a6),%fp0
372         movel   #0x3fff0000,F(%a6)      | ...F GETS EXPONENT 0x3FFF, SAME AS Y
373         clrl    F+8(%a6)                | ...LOW FRACTION LONG OF F IS ZERO
374         fsubx   F(%a6),%fp0             | ...Y-F
375         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2/FP3 WHILE FP0 IS NOT READY
376 |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
377 |--REGISTERS SAVED: FPCR, FP1, FP2 (FP2 AND FP3 ARE BOTH ON THE STACK HERE)
378 
379 LP1CONT1:
380 |--A RE-ENTRY POINT FOR LOGNP1
    |--Expects: FP0 = Y-F, FP1 = K, A0 = address of the 1/F table entry,
    |--FP2/FP3 already pushed on the stack.
381         fmulx   (%a0),%fp0      | ...FP0 IS U = (Y-F)/F
382         fmulx   LOGOF2,%fp1     | ...GET K*LOG2 WHILE FP0 IS NOT READY
383         fmovex  %fp0,%fp2
384         fmulx   %fp2,%fp2               | ...FP2 IS V=U*U
385         fmovex  %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
386 
387 |--LOG(1+U) IS APPROXIMATED BY
388 |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
389 |--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
    |--The two bracketed halves are accumulated in FP2 and FP1 respectively,
    |--with FP3 holding V.
390 
391         fmovex  %fp2,%fp3
392         fmovex  %fp2,%fp1
393 
394         fmuld   LOGA6,%fp1      | ...V*A6
395         fmuld   LOGA5,%fp2      | ...V*A5
396 
397         faddd   LOGA4,%fp1      | ...A4+V*A6
398         faddd   LOGA3,%fp2      | ...A3+V*A5
399 
400         fmulx   %fp3,%fp1               | ...V*(A4+V*A6)
401         fmulx   %fp3,%fp2               | ...V*(A3+V*A5)
402 
403         faddd   LOGA2,%fp1      | ...A2+V*(A4+V*A6)
404         faddd   LOGA1,%fp2      | ...A1+V*(A3+V*A5)
405 
406         fmulx   %fp3,%fp1               | ...V*(A2+V*(A4+V*A6))
407         addal   #16,%a0         | ...ADDRESS OF LOG(F)
408         fmulx   %fp3,%fp2               | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
409 
410         fmulx   %fp0,%fp1               | ...U*V*(A2+V*(A4+V*A6))
411         faddx   %fp2,%fp0               | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
412 
413         faddx   (%a0),%fp1      | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
414         fmovemx  (%sp)+,%fp2-%fp2/%fp3  | ...RESTORE FP2/FP3
415         faddx   %fp1,%fp0               | ...FP0 IS LOG(F) + LOG(1+U)
416 
417         fmovel  %d1,%fpcr       | ...LOAD FPCR FROM D1 SO IT APPLIES TO THE LAST ADD
418         faddx   KLOG2(%a6),%fp0 | ...FINAL ADD
419         bra     t_frcinx
420 
421 
422 LOGNEAR1:
423 |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
    |--Reached from LOGBGN when the compact form of X lies within BOUNDS1,
    |--i.e. X is close to 1. Computes log(X) as an odd polynomial in
    |--U = 2(X-1)/(X+1).
    |--In:   FP0 = X. D1 is written to FPCR before the final add; presumably
    |--      the caller's saved FPCR -- confirm against the caller.
    |--Out:  result in FP0; exits by branching to t_frcinx.
    |--Uses: FP1 and SAVEU(%a6); FP2/FP3 are pushed and popped.
424         fmovex  %fp0,%fp1
425         fsubs   one,%fp1                | ...FP1 IS X-1
426         fadds   one,%fp0                | ...FP0 IS X+1
427         faddx   %fp1,%fp1               | ...FP1 IS 2(X-1)
428 |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
429 |--IN U, U = 2(X-1)/(X+1) = FP1/FP0
430 
431 LP1CONT2:
432 |--THIS IS A RE-ENTRY POINT FOR LOGNP1
    |--Expects: FP1 = numerator and FP0 = denominator of U.
433         fdivx   %fp0,%fp1               | ...FP1 IS U
434         fmovemx %fp2-%fp2/%fp3,-(%sp)    | ...SAVE FP2/FP3
435 |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
436 |--LET V=U*U, W=V*V, CALCULATE
437 |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
438 |--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
439         fmovex  %fp1,%fp0
440         fmulx   %fp0,%fp0       | ...FP0 IS V
441         fmovex  %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
442         fmovex  %fp0,%fp1
443         fmulx   %fp1,%fp1       | ...FP1 IS W
444 
445         fmoved  LOGB5,%fp3
446         fmoved  LOGB4,%fp2
447 
448         fmulx   %fp1,%fp3       | ...W*B5
449         fmulx   %fp1,%fp2       | ...W*B4
450 
451         faddd   LOGB3,%fp3 | ...B3+W*B5
452         faddd   LOGB2,%fp2 | ...B2+W*B4
453 
454         fmulx   %fp3,%fp1       | ...W*(B3+W*B5), FP3 RELEASED
455 
456         fmulx   %fp0,%fp2       | ...V*(B2+W*B4)
457 
458         faddd   LOGB1,%fp1 | ...B1+W*(B3+W*B5)
459         fmulx   SAVEU(%a6),%fp0 | ...FP0 IS U*V
460 
461         faddx   %fp2,%fp1       | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
462         fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2/FP3 RESTORED
463 
464         fmulx   %fp1,%fp0       | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
465 
466         fmovel  %d1,%fpcr       | ...LOAD FPCR FROM D1 SO IT APPLIES TO THE LAST ADD
467         faddx   SAVEU(%a6),%fp0 | ...FINAL ADD: U + U*V*(...)
    |--Unconditional exit; nothing falls through past this branch.
468         bra     t_frcinx
470 
471 LOGNEG:
472 |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
    |--Reached from LOGBGN when the compact form of X is negative (sign bit
    |--set). Hands off to the external t_operr routine, which presumably
    |--raises the operand-error exception -- confirm.
473         bra     t_operr
474 
475         .global slognp1d
476 slognp1d:
477 |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
478 | Simply return the denorm
    |--No computation is done here; control goes straight to the external
    |--t_extdnrm routine (not defined in this file).
479 
480         bra     t_extdnrm
481 
482         .global slognp1
483 slognp1:
484 |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
    |--In:   A0 points to the extended-precision input (called X here and Z
    |--      in the comments below). D1 is written to FPCR on the exit
    |--      paths; presumably the caller's saved FPCR -- confirm.
    |--Out:  result in FP0 via t_frcinx, directly or through LOGMAIN,
    |--      LP1CONT1 or LP1CONT2; or a branch to t_dz when 1+Z is zero, or
    |--      to t_operr when 1+Z is negative.
    |--Uses: D0, A0, FP1, and the X/F/ADJK scratch areas off A6.
485 
    |--Tiny arguments: if |X| is not greater than LTHOLD, return X itself.
486         fmovex  (%a0),%fp0      | ...LOAD INPUT
487         fabsx   %fp0            |test magnitude
488         fcmpx   LTHOLD,%fp0     |compare with min threshold
489         fbgt    LP1REAL         |if greater, continue
490         fmovel  #0,%fpsr                |clr N flag from compare
491         fmovel  %d1,%fpcr
492         fmovex  (%a0),%fp0      |return signed argument
493         bra     t_frcinx
494 
495 LP1REAL:
496         fmovex  (%a0),%fp0      | ...LOAD INPUT
497         movel   #0x00000000,ADJK(%a6)
498         fmovex  %fp0,%fp1       | ...FP1 IS INPUT Z
499         fadds   one,%fp0        | ...X := ROUND(1+Z)
500         fmovex  %fp0,X(%a6)
    |--Copy the top 16 fraction bits into the word after the sign/exponent
    |--so that the long at X(%a6) is the compact form of X.
501         movew   XFRAC(%a6),XDCARE(%a6)
502         movel   X(%a6),%d0
503         cmpil   #0,%d0
504         ble     LP1NEG0 | ...LOG OF ZERO OR -VE
505         cmp2l   BOUNDS2,%d0
506         bcs     LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
507 |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
508 |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
509 |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
    |--(The bcs above is that case: carry set means X is outside BOUNDS2.)
510 
511 LP1NEAR1:
512 |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
513         cmp2l   BOUNDS1,%d0
514         bcss    LP1CARE         | ...OUTSIDE BOUNDS1: TABLE-DRIVEN PATH
515 
516 LP1ONE16:
517 |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
518 |--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
519         faddx   %fp1,%fp1       | ...FP1 IS 2Z
520         fadds   one,%fp0        | ...FP0 IS 1+X
521 |--U = FP1/FP0
522         bra     LP1CONT2
523 
524 LP1CARE:
525 |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
526 |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
527 |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
528 |--THERE ARE ONLY TWO CASES.
529 |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
530 |--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
531 |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
532 |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
533 
    |--F is taken from the rounded X, but Y-F is formed from the exact Z
    |--still held in FP1.
534         movel   XFRAC(%a6),FFRAC(%a6)
535         andil   #0xFE000000,FFRAC(%a6)
536         oril    #0x01000000,FFRAC(%a6)  | ...F OBTAINED
537         cmpil   #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
538         bges    KISZERO
539 
540 KISNEG1:
541         fmoves  TWO,%fp0        | ...FP0 IS 2.0
542         movel   #0x3fff0000,F(%a6)
543         clrl    F+8(%a6)
544         fsubx   F(%a6),%fp0     | ...2-F
545         movel   FFRAC(%a6),%d0
546         andil   #0x7E000000,%d0
547         asrl    #8,%d0
548         asrl    #8,%d0
549         asrl    #4,%d0          | ...D0 CONTAINS DISPLACEMENT FOR 1/F
550         faddx   %fp1,%fp1               | ...GET 2Z
551         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2/FP3
552         faddx   %fp1,%fp0               | ...FP0 IS Y-F = (2-F)+2Z
553         lea     LOGTBL,%a0      | ...A0 IS ADDRESS OF 1/F
554         addal   %d0,%a0
555         fmoves  negone,%fp1     | ...FP1 IS K = -1
556         bra     LP1CONT1
557 
558 KISZERO:
559         fmoves  one,%fp0        | ...FP0 IS 1.0
560         movel   #0x3fff0000,F(%a6)
561         clrl    F+8(%a6)
562         fsubx   F(%a6),%fp0             | ...1-F
563         movel   FFRAC(%a6),%d0
564         andil   #0x7E000000,%d0
565         asrl    #8,%d0
566         asrl    #8,%d0
567         asrl    #4,%d0          | ...D0 CONTAINS DISPLACEMENT FOR 1/F
568         faddx   %fp1,%fp0               | ...FP0 IS Y-F
569         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...FP2/FP3 SAVED
570         lea     LOGTBL,%a0
571         addal   %d0,%a0         | ...A0 IS ADDRESS OF 1/F
572         fmoves  zero,%fp1       | ...FP1 IS K = 0
573         bra     LP1CONT1
574 
575 LP1NEG0:
576 |--FPCR SAVED. D0 IS X IN COMPACT FORM.
    |--Reached when the compact form of X = ROUND(1+Z) is <= 0.
577         cmpil   #0,%d0
578         blts    LP1NEG
579 LP1ZERO:
    |--1+Z is zero: load -1 into FP0 and hand off to the external t_dz
    |--routine (presumably the divide-by-zero handler -- confirm).
580         fmoves  negone,%fp0
581 
582         fmovel  %d1,%fpcr
583         bra t_dz
584 
585 LP1NEG:
    |--1+Z is negative: load 0 into FP0 and hand off to the external
    |--t_operr routine (presumably the operand-error handler -- confirm).
586         fmoves  zero,%fp0
587 
588         fmovel  %d1,%fpcr
589         bra     t_operr
590 
591         |end

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php