~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/m68k/fpsp040/stan.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 |
  2 |       stan.sa 3.3 7/29/91
  3 |
  4 |       The entry point stan computes the tangent of
  5 |       an input argument;
  6 |       stand does the same except for denormalized input.
  7 |
  8 |       Input: Double-extended number X in location pointed to
  9 |               by address register a0.
 10 |
 11 |       Output: The value tan(X) returned in floating-point register Fp0.
 12 |
 13 |       Accuracy and Monotonicity: The returned result is within 3 ulp in
 14 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
 15 |               result is subsequently rounded to double precision. The
 16 |               result is provably monotonic in double precision.
 17 |
 18 |       Speed: The program sTAN takes approximately 170 cycles for
 19 |               input argument X such that |X| < 15Pi, which is the usual
 20 |               situation.
 21 |
 22 |       Algorithm:
 23 |
 24 |       1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
 25 |
 26 |       2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
 27 |               k = N mod 2, so in particular, k = 0 or 1.
 28 |
 29 |       3. If k is odd, go to 5.
 30 |
 31 |       4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
 32 |               rational function U/V where
 33 |               U = r + r*s*(P1 + s*(P2 + s*P3)), and
 34 |               V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
 35 |               Exit.
 36 |
 37 |       4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
 38 |               rational function U/V where
 39 |               U = r + r*s*(P1 + s*(P2 + s*P3)), and
 40 |               V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
 41 |               -Cot(r) = -V/U. Exit.
 42 |
 43 |       6. If |X| > 1, go to 8.
 44 |
 45 |       7. (|X|<2**(-40)) Tan(X) = X. Exit.
 46 |
 47 |       8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
 48 |
 49 
 50 |               Copyright (C) Motorola, Inc. 1990
 51 |                       All Rights Reserved
 52 |
 53 |       For details on the license for this file, please see the
 54 |       file, README, in this same directory.
 55 
 56 |STAN   idnt    2,1 | Motorola 040 Floating Point Software Package
 57 
 58         |section        8
 59 
 60 #include "fpsp.h"
 61 
 62 BOUNDS1:        .long 0x3FD78000,0x4004BC7E
 63 TWOBYPI:        .long 0x3FE45F30,0x6DC9C883
 64 
 65 TANQ4:  .long 0x3EA0B759,0xF50F8688
 66 TANP3:  .long 0xBEF2BAA5,0xA8924F04
 67 
 68 TANQ3:  .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
 69 
 70 TANP2:  .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
 71 
 72 TANQ2:  .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
 73 
 74 TANP1:  .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
 75 
 76 TANQ1:  .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
 77 
 78 INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
 79 
 80 TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
 81 TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
 82 
 83 |--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
 84 |--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
 85 |--MOST 69 BITS LONG.
 86         .global PITBL
 87 PITBL:
 88   .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
 89   .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
 90   .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
 91   .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000
 92   .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
 93   .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
 94   .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
 95   .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
 96   .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
 97   .long  0xC0040000,0x90836524,0x88034B96,0x20B00000
 98   .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
 99   .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
100   .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
101   .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
102   .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
103   .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
104   .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
105   .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
106   .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
107   .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
108   .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
109   .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
110   .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
111   .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
112   .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
113   .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
114   .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
115   .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
116   .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
117   .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
118   .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
119   .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
120   .long  0x00000000,0x00000000,0x00000000,0x00000000
121   .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
122   .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
123   .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
124   .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
125   .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
126   .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
127   .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
128   .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
129   .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
130   .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
131   .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000
132   .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
133   .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
134   .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
135   .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
136   .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
137   .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
138   .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
139   .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
140   .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
141   .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
142   .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000
143   .long  0x40040000,0x90836524,0x88034B96,0xA0B00000
144   .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
145   .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
146   .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
147   .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
148   .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
149   .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000
150   .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
151   .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
152   .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
153 
154         .set    INARG,FP_SCR4
155 
156         .set    TWOTO63,L_SCR1
157         .set    ENDFLAG,L_SCR2
158         .set    N,L_SCR3
159 
160         | xref  t_frcinx
161         |xref   t_extdnrm
162 
163         .global stand
164 stand:
165 |--TAN(X) = X FOR DENORMALIZED X
166 
167         bra             t_extdnrm
168 
169         .global stan
170 stan:
171         fmovex          (%a0),%fp0      | ...LOAD INPUT
172 
173         movel           (%a0),%d0
174         movew           4(%a0),%d0
175         andil           #0x7FFFFFFF,%d0
176 
177         cmpil           #0x3FD78000,%d0         | ...|X| >= 2**(-40)?
178         bges            TANOK1
179         bra             TANSM
180 TANOK1:
181         cmpil           #0x4004BC7E,%d0         | ...|X| < 15 PI?
182         blts            TANMAIN
183         bra             REDUCEX
184 
185 
186 TANMAIN:
187 |--THIS IS THE USUAL CASE, |X| <= 15 PI.
188 |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
189         fmovex          %fp0,%fp1
190         fmuld           TWOBYPI,%fp1    | ...X*2/PI
191 
192 |--HIDE THE NEXT TWO INSTRUCTIONS
193         leal            PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
194 
195 |--FP1 IS NOW READY
196         fmovel          %fp1,%d0                | ...CONVERT TO INTEGER
197 
198         asll            #4,%d0
199         addal           %d0,%a1         | ...ADDRESS N*PIBY2 IN Y1, Y2
200 
201         fsubx           (%a1)+,%fp0     | ...X-Y1
202 |--HIDE THE NEXT ONE
203 
204         fsubs           (%a1),%fp0      | ...FP0 IS R = (X-Y1)-Y2
205 
206         rorl            #5,%d0
207         andil           #0x80000000,%d0 | ...D0 WAS ODD IFF D0 < 0
208 
209 TANCONT:
210 
211         cmpil           #0,%d0
212         blt             NODD
213 
214         fmovex          %fp0,%fp1
215         fmulx           %fp1,%fp1               | ...S = R*R
216 
217         fmoved          TANQ4,%fp3
218         fmoved          TANP3,%fp2
219 
220         fmulx           %fp1,%fp3               | ...SQ4
221         fmulx           %fp1,%fp2               | ...SP3
222 
223         faddd           TANQ3,%fp3      | ...Q3+SQ4
224         faddx           TANP2,%fp2      | ...P2+SP3
225 
226         fmulx           %fp1,%fp3               | ...S(Q3+SQ4)
227         fmulx           %fp1,%fp2               | ...S(P2+SP3)
228 
229         faddx           TANQ2,%fp3      | ...Q2+S(Q3+SQ4)
230         faddx           TANP1,%fp2      | ...P1+S(P2+SP3)
231 
232         fmulx           %fp1,%fp3               | ...S(Q2+S(Q3+SQ4))
233         fmulx           %fp1,%fp2               | ...S(P1+S(P2+SP3))
234 
235         faddx           TANQ1,%fp3      | ...Q1+S(Q2+S(Q3+SQ4))
236         fmulx           %fp0,%fp2               | ...RS(P1+S(P2+SP3))
237 
238         fmulx           %fp3,%fp1               | ...S(Q1+S(Q2+S(Q3+SQ4)))
239 
240 
241         faddx           %fp2,%fp0               | ...R+RS(P1+S(P2+SP3))
242 
243 
244         fadds           #0x3F800000,%fp1        | ...1+S(Q1+...)
245 
246         fmovel          %d1,%fpcr               |restore users exceptions
247         fdivx           %fp1,%fp0               |last inst - possible exception set
248 
249         bra             t_frcinx
250 
251 NODD:
252         fmovex          %fp0,%fp1
253         fmulx           %fp0,%fp0               | ...S = R*R
254 
255         fmoved          TANQ4,%fp3
256         fmoved          TANP3,%fp2
257 
258         fmulx           %fp0,%fp3               | ...SQ4
259         fmulx           %fp0,%fp2               | ...SP3
260 
261         faddd           TANQ3,%fp3      | ...Q3+SQ4
262         faddx           TANP2,%fp2      | ...P2+SP3
263 
264         fmulx           %fp0,%fp3               | ...S(Q3+SQ4)
265         fmulx           %fp0,%fp2               | ...S(P2+SP3)
266 
267         faddx           TANQ2,%fp3      | ...Q2+S(Q3+SQ4)
268         faddx           TANP1,%fp2      | ...P1+S(P2+SP3)
269 
270         fmulx           %fp0,%fp3               | ...S(Q2+S(Q3+SQ4))
271         fmulx           %fp0,%fp2               | ...S(P1+S(P2+SP3))
272 
273         faddx           TANQ1,%fp3      | ...Q1+S(Q2+S(Q3+SQ4))
274         fmulx           %fp1,%fp2               | ...RS(P1+S(P2+SP3))
275 
276         fmulx           %fp3,%fp0               | ...S(Q1+S(Q2+S(Q3+SQ4)))
277 
278 
279         faddx           %fp2,%fp1               | ...R+RS(P1+S(P2+SP3))
280         fadds           #0x3F800000,%fp0        | ...1+S(Q1+...)
281 
282 
283         fmovex          %fp1,-(%sp)
284         eoril           #0x80000000,(%sp)
285 
286         fmovel          %d1,%fpcr               |restore users exceptions
287         fdivx           (%sp)+,%fp0     |last inst - possible exception set
288 
289         bra             t_frcinx
290 
291 TANBORS:
292 |--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
293 |--IF |X| < 2**(-40), RETURN X OR 1.
294         cmpil           #0x3FFF8000,%d0
295         bgts            REDUCEX
296 
297 TANSM:
298 
299         fmovex          %fp0,-(%sp)
300         fmovel          %d1,%fpcr                |restore users exceptions
301         fmovex          (%sp)+,%fp0     |last inst - possible exception set
302 
303         bra             t_frcinx
304 
305 
306 REDUCEX:
307 |--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
308 |--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
309 |--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
310 
311         fmovemx %fp2-%fp5,-(%a7)        | ...save FP2 through FP5
312         movel           %d2,-(%a7)
313         fmoves         #0x00000000,%fp1
314 
315 |--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
316 |--there is a danger of unwanted overflow in first LOOP iteration.  In this
317 |--case, reduce argument by one remainder step to make subsequent reduction
318 |--safe.
319         cmpil   #0x7ffeffff,%d0         |is argument dangerously large?
320         bnes    LOOP
321         movel   #0x7ffe0000,FP_SCR2(%a6)        |yes
322 |                                       ;create 2**16383*PI/2
323         movel   #0xc90fdaa2,FP_SCR2+4(%a6)
324         clrl    FP_SCR2+8(%a6)
325         ftstx   %fp0                    |test sign of argument
326         movel   #0x7fdc0000,FP_SCR3(%a6)        |create low half of 2**16383*
327 |                                       ;PI/2 at FP_SCR3
328         movel   #0x85a308d3,FP_SCR3+4(%a6)
329         clrl   FP_SCR3+8(%a6)
330         fblt    red_neg
331         orw     #0x8000,FP_SCR2(%a6)    |positive arg
332         orw     #0x8000,FP_SCR3(%a6)
333 red_neg:
334         faddx  FP_SCR2(%a6),%fp0                |high part of reduction is exact
335         fmovex  %fp0,%fp1               |save high result in fp1
336         faddx  FP_SCR3(%a6),%fp0                |low part of reduction
337         fsubx  %fp0,%fp1                        |determine low component of result
338         faddx  FP_SCR3(%a6),%fp1                |fp0/fp1 are reduced argument.
339 
340 |--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
341 |--integer quotient will be stored in N
342 |--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
343 
344 LOOP:
345         fmovex          %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
346         movew           INARG(%a6),%d0
347         movel          %d0,%a1          | ...save a copy of D0
348         andil           #0x00007FFF,%d0
349         subil           #0x00003FFF,%d0 | ...D0 IS K
350         cmpil           #28,%d0
351         bles            LASTLOOP
352 CONTLOOP:
353         subil           #27,%d0  | ...D0 IS L := K-27
354         movel           #0,ENDFLAG(%a6)
355         bras            WORK
356 LASTLOOP:
357         clrl            %d0             | ...D0 IS L := 0
358         movel           #1,ENDFLAG(%a6)
359 
360 WORK:
361 |--FIND THE REMAINDER OF (R,r) W.R.T.   2**L * (PI/2). L IS SO CHOSEN
362 |--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
363 
364 |--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
365 |--2**L * (PIby2_1), 2**L * (PIby2_2)
366 
367         movel           #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
368         subl            %d0,%d2         | ...BIASED EXPO OF 2**(-L)*(2/PI)
369 
370         movel           #0xA2F9836E,FP_SCR1+4(%a6)
371         movel           #0x4E44152A,FP_SCR1+8(%a6)
372         movew           %d2,FP_SCR1(%a6)        | ...FP_SCR1 is 2**(-L)*(2/PI)
373 
374         fmovex          %fp0,%fp2
375         fmulx           FP_SCR1(%a6),%fp2
376 |--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
377 |--FLOATING POINT FORMAT, THE TWO FMOVE'S       FMOVE.L FP <--> N
378 |--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
379 |--(SIGN(INARG)*2**63   +       FP2) - SIGN(INARG)*2**63 WILL GIVE
380 |--US THE DESIRED VALUE IN FLOATING POINT.
381 
382 |--HIDE SIX CYCLES OF INSTRUCTION
383         movel           %a1,%d2
384         swap            %d2
385         andil           #0x80000000,%d2
386         oril            #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
387         movel           %d2,TWOTO63(%a6)
388 
389         movel           %d0,%d2
390         addil           #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)
391 
392 |--FP2 IS READY
393         fadds           TWOTO63(%a6),%fp2       | ...THE FRACTIONAL PART OF FP1 IS ROUNDED
394 
395 |--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
396         movew           %d2,FP_SCR2(%a6)
397         clrw           FP_SCR2+2(%a6)
398         movel           #0xC90FDAA2,FP_SCR2+4(%a6)
399         clrl            FP_SCR2+8(%a6)          | ...FP_SCR2 is  2**(L) * Piby2_1
400 
401 |--FP2 IS READY
402         fsubs           TWOTO63(%a6),%fp2               | ...FP2 is N
403 
404         addil           #0x00003FDD,%d0
405         movew           %d0,FP_SCR3(%a6)
406         clrw           FP_SCR3+2(%a6)
407         movel           #0x85A308D3,FP_SCR3+4(%a6)
408         clrl            FP_SCR3+8(%a6)          | ...FP_SCR3 is 2**(L) * Piby2_2
409 
410         movel           ENDFLAG(%a6),%d0
411 
412 |--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
413 |--P2 = 2**(L) * Piby2_2
414         fmovex          %fp2,%fp4
415         fmulx           FP_SCR2(%a6),%fp4               | ...W = N*P1
416         fmovex          %fp2,%fp5
417         fmulx           FP_SCR3(%a6),%fp5               | ...w = N*P2
418         fmovex          %fp4,%fp3
419 |--we want P+p = W+w  but  |p| <= half ulp of P
420 |--Then, we need to compute  A := R-P   and  a := r-p
421         faddx           %fp5,%fp3                       | ...FP3 is P
422         fsubx           %fp3,%fp4                       | ...W-P
423 
424         fsubx           %fp3,%fp0                       | ...FP0 is A := R - P
425         faddx           %fp5,%fp4                       | ...FP4 is p = (W-P)+w
426 
427         fmovex          %fp0,%fp3                       | ...FP3 A
428         fsubx           %fp4,%fp1                       | ...FP1 is a := r - p
429 
430 |--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
431 |--|r| <= half ulp of R.
432         faddx           %fp1,%fp0                       | ...FP0 is R := A+a
433 |--No need to calculate r if this is the last loop
434         cmpil           #0,%d0
435         bgt             RESTORE
436 
437 |--Need to calculate r
438         fsubx           %fp0,%fp3                       | ...A-R
439         faddx           %fp3,%fp1                       | ...FP1 is r := (A-R)+a
440         bra             LOOP
441 
442 RESTORE:
443         fmovel          %fp2,N(%a6)
444         movel           (%a7)+,%d2
445         fmovemx (%a7)+,%fp2-%fp5
446 
447 
448         movel           N(%a6),%d0
449         rorl            #1,%d0
450 
451 
452         bra             TANCONT
453 
454         |end

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php