~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/math-emu/wm_sqrt.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 */
  2         .file   "wm_sqrt.S"
  3 /*---------------------------------------------------------------------------+
  4  |  wm_sqrt.S                                                                |
  5  |                                                                           |
  6  | Fixed point arithmetic square root evaluation.                            |
  7  |                                                                           |
  8  | Copyright (C) 1992,1993,1995,1997                                         |
  9  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 10  |                       Australia.  E-mail billm@suburbia.net               |
 11  |                                                                           |
 12  | Call from C as:                                                           |
 13  |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 14  |                                                                           |
 15  +---------------------------------------------------------------------------*/
 16 
 17 /*---------------------------------------------------------------------------+
 18  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 19  |    returns the square root of n in n.                                     |
 20  |                                                                           |
 21  |  Use Newton's method to compute the square root of a number, which must   |
 22  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 23  |  Does not check the sign or tag of the argument.                          |
 24  |  Sets the exponent, but not the sign or tag of the result.                |
 25  |                                                                           |
 26  |  The guess is kept in %esi:%edi                                           |
 27  +---------------------------------------------------------------------------*/
 28 
 29 #include "exception.h"
 30 #include "fpu_emu.h"
 31 
 32 
 33 #ifndef NON_REENTRANT_FPU
 34 /*      Local storage on the stack: */
 35 #define FPU_accum_3     -4(%ebp)        /* ms word */
 36 #define FPU_accum_2     -8(%ebp)
 37 #define FPU_accum_1     -12(%ebp)
 38 #define FPU_accum_0     -16(%ebp)
 39 
 40 /*
 41  * The de-normalised argument:
 42  *                  sq_2                  sq_1              sq_0
 43  *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 44  *           ^ binary point here
 45  */
 46 #define FPU_fsqrt_arg_2 -20(%ebp)       /* ms word */
 47 #define FPU_fsqrt_arg_1 -24(%ebp)
 48 #define FPU_fsqrt_arg_0 -28(%ebp)       /* ls word, at most the ms bit is set */
 49 
 50 #else
 51 /*      Local storage in a static area: */
 52 .data
 53         .align 4,0
 54 FPU_accum_3:
 55         .long   0               /* ms word */
 56 FPU_accum_2:
 57         .long   0
 58 FPU_accum_1:
 59         .long   0
 60 FPU_accum_0:
 61         .long   0
 62 
 63 /* The de-normalised argument:
 64                     sq_2                  sq_1              sq_0
 65           b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 66              ^ binary point here
 67  */
 68 FPU_fsqrt_arg_2:
 69         .long   0               /* ms word */
 70 FPU_fsqrt_arg_1:
 71         .long   0
 72 FPU_fsqrt_arg_0:
 73         .long   0               /* ls word, at most the ms bit is set */
 74 #endif /* NON_REENTRANT_FPU */ 
 75 
 76 
 77 .text
 78 SYM_FUNC_START(wm_sqrt)
 79         pushl   %ebp
 80         movl    %esp,%ebp
 81 #ifndef NON_REENTRANT_FPU
 82         subl    $28,%esp
 83 #endif /* NON_REENTRANT_FPU */
 84         pushl   %esi
 85         pushl   %edi
 86         pushl   %ebx
 87 
 88         movl    PARAM1,%esi
 89 
 90         movl    SIGH(%esi),%eax
 91         movl    SIGL(%esi),%ecx
 92         xorl    %edx,%edx
 93 
 94 /* We use a rough linear estimate for the first guess.. */
 95 
 96         cmpw    EXP_BIAS,EXP(%esi)
 97         jnz     sqrt_arg_ge_2
 98 
 99         shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
100         rcrl    $1,%ecx
101         rcrl    $1,%edx
102 
103 sqrt_arg_ge_2:
104 /* From here on, n is never accessed directly again until it is
105    replaced by the answer. */
106 
107         movl    %eax,FPU_fsqrt_arg_2            /* ms word of n */
108         movl    %ecx,FPU_fsqrt_arg_1
109         movl    %edx,FPU_fsqrt_arg_0
110 
111 /* Make a linear first estimate */
112         shrl    $1,%eax
113         addl    $0x40000000,%eax
114         movl    $0xaaaaaaaa,%ecx
115         mull    %ecx
116         shll    %edx                    /* max result was 7fff... */
117         testl   $0x80000000,%edx        /* but min was 3fff... */
118         jnz     sqrt_prelim_no_adjust
119 
120         movl    $0x80000000,%edx        /* round up */
121 
122 sqrt_prelim_no_adjust:
123         movl    %edx,%esi       /* Our first guess */
124 
125 /* We have now computed (approx)   (2 + x) / 3, which forms the basis
126    for a few iterations of Newton's method */
127 
128         movl    FPU_fsqrt_arg_2,%ecx    /* ms word */
129 
130 /*
131  * From our initial estimate, three iterations are enough to get us
132  * to 30 bits or so. This will then allow two iterations at better
133  * precision to complete the process.
134  */
135 
136 /* Compute  (g + n/g)/2  at each iteration (g is the guess). */
137         shrl    %ecx            /* Doing this first will prevent a divide */
138                                 /* overflow later. */
139 
140         movl    %ecx,%edx       /* msw of the arg / 2 */
141         divl    %esi            /* current estimate */
142         shrl    %esi            /* divide by 2 */
143         addl    %eax,%esi       /* the new estimate */
144 
145         movl    %ecx,%edx
146         divl    %esi
147         shrl    %esi
148         addl    %eax,%esi
149 
150         movl    %ecx,%edx
151         divl    %esi
152         shrl    %esi
153         addl    %eax,%esi
154 
155 /*
156  * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
157  * we improve it to 60 bits or so.
158  *
159  * The strategy from now on is to compute new estimates from
160  *      guess := guess + (n - guess^2) / (2 * guess)
161  */
162 
163 /* First, find the square of the guess */
164         movl    %esi,%eax
165         mull    %esi
166 /* guess^2 now in %edx:%eax */
167 
168         movl    FPU_fsqrt_arg_1,%ecx
169         subl    %ecx,%eax
170         movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
171         sbbl    %ecx,%edx
172         jnc     sqrt_stage_2_positive
173 
174 /* Subtraction gives a negative result,
175    negate the result before division. */
176         notl    %edx
177         notl    %eax
178         addl    $1,%eax
179         adcl    $0,%edx
180 
181         divl    %esi
182         movl    %eax,%ecx
183 
184         movl    %edx,%eax
185         divl    %esi
186         jmp     sqrt_stage_2_finish
187 
188 sqrt_stage_2_positive:
189         divl    %esi
190         movl    %eax,%ecx
191 
192         movl    %edx,%eax
193         divl    %esi
194 
195         notl    %ecx
196         notl    %eax
197         addl    $1,%eax
198         adcl    $0,%ecx
199 
200 sqrt_stage_2_finish:
201         sarl    $1,%ecx         /* divide by 2 */
202         rcrl    $1,%eax
203 
204         /* Form the new estimate in %esi:%edi */
205         movl    %eax,%edi
206         addl    %ecx,%esi
207 
208         jnz     sqrt_stage_2_done       /* result should be [1..2) */
209 
210 #ifdef PARANOID
211 /* It should be possible to get here only if the arg is ffff....ffff */
212         cmpl    $0xffffffff,FPU_fsqrt_arg_1
213         jnz     sqrt_stage_2_error
214 #endif /* PARANOID */
215 
216 /* The best rounded result. */
217         xorl    %eax,%eax
218         decl    %eax
219         movl    %eax,%edi
220         movl    %eax,%esi
221         movl    $0x7fffffff,%eax
222         jmp     sqrt_round_result
223 
224 #ifdef PARANOID
225 sqrt_stage_2_error:
226         pushl   EX_INTERNAL|0x213
227         call    EXCEPTION
228 #endif /* PARANOID */ 
229 
230 sqrt_stage_2_done:
231 
232 /* Now the square root has been computed to better than 60 bits. */
233 
234 /* Find the square of the guess. */
235         movl    %edi,%eax               /* ls word of guess */
236         mull    %edi
237         movl    %edx,FPU_accum_1
238 
239         movl    %esi,%eax
240         mull    %esi
241         movl    %edx,FPU_accum_3
242         movl    %eax,FPU_accum_2
243 
244         movl    %edi,%eax
245         mull    %esi
246         addl    %eax,FPU_accum_1
247         adcl    %edx,FPU_accum_2
248         adcl    $0,FPU_accum_3
249 
250 /*      movl    %esi,%eax */
251 /*      mull    %edi */
252         addl    %eax,FPU_accum_1
253         adcl    %edx,FPU_accum_2
254         adcl    $0,FPU_accum_3
255 
256 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
257 
258         movl    FPU_fsqrt_arg_0,%eax            /* get normalized n */
259         subl    %eax,FPU_accum_1
260         movl    FPU_fsqrt_arg_1,%eax
261         sbbl    %eax,FPU_accum_2
262         movl    FPU_fsqrt_arg_2,%eax            /* ms word of normalized n */
263         sbbl    %eax,FPU_accum_3
264         jnc     sqrt_stage_3_positive
265 
266 /* Subtraction gives a negative result,
267    negate the result before division */
268         notl    FPU_accum_1
269         notl    FPU_accum_2
270         notl    FPU_accum_3
271         addl    $1,FPU_accum_1
272         adcl    $0,FPU_accum_2
273 
274 #ifdef PARANOID
275         adcl    $0,FPU_accum_3  /* This must be zero */
276         jz      sqrt_stage_3_no_error
277 
278 sqrt_stage_3_error:
279         pushl   EX_INTERNAL|0x207
280         call    EXCEPTION
281 
282 sqrt_stage_3_no_error:
283 #endif /* PARANOID */
284 
285         movl    FPU_accum_2,%edx
286         movl    FPU_accum_1,%eax
287         divl    %esi
288         movl    %eax,%ecx
289 
290         movl    %edx,%eax
291         divl    %esi
292 
293         sarl    $1,%ecx         /* divide by 2 */
294         rcrl    $1,%eax
295 
296         /* prepare to round the result */
297 
298         addl    %ecx,%edi
299         adcl    $0,%esi
300 
301         jmp     sqrt_stage_3_finished
302 
303 sqrt_stage_3_positive:
304         movl    FPU_accum_2,%edx
305         movl    FPU_accum_1,%eax
306         divl    %esi
307         movl    %eax,%ecx
308 
309         movl    %edx,%eax
310         divl    %esi
311 
312         sarl    $1,%ecx         /* divide by 2 */
313         rcrl    $1,%eax
314 
315         /* prepare to round the result */
316 
317         notl    %eax            /* Negate the correction term */
318         notl    %ecx
319         addl    $1,%eax
320         adcl    $0,%ecx         /* carry here ==> correction == 0 */
321         adcl    $0xffffffff,%esi
322 
323         addl    %ecx,%edi
324         adcl    $0,%esi
325 
326 sqrt_stage_3_finished:
327 
328 /*
329  * The result in %esi:%edi:%esi should be good to about 90 bits here,
330  * and the rounding information here does not have sufficient accuracy
331  * in a few rare cases.
332  */
333         cmpl    $0xffffffe0,%eax
334         ja      sqrt_near_exact_x
335 
336         cmpl    $0x00000020,%eax
337         jb      sqrt_near_exact
338 
339         cmpl    $0x7fffffe0,%eax
340         jb      sqrt_round_result
341 
342         cmpl    $0x80000020,%eax
343         jb      sqrt_get_more_precision
344 
345 sqrt_round_result:
346 /* Set up for rounding operations */
347         movl    %eax,%edx
348         movl    %esi,%eax
349         movl    %edi,%ebx
350         movl    PARAM1,%edi
351         movw    EXP_BIAS,EXP(%edi)      /* Result is in  [1.0 .. 2.0) */
352         jmp     fpu_reg_round
353 
354 
355 sqrt_near_exact_x:
356 /* First, the estimate must be rounded up. */
357         addl    $1,%edi
358         adcl    $0,%esi
359 
360 sqrt_near_exact:
361 /*
362  * This is an easy case because x^1/2 is monotonic.
363  * We need just find the square of our estimate, compare it
364  * with the argument, and deduce whether our estimate is
365  * above, below, or exact. We use the fact that the estimate
366  * is known to be accurate to about 90 bits.
367  */
368         movl    %edi,%eax               /* ls word of guess */
369         mull    %edi
370         movl    %edx,%ebx               /* 2nd ls word of square */
371         movl    %eax,%ecx               /* ls word of square */
372 
373         movl    %edi,%eax
374         mull    %esi
375         addl    %eax,%ebx
376         addl    %eax,%ebx
377 
378 #ifdef PARANOID
379         cmp     $0xffffffb0,%ebx
380         jb      sqrt_near_exact_ok
381 
382         cmp     $0x00000050,%ebx
383         ja      sqrt_near_exact_ok
384 
385         pushl   EX_INTERNAL|0x214
386         call    EXCEPTION
387 
388 sqrt_near_exact_ok:
389 #endif /* PARANOID */ 
390 
391         or      %ebx,%ebx
392         js      sqrt_near_exact_small
393 
394         jnz     sqrt_near_exact_large
395 
396         or      %ebx,%edx
397         jnz     sqrt_near_exact_large
398 
399 /* Our estimate is exactly the right answer */
400         xorl    %eax,%eax
401         jmp     sqrt_round_result
402 
403 sqrt_near_exact_small:
404 /* Our estimate is too small */
405         movl    $0x000000ff,%eax
406         jmp     sqrt_round_result
407         
408 sqrt_near_exact_large:
409 /* Our estimate is too large, we need to decrement it */
410         subl    $1,%edi
411         sbbl    $0,%esi
412         movl    $0xffffff00,%eax
413         jmp     sqrt_round_result
414 
415 
416 sqrt_get_more_precision:
417 /* This case is almost the same as the above, except we start
418    with an extra bit of precision in the estimate. */
419         stc                     /* The extra bit. */
420         rcll    $1,%edi         /* Shift the estimate left one bit */
421         rcll    $1,%esi
422 
423         movl    %edi,%eax               /* ls word of guess */
424         mull    %edi
425         movl    %edx,%ebx               /* 2nd ls word of square */
426         movl    %eax,%ecx               /* ls word of square */
427 
428         movl    %edi,%eax
429         mull    %esi
430         addl    %eax,%ebx
431         addl    %eax,%ebx
432 
433 /* Put our estimate back to its original value */
434         stc                     /* The ms bit. */
435         rcrl    $1,%esi         /* Shift the estimate left one bit */
436         rcrl    $1,%edi
437 
438 #ifdef PARANOID
439         cmp     $0xffffff60,%ebx
440         jb      sqrt_more_prec_ok
441 
442         cmp     $0x000000a0,%ebx
443         ja      sqrt_more_prec_ok
444 
445         pushl   EX_INTERNAL|0x215
446         call    EXCEPTION
447 
448 sqrt_more_prec_ok:
449 #endif /* PARANOID */ 
450 
451         or      %ebx,%ebx
452         js      sqrt_more_prec_small
453 
454         jnz     sqrt_more_prec_large
455 
456         or      %ebx,%ecx
457         jnz     sqrt_more_prec_large
458 
459 /* Our estimate is exactly the right answer */
460         movl    $0x80000000,%eax
461         jmp     sqrt_round_result
462 
463 sqrt_more_prec_small:
464 /* Our estimate is too small */
465         movl    $0x800000ff,%eax
466         jmp     sqrt_round_result
467         
468 sqrt_more_prec_large:
469 /* Our estimate is too large */
470         movl    $0x7fffff00,%eax
471         jmp     sqrt_round_result
472 SYM_FUNC_END(wm_sqrt)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php