~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/mips/math-emu/dp_maddf.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * IEEE754 floating point arithmetic
  4  * double precision: MADDF.f (Fused Multiply Add)
  5  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
  6  *
  7  * MIPS floating point support
  8  * Copyright (C) 2015 Imagination Technologies, Ltd.
  9  * Author: Markos Chandras <markos.chandras@imgtec.com>
 10  */
 11 
 12 #include "ieee754dp.h"
 13 
 14 
 15 /* 128 bits shift right logical with rounding. */
 16 static void srl128(u64 *hptr, u64 *lptr, int count)
 17 {
 18         u64 low;
 19 
 20         if (count >= 128) {
 21                 *lptr = *hptr != 0 || *lptr != 0;
 22                 *hptr = 0;
 23         } else if (count >= 64) {
 24                 if (count == 64) {
 25                         *lptr = *hptr | (*lptr != 0);
 26                 } else {
 27                         low = *lptr;
 28                         *lptr = *hptr >> (count - 64);
 29                         *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
 30                 }
 31                 *hptr = 0;
 32         } else {
 33                 low = *lptr;
 34                 *lptr = low >> count | *hptr << (64 - count);
 35                 *lptr |= (low << (64 - count)) != 0;
 36                 *hptr = *hptr >> count;
 37         }
 38 }
 39 
 40 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 41                                  union ieee754dp y, enum maddf_flags flags)
 42 {
 43         int re;
 44         int rs;
 45         unsigned int lxm;
 46         unsigned int hxm;
 47         unsigned int lym;
 48         unsigned int hym;
 49         u64 lrm;
 50         u64 hrm;
 51         u64 lzm;
 52         u64 hzm;
 53         u64 t;
 54         u64 at;
 55         int s;
 56 
 57         COMPXDP;
 58         COMPYDP;
 59         COMPZDP;
 60 
 61         EXPLODEXDP;
 62         EXPLODEYDP;
 63         EXPLODEZDP;
 64 
 65         FLUSHXDP;
 66         FLUSHYDP;
 67         FLUSHZDP;
 68 
 69         ieee754_clearcx();
 70 
 71         rs = xs ^ ys;
 72         if (flags & MADDF_NEGATE_PRODUCT)
 73                 rs ^= 1;
 74         if (flags & MADDF_NEGATE_ADDITION)
 75                 zs ^= 1;
 76 
 77         /*
 78          * Handle the cases when at least one of x, y or z is a NaN.
 79          * Order of precedence is sNaN, qNaN and z, x, y.
 80          */
 81         if (zc == IEEE754_CLASS_SNAN)
 82                 return ieee754dp_nanxcpt(z);
 83         if (xc == IEEE754_CLASS_SNAN)
 84                 return ieee754dp_nanxcpt(x);
 85         if (yc == IEEE754_CLASS_SNAN)
 86                 return ieee754dp_nanxcpt(y);
 87         if (zc == IEEE754_CLASS_QNAN)
 88                 return z;
 89         if (xc == IEEE754_CLASS_QNAN)
 90                 return x;
 91         if (yc == IEEE754_CLASS_QNAN)
 92                 return y;
 93 
 94         if (zc == IEEE754_CLASS_DNORM)
 95                 DPDNORMZ;
 96         /* ZERO z cases are handled separately below */
 97 
 98         switch (CLPAIR(xc, yc)) {
 99 
100         /*
101          * Infinity handling
102          */
103         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
104         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
105                 ieee754_setcx(IEEE754_INVALID_OPERATION);
106                 return ieee754dp_indef();
107 
108         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
109         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
110         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
111         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
112         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
113                 if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
114                         /*
115                          * Cases of addition of infinities with opposite signs
116                          * or subtraction of infinities with same signs.
117                          */
118                         ieee754_setcx(IEEE754_INVALID_OPERATION);
119                         return ieee754dp_indef();
120                 }
121                 /*
122                  * z is here either not an infinity, or an infinity having the
123                  * same sign as product (x*y). The result must be an infinity,
124                  * and its sign is determined only by the sign of product (x*y).
125                  */
126                 return ieee754dp_inf(rs);
127 
128         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
129         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
130         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
131         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
132         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
133                 if (zc == IEEE754_CLASS_INF)
134                         return ieee754dp_inf(zs);
135                 if (zc == IEEE754_CLASS_ZERO) {
136                         /* Handle cases +0 + (-0) and similar ones. */
137                         if (zs == rs)
138                                 /*
139                                  * Cases of addition of zeros of equal signs
140                                  * or subtraction of zeroes of opposite signs.
141                                  * The sign of the resulting zero is in any
142                                  * such case determined only by the sign of z.
143                                  */
144                                 return z;
145 
146                         return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
147                 }
148                 /* x*y is here 0, and z is not 0, so just return z */
149                 return z;
150 
151         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
152                 DPDNORMX;
153                 fallthrough;
154         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
155                 if (zc == IEEE754_CLASS_INF)
156                         return ieee754dp_inf(zs);
157                 DPDNORMY;
158                 break;
159 
160         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
161                 if (zc == IEEE754_CLASS_INF)
162                         return ieee754dp_inf(zs);
163                 DPDNORMX;
164                 break;
165 
166         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
167                 if (zc == IEEE754_CLASS_INF)
168                         return ieee754dp_inf(zs);
169                 /* continue to real computations */
170         }
171 
172         /* Finally get to do some computation */
173 
174         /*
175          * Do the multiplication bit first
176          *
177          * rm = xm * ym, re = xe + ye basically
178          *
179          * At this point xm and ym should have been normalized.
180          */
181         assert(xm & DP_HIDDEN_BIT);
182         assert(ym & DP_HIDDEN_BIT);
183 
184         re = xe + ye;
185 
186         /* shunt to top of word */
187         xm <<= 64 - (DP_FBITS + 1);
188         ym <<= 64 - (DP_FBITS + 1);
189 
190         /*
191          * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
192          */
193 
194         lxm = xm;
195         hxm = xm >> 32;
196         lym = ym;
197         hym = ym >> 32;
198 
199         lrm = DPXMULT(lxm, lym);
200         hrm = DPXMULT(hxm, hym);
201 
202         t = DPXMULT(lxm, hym);
203 
204         at = lrm + (t << 32);
205         hrm += at < lrm;
206         lrm = at;
207 
208         hrm = hrm + (t >> 32);
209 
210         t = DPXMULT(hxm, lym);
211 
212         at = lrm + (t << 32);
213         hrm += at < lrm;
214         lrm = at;
215 
216         hrm = hrm + (t >> 32);
217 
218         /* Put explicit bit at bit 126 if necessary */
219         if ((int64_t)hrm < 0) {
220                 lrm = (hrm << 63) | (lrm >> 1);
221                 hrm = hrm >> 1;
222                 re++;
223         }
224 
225         assert(hrm & (1 << 62));
226 
227         if (zc == IEEE754_CLASS_ZERO) {
228                 /*
229                  * Move explicit bit from bit 126 to bit 55 since the
230                  * ieee754dp_format code expects the mantissa to be
231                  * 56 bits wide (53 + 3 rounding bits).
232                  */
233                 srl128(&hrm, &lrm, (126 - 55));
234                 return ieee754dp_format(rs, re, lrm);
235         }
236 
237         /* Move explicit bit from bit 52 to bit 126 */
238         lzm = 0;
239         hzm = zm << 10;
240         assert(hzm & (1 << 62));
241 
242         /* Make the exponents the same */
243         if (ze > re) {
244                 /*
245                  * Have to shift y fraction right to align.
246                  */
247                 s = ze - re;
248                 srl128(&hrm, &lrm, s);
249                 re += s;
250         } else if (re > ze) {
251                 /*
252                  * Have to shift x fraction right to align.
253                  */
254                 s = re - ze;
255                 srl128(&hzm, &lzm, s);
256                 ze += s;
257         }
258         assert(ze == re);
259         assert(ze <= DP_EMAX);
260 
261         /* Do the addition */
262         if (zs == rs) {
263                 /*
264                  * Generate 128 bit result by adding two 127 bit numbers
265                  * leaving result in hzm:lzm, zs and ze.
266                  */
267                 hzm = hzm + hrm + (lzm > (lzm + lrm));
268                 lzm = lzm + lrm;
269                 if ((int64_t)hzm < 0) {        /* carry out */
270                         srl128(&hzm, &lzm, 1);
271                         ze++;
272                 }
273         } else {
274                 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
275                         hzm = hzm - hrm - (lzm < lrm);
276                         lzm = lzm - lrm;
277                 } else {
278                         hzm = hrm - hzm - (lrm < lzm);
279                         lzm = lrm - lzm;
280                         zs = rs;
281                 }
282                 if (lzm == 0 && hzm == 0)
283                         return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
284 
285                 /*
286                  * Put explicit bit at bit 126 if necessary.
287                  */
288                 if (hzm == 0) {
289                         /* left shift by 63 or 64 bits */
290                         if ((int64_t)lzm < 0) {
291                                 /* MSB of lzm is the explicit bit */
292                                 hzm = lzm >> 1;
293                                 lzm = lzm << 63;
294                                 ze -= 63;
295                         } else {
296                                 hzm = lzm;
297                                 lzm = 0;
298                                 ze -= 64;
299                         }
300                 }
301 
302                 t = 0;
303                 while ((hzm >> (62 - t)) == 0)
304                         t++;
305 
306                 assert(t <= 62);
307                 if (t) {
308                         hzm = hzm << t | lzm >> (64 - t);
309                         lzm = lzm << t;
310                         ze -= t;
311                 }
312         }
313 
314         /*
315          * Move explicit bit from bit 126 to bit 55 since the
316          * ieee754dp_format code expects the mantissa to be
317          * 56 bits wide (53 + 3 rounding bits).
318          */
319         srl128(&hzm, &lzm, (126 - 55));
320 
321         return ieee754dp_format(zs, ze, lzm);
322 }
323 
324 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
325                                 union ieee754dp y)
326 {
327         return _dp_maddf(z, x, y, 0);
328 }
329 
330 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
331                                 union ieee754dp y)
332 {
333         return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
334 }
335 
336 union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
337                                 union ieee754dp y)
338 {
339         return _dp_maddf(z, x, y, 0);
340 }
341 
342 union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
343                                 union ieee754dp y)
344 {
345         return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
346 }
347 
348 union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
349                                 union ieee754dp y)
350 {
351         return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
352 }
353 
354 union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
355                                 union ieee754dp y)
356 {
357         return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
358 }
359 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php