1 /* Software floating-point emulation. 1 /* Software floating-point emulation. 2 Basic two-word fraction declaration and man 2 Basic two-word fraction declaration and manipulation. 3 Copyright (C) 1997,1998,1999 Free Software 3 Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. 4 This file is part of the GNU C Library. 4 This file is part of the GNU C Library. 5 Contributed by Richard Henderson (rth@cygnu 5 Contributed by Richard Henderson (rth@cygnus.com), 6 Jakub Jelinek (jj@ultra.linu 6 Jakub Jelinek (jj@ultra.linux.cz), 7 David S. Miller (davem@redha 7 David S. Miller (davem@redhat.com) and 8 Peter Maydell (pmaydell@chia 8 Peter Maydell (pmaydell@chiark.greenend.org.uk). 9 9 10 The GNU C Library is free software; you can 10 The GNU C Library is free software; you can redistribute it and/or 11 modify it under the terms of the GNU Librar 11 modify it under the terms of the GNU Library General Public License as 12 published by the Free Software Foundation; 12 published by the Free Software Foundation; either version 2 of the 13 License, or (at your option) any later vers 13 License, or (at your option) any later version. 14 14 15 The GNU C Library is distributed in the hop 15 The GNU C Library is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 Library General Public License for more det 18 Library General Public License for more details. 19 19 20 You should have received a copy of the GNU 20 You should have received a copy of the GNU Library General Public 21 License along with the GNU C Library; see t 21 License along with the GNU C Library; see the file COPYING.LIB. If 22 not, write to the Free Software Foundation, 22 not, write to the Free Software Foundation, Inc., 23 59 Temple Place - Suite 330, Boston, MA 021 23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 24 24 25 #ifndef __MATH_EMU_OP_2_H__ 25 #ifndef __MATH_EMU_OP_2_H__ 26 #define __MATH_EMU_OP_2_H__ 26 #define __MATH_EMU_OP_2_H__ 27 27 28 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X## 28 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0 = 0, X##_f1 = 0 29 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_ 29 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1) 30 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_ 30 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I) 31 #define _FP_FRAC_HIGH_2(X) (X##_f1) 31 #define _FP_FRAC_HIGH_2(X) (X##_f1) 32 #define _FP_FRAC_LOW_2(X) (X##_f0) 32 #define _FP_FRAC_LOW_2(X) (X##_f0) 33 #define _FP_FRAC_WORD_2(X,w) (X##_f##w) 33 #define _FP_FRAC_WORD_2(X,w) (X##_f##w) 34 #define _FP_FRAC_SLL_2(X, N) ( 34 #define _FP_FRAC_SLL_2(X, N) ( \ 35 (void) (((N) < _FP_W_TYPE_SIZE) 35 (void) (((N) < _FP_W_TYPE_SIZE) \ 36 ? ({ 36 ? ({ \ 37 if (__builtin_constant_p(N) && 37 if (__builtin_constant_p(N) && (N) == 1) { \ 38 X##_f1 = X##_f1 + X##_ 38 X##_f1 = X##_f1 + X##_f1 + \ 39 (((_FP_WS_TYPE 39 (((_FP_WS_TYPE) (X##_f0)) < 0); \ 40 X##_f0 += X##_f0; 40 X##_f0 += X##_f0; \ 41 } else { 41 } else { \ 42 X##_f1 = X##_f1 << (N) 42 X##_f1 = X##_f1 << (N) | X##_f0 >> \ 43 43 (_FP_W_TYPE_SIZE - (N)); \ 44 X##_f0 <<= (N); 44 X##_f0 <<= (N); \ 45 } 45 } \ 46 0; 46 0; \ 47 }) 47 }) \ 48 : ({ 48 : ({ \ 49 X##_f1 = X##_f0 << ((N) - _FP_W_ 49 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ 50 X##_f0 = 0; 50 X##_f0 = 0; \ 51 }))) 51 }))) 52 52 53 53 54 #define _FP_FRAC_SRL_2(X, N) ( 54 #define _FP_FRAC_SRL_2(X, N) ( \ 55 (void) (((N) < _FP_W_TYPE_SIZE) 55 (void) (((N) < _FP_W_TYPE_SIZE) \ 56 ? ({ 56 ? ({ \ 57 X##_f0 = X##_f0 >> (N) | X##_f1 57 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ 58 X##_f1 >>= (N); 58 X##_f1 >>= (N); \ 59 }) 59 }) \ 60 : ({ 60 : ({ \ 61 X##_f0 = X##_f1 >> ((N) - _FP_W_ 61 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ 62 X##_f1 = 0; 62 X##_f1 = 0; \ 63 }))) 63 }))) 64 64 65 65 66 /* Right shift with sticky-lsb. */ 66 /* Right shift with sticky-lsb. */ 67 #define _FP_FRAC_SRS_2(X, N, sz) ( 67 #define _FP_FRAC_SRS_2(X, N, sz) ( \ 68 (void) (((N) < _FP_W_TYPE_SIZE) 68 (void) (((N) < _FP_W_TYPE_SIZE) \ 69 ? ({ 69 ? ({ \ 70 X##_f0 = (X##_f1 << (_FP_W_TYPE_ 70 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \ 71 | (__builtin_constant_ 71 | (__builtin_constant_p(N) && (N) == 1 \ 72 ? X##_f0 & 1 72 ? X##_f0 & 1 \ 73 : (X##_f0 << (_FP_W 73 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ 74 X##_f1 >>= (N); 74 X##_f1 >>= (N); \ 75 }) 75 }) \ 76 : ({ 76 : ({ \ 77 X##_f0 = (X##_f1 >> ((N) - _FP_W 77 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \ 78 | ((((N) == _FP_W_TYPE 78 | ((((N) == _FP_W_TYPE_SIZE \ 79 ? 0 79 ? 0 \ 80 : (X##_f1 << (2*_ 80 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \ 81 | X##_f0) != 0)); 81 | X##_f0) != 0)); \ 82 X##_f1 = 0; 82 X##_f1 = 0; \ 83 }))) 83 }))) 84 84 85 #define _FP_FRAC_ADDI_2(X,I) \ 85 #define _FP_FRAC_ADDI_2(X,I) \ 86 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) 86 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) 87 87 88 #define _FP_FRAC_ADD_2(R,X,Y) \ 88 #define _FP_FRAC_ADD_2(R,X,Y) \ 89 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_ 89 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 90 90 91 #define _FP_FRAC_SUB_2(R,X,Y) \ 91 #define _FP_FRAC_SUB_2(R,X,Y) \ 92 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_ 92 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 93 93 94 #define _FP_FRAC_DEC_2(X,Y) \ 94 #define _FP_FRAC_DEC_2(X,Y) \ 95 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_ 95 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0) 96 96 97 #define _FP_FRAC_CLZ_2(R,X) \ 97 #define _FP_FRAC_CLZ_2(R,X) \ 98 do { \ 98 do { \ 99 if (X##_f1) \ 99 if (X##_f1) \ 100 __FP_CLZ(R,X##_f1); \ 100 __FP_CLZ(R,X##_f1); \ 101 else \ 101 else \ 102 { \ 102 { \ 103 __FP_CLZ(R,X##_f0); \ 103 __FP_CLZ(R,X##_f0); \ 104 R += _FP_W_TYPE_SIZE; \ 104 R += _FP_W_TYPE_SIZE; \ 105 } \ 105 } \ 106 } while(0) 106 } while(0) 107 107 108 /* Predicates */ 108 /* Predicates */ 109 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) 109 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0) 110 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X## 110 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) 111 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH 111 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) 112 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_F 112 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) 113 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y## 113 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0) 114 #define _FP_FRAC_GT_2(X, Y) \ 114 #define _FP_FRAC_GT_2(X, Y) \ 115 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X## 115 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0)) 116 #define _FP_FRAC_GE_2(X, Y) \ 116 #define _FP_FRAC_GE_2(X, Y) \ 117 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X## 117 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)) 118 118 119 #define _FP_ZEROFRAC_2 0, 0 119 #define _FP_ZEROFRAC_2 0, 0 120 #define _FP_MINFRAC_2 0, 1 120 #define _FP_MINFRAC_2 0, 1 121 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE 121 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) 122 122 123 /* 123 /* 124 * Internals 124 * Internals 125 */ 125 */ 126 126 127 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f 127 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1) 128 128 129 #define __FP_CLZ_2(R, xh, xl) \ 129 #define __FP_CLZ_2(R, xh, xl) \ 130 do { \ 130 do { \ 131 if (xh) \ 131 if (xh) \ 132 __FP_CLZ(R,xh); \ 132 __FP_CLZ(R,xh); \ 133 else \ 133 else \ 134 { \ 134 { \ 135 __FP_CLZ(R,xl); \ 135 __FP_CLZ(R,xl); \ 136 R += _FP_W_TYPE_SIZE; \ 136 R += _FP_W_TYPE_SIZE; \ 137 } \ 137 } \ 138 } while(0) 138 } while(0) 139 139 140 #if 0 140 #if 0 141 141 142 #ifndef __FP_FRAC_ADDI_2 142 #ifndef __FP_FRAC_ADDI_2 143 #define __FP_FRAC_ADDI_2(xh, xl, i) \ 143 #define __FP_FRAC_ADDI_2(xh, xl, i) \ 144 (xh += ((xl += i) < i)) 144 (xh += ((xl += i) < i)) 145 #endif 145 #endif 146 #ifndef __FP_FRAC_ADD_2 146 #ifndef __FP_FRAC_ADD_2 147 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl 147 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \ 148 (rh = xh + yh + ((rl = xl + yl) < xl)) 148 (rh = xh + yh + ((rl = xl + yl) < xl)) 149 #endif 149 #endif 150 #ifndef __FP_FRAC_SUB_2 150 #ifndef __FP_FRAC_SUB_2 151 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl 151 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \ 152 (rh = xh - yh - ((rl = xl - yl) > xl)) 152 (rh = xh - yh - ((rl = xl - yl) > xl)) 153 #endif 153 #endif 154 #ifndef __FP_FRAC_DEC_2 154 #ifndef __FP_FRAC_DEC_2 155 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \ 155 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \ 156 do { \ 156 do { \ 157 UWtype _t = xl; \ 157 UWtype _t = xl; \ 158 xh -= yh + ((xl -= yl) > _t); \ 158 xh -= yh + ((xl -= yl) > _t); \ 159 } while (0) 159 } while (0) 160 #endif 160 #endif 161 161 162 #else 162 #else 163 163 164 #undef __FP_FRAC_ADDI_2 164 #undef __FP_FRAC_ADDI_2 165 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ss 165 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i) 166 #undef __FP_FRAC_ADD_2 166 #undef __FP_FRAC_ADD_2 167 #define __FP_FRAC_ADD_2 add_ss 167 #define __FP_FRAC_ADD_2 add_ssaaaa 168 #undef __FP_FRAC_SUB_2 168 #undef __FP_FRAC_SUB_2 169 #define __FP_FRAC_SUB_2 sub_dd 169 #define __FP_FRAC_SUB_2 sub_ddmmss 170 #undef __FP_FRAC_DEC_2 170 #undef __FP_FRAC_DEC_2 171 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_dd 171 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl) 172 172 173 #endif 173 #endif 174 174 175 /* 175 /* 176 * Unpack the raw bits of a native fp value. 176 * Unpack the raw bits of a native fp value. Do not classify or 177 * normalize the data. 177 * normalize the data. 178 */ 178 */ 179 179 180 #define _FP_UNPACK_RAW_2(fs, X, val) 180 #define _FP_UNPACK_RAW_2(fs, X, val) \ 181 do { 181 do { \ 182 union _FP_UNION_##fs _flo; _flo.flt = (val 182 union _FP_UNION_##fs _flo; _flo.flt = (val); \ 183 183 \ 184 X##_f0 = _flo.bits.frac0; 184 X##_f0 = _flo.bits.frac0; \ 185 X##_f1 = _flo.bits.frac1; 185 X##_f1 = _flo.bits.frac1; \ 186 X##_e = _flo.bits.exp; 186 X##_e = _flo.bits.exp; \ 187 X##_s = _flo.bits.sign; 187 X##_s = _flo.bits.sign; \ 188 } while (0) 188 } while (0) 189 189 190 #define _FP_UNPACK_RAW_2_P(fs, X, val) 190 #define _FP_UNPACK_RAW_2_P(fs, X, val) \ 191 do { 191 do { \ 192 union _FP_UNION_##fs *_flo = 192 union _FP_UNION_##fs *_flo = \ 193 (union _FP_UNION_##fs *)(val); 193 (union _FP_UNION_##fs *)(val); \ 194 194 \ 195 X##_f0 = _flo->bits.frac0; 195 X##_f0 = _flo->bits.frac0; \ 196 X##_f1 = _flo->bits.frac1; 196 X##_f1 = _flo->bits.frac1; \ 197 X##_e = _flo->bits.exp; 197 X##_e = _flo->bits.exp; \ 198 X##_s = _flo->bits.sign; 198 X##_s = _flo->bits.sign; \ 199 } while (0) 199 } while (0) 200 200 201 201 202 /* 202 /* 203 * Repack the raw bits of a native fp value. 203 * Repack the raw bits of a native fp value. 204 */ 204 */ 205 205 206 #define _FP_PACK_RAW_2(fs, val, X) 206 #define _FP_PACK_RAW_2(fs, val, X) \ 207 do { 207 do { \ 208 union _FP_UNION_##fs _flo; 208 union _FP_UNION_##fs _flo; \ 209 209 \ 210 _flo.bits.frac0 = X##_f0; 210 _flo.bits.frac0 = X##_f0; \ 211 _flo.bits.frac1 = X##_f1; 211 _flo.bits.frac1 = X##_f1; \ 212 _flo.bits.exp = X##_e; 212 _flo.bits.exp = X##_e; \ 213 _flo.bits.sign = X##_s; 213 _flo.bits.sign = X##_s; \ 214 214 \ 215 (val) = _flo.flt; 215 (val) = _flo.flt; \ 216 } while (0) 216 } while (0) 217 217 218 #define _FP_PACK_RAW_2_P(fs, val, X) 218 #define _FP_PACK_RAW_2_P(fs, val, X) \ 219 do { 219 do { \ 220 union _FP_UNION_##fs *_flo = 220 union _FP_UNION_##fs *_flo = \ 221 (union _FP_UNION_##fs *)(val); 221 (union _FP_UNION_##fs *)(val); \ 222 222 \ 223 _flo->bits.frac0 = X##_f0; 223 _flo->bits.frac0 = X##_f0; \ 224 _flo->bits.frac1 = X##_f1; 224 _flo->bits.frac1 = X##_f1; \ 225 _flo->bits.exp = X##_e; 225 _flo->bits.exp = X##_e; \ 226 _flo->bits.sign = X##_s; 226 _flo->bits.sign = X##_s; \ 227 } while (0) 227 } while (0) 228 228 229 229 230 /* 230 /* 231 * Multiplication algorithms: 231 * Multiplication algorithms: 232 */ 232 */ 233 233 234 /* Given a 1W * 1W => 2W primitive, do the ext 234 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 235 235 236 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y 236 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \ 237 do { 237 do { \ 238 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); 238 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 239 239 \ 240 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_ 240 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 241 doit(_b_f1, _b_f0, X##_f0, Y##_f1); 241 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \ 242 doit(_c_f1, _c_f0, X##_f1, Y##_f0); 242 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \ 243 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_ 243 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \ 244 244 \ 245 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_ 245 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 246 _FP_FRAC_WORD_4(_z,1), 0, 246 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \ 247 _FP_FRAC_WORD_4(_z,3),_FP_ 247 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 248 _FP_FRAC_WORD_4(_z,1)); 248 _FP_FRAC_WORD_4(_z,1)); \ 249 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_ 249 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 250 _FP_FRAC_WORD_4(_z,1), 0, 250 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \ 251 _FP_FRAC_WORD_4(_z,3),_FP_ 251 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 252 _FP_FRAC_WORD_4(_z,1)); 252 _FP_FRAC_WORD_4(_z,1)); \ 253 253 \ 254 /* Normalize since we know where the msb o 254 /* Normalize since we know where the msb of the multiplicands \ 255 were (bit B), we know that the msb of t 255 were (bit B), we know that the msb of the of the product is \ 256 at either 2B or 2B-1. */ 256 at either 2B or 2B-1. */ \ 257 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbit 257 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 258 R##_f0 = _FP_FRAC_WORD_4(_z,0); 258 R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 259 R##_f1 = _FP_FRAC_WORD_4(_z,1); 259 R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 260 } while (0) 260 } while (0) 261 261 262 /* Given a 1W * 1W => 2W primitive, do the ext 262 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. 263 Do only 3 multiplications instead of four. 263 Do only 3 multiplications instead of four. This one is for machines 264 where multiplication is much more expensive 264 where multiplication is much more expensive than subtraction. */ 265 265 266 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, 266 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \ 267 do { 267 do { \ 268 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); 268 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 269 _FP_W_TYPE _d; 269 _FP_W_TYPE _d; \ 270 int _c1, _c2; 270 int _c1, _c2; \ 271 271 \ 272 _b_f0 = X##_f0 + X##_f1; 272 _b_f0 = X##_f0 + X##_f1; \ 273 _c1 = _b_f0 < X##_f0; 273 _c1 = _b_f0 < X##_f0; \ 274 _b_f1 = Y##_f0 + Y##_f1; 274 _b_f1 = Y##_f0 + Y##_f1; \ 275 _c2 = _b_f1 < Y##_f0; 275 _c2 = _b_f1 < Y##_f0; \ 276 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y# 276 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 277 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_ 277 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \ 278 doit(_c_f1, _c_f0, X##_f1, Y##_f1); 278 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \ 279 279 \ 280 _b_f0 &= -_c2; 280 _b_f0 &= -_c2; \ 281 _b_f1 &= -_c1; 281 _b_f1 &= -_c1; \ 282 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_ 282 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 283 _FP_FRAC_WORD_4(_z,1), (_c 283 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \ 284 0, _FP_FRAC_WORD_4(_z,2), 284 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \ 285 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP 285 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 286 _b_f0); 286 _b_f0); \ 287 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP 287 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 288 _b_f1); 288 _b_f1); \ 289 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_ 289 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 290 _FP_FRAC_WORD_4(_z,1), 290 _FP_FRAC_WORD_4(_z,1), \ 291 0, _d, _FP_FRAC_WORD_4(_z, 291 0, _d, _FP_FRAC_WORD_4(_z,0)); \ 292 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_ 292 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 293 _FP_FRAC_WORD_4(_z,1), 0, 293 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \ 294 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP 294 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \ 295 _c_f1, _c_f0, 295 _c_f1, _c_f0, \ 296 _FP_FRAC_WORD_4(_z,3), _FP 296 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \ 297 297 \ 298 /* Normalize since we know where the msb o 298 /* Normalize since we know where the msb of the multiplicands \ 299 were (bit B), we know that the msb of t 299 were (bit B), we know that the msb of the of the product is \ 300 at either 2B or 2B-1. */ 300 at either 2B or 2B-1. */ \ 301 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbit 301 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 302 R##_f0 = _FP_FRAC_WORD_4(_z,0); 302 R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 303 R##_f1 = _FP_FRAC_WORD_4(_z,1); 303 R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 304 } while (0) 304 } while (0) 305 305 306 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) 306 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \ 307 do { 307 do { \ 308 _FP_FRAC_DECL_4(_z); 308 _FP_FRAC_DECL_4(_z); \ 309 _FP_W_TYPE _x[2], _y[2]; 309 _FP_W_TYPE _x[2], _y[2]; \ 310 _x[0] = X##_f0; _x[1] = X##_f1; 310 _x[0] = X##_f0; _x[1] = X##_f1; \ 311 _y[0] = Y##_f0; _y[1] = Y##_f1; 311 _y[0] = Y##_f0; _y[1] = Y##_f1; \ 312 312 \ 313 mpn_mul_n(_z_f, _x, _y, 2); 313 mpn_mul_n(_z_f, _x, _y, 2); \ 314 314 \ 315 /* Normalize since we know where the msb o 315 /* Normalize since we know where the msb of the multiplicands \ 316 were (bit B), we know that the msb of t 316 were (bit B), we know that the msb of the of the product is \ 317 at either 2B or 2B-1. */ 317 at either 2B or 2B-1. */ \ 318 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbit 318 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 319 R##_f0 = _z_f[0]; 319 R##_f0 = _z_f[0]; \ 320 R##_f1 = _z_f[1]; 320 R##_f1 = _z_f[1]; \ 321 } while (0) 321 } while (0) 322 322 323 /* Do at most 120x120=240 bits multiplication 323 /* Do at most 120x120=240 bits multiplication using double floating 324 point multiplication. This is useful if fl 324 point multiplication. This is useful if floating point 325 multiplication has much bigger throughput t 325 multiplication has much bigger throughput than integer multiply. 326 It is supposed to work for _FP_W_TYPE_SIZE 326 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits 327 between 106 and 120 only. 327 between 106 and 120 only. 328 Caller guarantees that X and Y has (1LLL << 328 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set. 329 SETFETZ is a macro which will disable all F 329 SETFETZ is a macro which will disable all FPU exceptions and set rounding 330 towards zero, RESETFE should optionally re 330 towards zero, RESETFE should optionally reset it back. */ 331 331 332 #define _FP_MUL_MEAT_2_120_240_double(wfracbit 332 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \ 333 do { 333 do { \ 334 static const double _const[] = { 334 static const double _const[] = { \ 335 /* 2^-24 */ 5.9604644775390625e-08, 335 /* 2^-24 */ 5.9604644775390625e-08, \ 336 /* 2^-48 */ 3.5527136788005009e-15, 336 /* 2^-48 */ 3.5527136788005009e-15, \ 337 /* 2^-72 */ 2.1175823681357508e-22, 337 /* 2^-72 */ 2.1175823681357508e-22, \ 338 /* 2^-96 */ 1.2621774483536189e-29, 338 /* 2^-96 */ 1.2621774483536189e-29, \ 339 /* 2^28 */ 2.68435456e+08, 339 /* 2^28 */ 2.68435456e+08, \ 340 /* 2^4 */ 1.600000e+01, 340 /* 2^4 */ 1.600000e+01, \ 341 /* 2^-20 */ 9.5367431640625e-07, 341 /* 2^-20 */ 9.5367431640625e-07, \ 342 /* 2^-44 */ 5.6843418860808015e-14, 342 /* 2^-44 */ 5.6843418860808015e-14, \ 343 /* 2^-68 */ 3.3881317890172014e-21, 343 /* 2^-68 */ 3.3881317890172014e-21, \ 344 /* 2^-92 */ 2.0194839173657902e-28, 344 /* 2^-92 */ 2.0194839173657902e-28, \ 345 /* 2^-116 */ 1.2037062152420224e-35}; 345 /* 2^-116 */ 1.2037062152420224e-35}; \ 346 double _a240, _b240, _c240, _d240, _e240, 346 double _a240, _b240, _c240, _d240, _e240, _f240, \ 347 _g240, _h240, _i240, _j240, _k240; 347 _g240, _h240, _i240, _j240, _k240; \ 348 union { double d; UDItype i; } _l240, _m24 348 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \ 349 _p240, _q24 349 _p240, _q240, _r240, _s240; \ 350 UDItype _t240, _u240, _v240, _w240, _x240, 350 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \ 351 351 \ 352 if (wfracbits < 106 || wfracbits > 120) 352 if (wfracbits < 106 || wfracbits > 120) \ 353 abort(); 353 abort(); \ 354 354 \ 355 setfetz; 355 setfetz; \ 356 356 \ 357 _e240 = (double)(long)(X##_f0 & 0xffffff); 357 _e240 = (double)(long)(X##_f0 & 0xffffff); \ 358 _j240 = (double)(long)(Y##_f0 & 0xffffff); 358 _j240 = (double)(long)(Y##_f0 & 0xffffff); \ 359 _d240 = (double)(long)((X##_f0 >> 24) & 0x 359 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \ 360 _i240 = (double)(long)((Y##_f0 >> 24) & 0x 360 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \ 361 _c240 = (double)(long)(((X##_f1 << 16) & 0 361 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \ 362 _h240 = (double)(long)(((Y##_f1 << 16) & 0 362 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \ 363 _b240 = (double)(long)((X##_f1 >> 8) & 0xf 363 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \ 364 _g240 = (double)(long)((Y##_f1 >> 8) & 0xf 364 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \ 365 _a240 = (double)(long)(X##_f1 >> 32); 365 _a240 = (double)(long)(X##_f1 >> 32); \ 366 _f240 = (double)(long)(Y##_f1 >> 32); 366 _f240 = (double)(long)(Y##_f1 >> 32); \ 367 _e240 *= _const[3]; 367 _e240 *= _const[3]; \ 368 _j240 *= _const[3]; 368 _j240 *= _const[3]; \ 369 _d240 *= _const[2]; 369 _d240 *= _const[2]; \ 370 _i240 *= _const[2]; 370 _i240 *= _const[2]; \ 371 _c240 *= _const[1]; 371 _c240 *= _const[1]; \ 372 _h240 *= _const[1]; 372 _h240 *= _const[1]; \ 373 _b240 *= _const[0]; 373 _b240 *= _const[0]; \ 374 _g240 *= _const[0]; 374 _g240 *= _const[0]; \ 375 _s240.d = 375 _s240.d = _e240*_j240;\ 376 _r240.d = 376 _r240.d = _d240*_j240 + _e240*_i240;\ 377 _q240.d = _c24 377 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\ 378 _p240.d = _b240*_j240 + _c24 378 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\ 379 _o240.d = _a240*_j240 + _b240*_i240 + _c24 379 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\ 380 _n240.d = _a240*_i240 + _b240*_h240 + _c24 380 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \ 381 _m240.d = _a240*_h240 + _b240*_g240 + _c24 381 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \ 382 _l240.d = _a240*_g240 + _b240*_f240; 382 _l240.d = _a240*_g240 + _b240*_f240; \ 383 _k240 = _a240*_f240; 383 _k240 = _a240*_f240; \ 384 _r240.d += _s240.d; 384 _r240.d += _s240.d; \ 385 _q240.d += _r240.d; 385 _q240.d += _r240.d; \ 386 _p240.d += _q240.d; 386 _p240.d += _q240.d; \ 387 _o240.d += _p240.d; 387 _o240.d += _p240.d; \ 388 _n240.d += _o240.d; 388 _n240.d += _o240.d; \ 389 _m240.d += _n240.d; 389 _m240.d += _n240.d; \ 390 _l240.d += _m240.d; 390 _l240.d += _m240.d; \ 391 _k240 += _l240.d; 391 _k240 += _l240.d; \ 392 _s240.d -= ((_const[10]+_s240.d)-_const[10 392 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \ 393 _r240.d -= ((_const[9]+_r240.d)-_const[9]) 393 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \ 394 _q240.d -= ((_const[8]+_q240.d)-_const[8]) 394 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \ 395 _p240.d -= ((_const[7]+_p240.d)-_const[7]) 395 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \ 396 _o240.d += _const[7]; 396 _o240.d += _const[7]; \ 397 _n240.d += _const[6]; 397 _n240.d += _const[6]; \ 398 _m240.d += _const[5]; 398 _m240.d += _const[5]; \ 399 _l240.d += _const[4]; 399 _l240.d += _const[4]; \ 400 if (_s240.d != 0.0) _y240 = 1; 400 if (_s240.d != 0.0) _y240 = 1; \ 401 if (_r240.d != 0.0) _y240 = 1; 401 if (_r240.d != 0.0) _y240 = 1; \ 402 if (_q240.d != 0.0) _y240 = 1; 402 if (_q240.d != 0.0) _y240 = 1; \ 403 if (_p240.d != 0.0) _y240 = 1; 403 if (_p240.d != 0.0) _y240 = 1; \ 404 _t240 = (DItype)_k240; 404 _t240 = (DItype)_k240; \ 405 _u240 = _l240.i; 405 _u240 = _l240.i; \ 406 _v240 = _m240.i; 406 _v240 = _m240.i; \ 407 _w240 = _n240.i; 407 _w240 = _n240.i; \ 408 _x240 = _o240.i; 408 _x240 = _o240.i; \ 409 R##_f1 = (_t240 << (128 - (wfracbits - 1)) 409 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \ 410 | ((_u240 & 0xffffff) >> ((wfracb 410 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \ 411 R##_f0 = ((_u240 & 0xffffff) << (168 - (wf 411 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \ 412 | ((_v240 & 0xffffff) << (144 - ( 412 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \ 413 | ((_w240 & 0xffffff) << (120 - ( 413 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \ 414 | ((_x240 & 0xffffff) >> ((wfracb 414 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \ 415 | _y240; 415 | _y240; \ 416 resetfe; 416 resetfe; \ 417 } while (0) 417 } while (0) 418 418 419 /* 419 /* 420 * Division algorithms: 420 * Division algorithms: 421 */ 421 */ 422 422 423 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) 423 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \ 424 do { 424 do { \ 425 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_ 425 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \ 426 if (_FP_FRAC_GT_2(X, Y)) 426 if (_FP_FRAC_GT_2(X, Y)) \ 427 { 427 { \ 428 _n_f2 = X##_f1 >> 1; 428 _n_f2 = X##_f1 >> 1; \ 429 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1 429 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ 430 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1 430 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ 431 } 431 } \ 432 else 432 else \ 433 { 433 { \ 434 R##_e--; 434 R##_e--; \ 435 _n_f2 = X##_f1; 435 _n_f2 = X##_f1; \ 436 _n_f1 = X##_f0; 436 _n_f1 = X##_f0; \ 437 _n_f0 = 0; 437 _n_f0 = 0; \ 438 } 438 } \ 439 439 \ 440 /* Normalize, i.e. make the most significa 440 /* Normalize, i.e. make the most significant bit of the \ 441 denominator set. */ 441 denominator set. */ \ 442 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); 442 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \ 443 443 \ 444 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y# 444 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \ 445 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); 445 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \ 446 _r_f0 = _n_f0; 446 _r_f0 = _n_f0; \ 447 if (_FP_FRAC_GT_2(_m, _r)) 447 if (_FP_FRAC_GT_2(_m, _r)) \ 448 { 448 { \ 449 R##_f1--; 449 R##_f1--; \ 450 _FP_FRAC_ADD_2(_r, Y, _r); 450 _FP_FRAC_ADD_2(_r, Y, _r); \ 451 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_G 451 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 452 { 452 { \ 453 R##_f1--; 453 R##_f1--; \ 454 _FP_FRAC_ADD_2(_r, Y, _r); 454 _FP_FRAC_ADD_2(_r, Y, _r); \ 455 } 455 } \ 456 } 456 } \ 457 _FP_FRAC_DEC_2(_r, _m); 457 _FP_FRAC_DEC_2(_r, _m); \ 458 458 \ 459 if (_r_f1 == Y##_f1) 459 if (_r_f1 == Y##_f1) \ 460 { 460 { \ 461 /* This is a special case, not an opti 461 /* This is a special case, not an optimization \ 462 (_r/Y##_f1 would not fit into UWtyp 462 (_r/Y##_f1 would not fit into UWtype). \ 463 As _r is guaranteed to be < Y, R## 463 As _r is guaranteed to be < Y, R##_f0 can be either \ 464 (UWtype)-1 or (UWtype)-2. But as w 464 (UWtype)-1 or (UWtype)-2. But as we know what kind \ 465 of bits it is (sticky, guard, round 465 of bits it is (sticky, guard, round), we don't care. \ 466 We also don't care what the reminde 466 We also don't care what the reminder is, because the \ 467 guard bit will be set anyway. -jj 467 guard bit will be set anyway. -jj */ \ 468 R##_f0 = -1; 468 R##_f0 = -1; \ 469 } 469 } \ 470 else 470 else \ 471 { 471 { \ 472 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0 472 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \ 473 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0 473 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \ 474 _r_f0 = 0; 474 _r_f0 = 0; \ 475 if (_FP_FRAC_GT_2(_m, _r)) 475 if (_FP_FRAC_GT_2(_m, _r)) \ 476 { 476 { \ 477 R##_f0--; 477 R##_f0--; \ 478 _FP_FRAC_ADD_2(_r, Y, _r); 478 _FP_FRAC_ADD_2(_r, Y, _r); \ 479 if (_FP_FRAC_GE_2(_r, Y) && _FP_FR 479 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 480 { 480 { \ 481 R##_f0--; 481 R##_f0--; \ 482 _FP_FRAC_ADD_2(_r, Y, _r); 482 _FP_FRAC_ADD_2(_r, Y, _r); \ 483 } 483 } \ 484 } 484 } \ 485 if (!_FP_FRAC_EQ_2(_r, _m)) 485 if (!_FP_FRAC_EQ_2(_r, _m)) \ 486 R##_f0 |= _FP_WORK_STICKY; 486 R##_f0 |= _FP_WORK_STICKY; \ 487 } 487 } \ 488 } while (0) 488 } while (0) 489 489 490 490 491 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) 491 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \ 492 do { 492 do { \ 493 _FP_W_TYPE _x[4], _y[2], _z[4]; 493 _FP_W_TYPE _x[4], _y[2], _z[4]; \ 494 _y[0] = Y##_f0; _y[1] = Y##_f1; 494 _y[0] = Y##_f0; _y[1] = Y##_f1; \ 495 _x[0] = _x[3] = 0; 495 _x[0] = _x[3] = 0; \ 496 if (_FP_FRAC_GT_2(X, Y)) 496 if (_FP_FRAC_GT_2(X, Y)) \ 497 { 497 { \ 498 R##_e++; 498 R##_e++; \ 499 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs 499 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \ 500 X##_f1 >> (_FP_W_TYPE_SIZE - 500 X##_f1 >> (_FP_W_TYPE_SIZE - \ 501 (_FP_WFRACBITS_##f 501 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \ 502 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs- 502 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \ 503 } 503 } \ 504 else 504 else \ 505 { 505 { \ 506 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs 506 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \ 507 X##_f1 >> (_FP_W_TYPE_SIZE - 507 X##_f1 >> (_FP_W_TYPE_SIZE - \ 508 (_FP_WFRACBITS_##f 508 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \ 509 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs 509 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \ 510 } 510 } \ 511 511 \ 512 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); 512 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \ 513 R##_f1 = _z[1]; 513 R##_f1 = _z[1]; \ 514 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); 514 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \ 515 } while (0) 515 } while (0) 516 516 517 517 518 /* 518 /* 519 * Square root algorithms: 519 * Square root algorithms: 520 * We have just one right now, maybe Newton ap 520 * We have just one right now, maybe Newton approximation 521 * should be added for those machines where di 521 * should be added for those machines where division is fast. 522 */ 522 */ 523 523 524 #define _FP_SQRT_MEAT_2(R, S, T, X, q) 524 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \ 525 do { 525 do { \ 526 while (q) 526 while (q) \ 527 { 527 { \ 528 T##_f1 = S##_f1 + q; 528 T##_f1 = S##_f1 + q; \ 529 if (T##_f1 <= X##_f1) 529 if (T##_f1 <= X##_f1) \ 530 { 530 { \ 531 S##_f1 = T##_f1 + q; 531 S##_f1 = T##_f1 + q; \ 532 X##_f1 -= T##_f1; 532 X##_f1 -= T##_f1; \ 533 R##_f1 += q; 533 R##_f1 += q; \ 534 } 534 } \ 535 _FP_FRAC_SLL_2(X, 1); 535 _FP_FRAC_SLL_2(X, 1); \ 536 q >>= 1; 536 q >>= 1; \ 537 } 537 } \ 538 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1) 538 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 539 while (q != _FP_WORK_ROUND) 539 while (q != _FP_WORK_ROUND) \ 540 { 540 { \ 541 T##_f0 = S##_f0 + q; 541 T##_f0 = S##_f0 + q; \ 542 T##_f1 = S##_f1; 542 T##_f1 = S##_f1; \ 543 if (T##_f1 < X##_f1 || 543 if (T##_f1 < X##_f1 || \ 544 (T##_f1 == X##_f1 && T##_f0 <= X## 544 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \ 545 { 545 { \ 546 S##_f0 = T##_f0 + q; 546 S##_f0 = T##_f0 + q; \ 547 S##_f1 += (T##_f0 > S##_f0); 547 S##_f1 += (T##_f0 > S##_f0); \ 548 _FP_FRAC_DEC_2(X, T); 548 _FP_FRAC_DEC_2(X, T); \ 549 R##_f0 += q; 549 R##_f0 += q; \ 550 } 550 } \ 551 _FP_FRAC_SLL_2(X, 1); 551 _FP_FRAC_SLL_2(X, 1); \ 552 q >>= 1; 552 q >>= 1; \ 553 } 553 } \ 554 if (X##_f0 | X##_f1) 554 if (X##_f0 | X##_f1) \ 555 { 555 { \ 556 if (S##_f1 < X##_f1 || 556 if (S##_f1 < X##_f1 || \ 557 (S##_f1 == X##_f1 && S##_f0 < X##_ 557 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \ 558 R##_f0 |= _FP_WORK_ROUND; 558 R##_f0 |= _FP_WORK_ROUND; \ 559 R##_f0 |= _FP_WORK_STICKY; 559 R##_f0 |= _FP_WORK_STICKY; \ 560 } 560 } \ 561 } while (0) 561 } while (0) 562 562 563 563 564 /* 564 /* 565 * Assembly/disassembly for converting to/from 565 * Assembly/disassembly for converting to/from integral types. 566 * No shifting or overflow handled here. 566 * No shifting or overflow handled here. 567 */ 567 */ 568 568 569 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) 569 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ 570 (void) (((rsize) <= _FP_W_TYPE_SIZE) !! 570 do { \ 571 ? ({ (r) = X##_f0; }) !! 571 if (rsize <= _FP_W_TYPE_SIZE) \ 572 : ({ !! 572 r = X##_f0; \ 573 (r) = X##_f1; !! 573 else \ 574 (r) <<= _FP_W_TYPE_SIZE; !! 574 { \ 575 (r) += X##_f0; !! 575 r = X##_f1; \ 576 })) !! 576 r <<= _FP_W_TYPE_SIZE; \ >> 577 r += X##_f0; \ >> 578 } \ >> 579 } while (0) 577 580 578 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) 581 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ 579 do { 582 do { \ 580 X##_f0 = r; 583 X##_f0 = r; \ 581 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r 584 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ 582 } while (0) 585 } while (0) 583 586 584 /* 587 /* 585 * Convert FP values between word sizes 588 * Convert FP values between word sizes 586 */ 589 */ 587 590 588 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) 591 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \ 589 do { 592 do { \ 590 if (S##_c != FP_CLS_NAN) 593 if (S##_c != FP_CLS_NAN) \ 591 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - 594 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ 592 _FP_WFRACBITS_##sfs); 595 _FP_WFRACBITS_##sfs); \ 593 else 596 else \ 594 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - 597 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \ 595 D##_f = S##_f0; 598 D##_f = S##_f0; \ 596 } while (0) 599 } while (0) 597 600 598 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) 601 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \ 599 do { 602 do { \ 600 D##_f0 = S##_f; 603 D##_f0 = S##_f; \ 601 D##_f1 = 0; 604 D##_f1 = 0; \ 602 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _ 605 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ 603 } while (0) 606 } while (0) 604 607 605 #endif 608 #endif 606 609
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.