1 /* Software floating-point emulation. 1 /* Software floating-point emulation. 2 Basic two-word fraction declaration and man 2 Basic two-word fraction declaration and manipulation. 3 Copyright (C) 1997,1998,1999 Free Software 3 Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. 4 This file is part of the GNU C Library. 4 This file is part of the GNU C Library. 5 Contributed by Richard Henderson (rth@cygnu 5 Contributed by Richard Henderson (rth@cygnus.com), 6 Jakub Jelinek (jj@ultra.linu 6 Jakub Jelinek (jj@ultra.linux.cz), 7 David S. Miller (davem@redha 7 David S. Miller (davem@redhat.com) and 8 Peter Maydell (pmaydell@chia 8 Peter Maydell (pmaydell@chiark.greenend.org.uk). 9 9 10 The GNU C Library is free software; you can 10 The GNU C Library is free software; you can redistribute it and/or 11 modify it under the terms of the GNU Librar 11 modify it under the terms of the GNU Library General Public License as 12 published by the Free Software Foundation; 12 published by the Free Software Foundation; either version 2 of the 13 License, or (at your option) any later vers 13 License, or (at your option) any later version. 14 14 15 The GNU C Library is distributed in the hop 15 The GNU C Library is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 Library General Public License for more det 18 Library General Public License for more details. 19 19 20 You should have received a copy of the GNU 20 You should have received a copy of the GNU Library General Public 21 License along with the GNU C Library; see t 21 License along with the GNU C Library; see the file COPYING.LIB. If 22 not, write to the Free Software Foundation, 22 not, write to the Free Software Foundation, Inc., 23 59 Temple Place - Suite 330, Boston, MA 021 23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 24 24 25 #ifndef __MATH_EMU_OP_2_H__ 25 #ifndef __MATH_EMU_OP_2_H__ 26 #define __MATH_EMU_OP_2_H__ 26 #define __MATH_EMU_OP_2_H__ 27 27 28 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X## 28 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0 = 0, X##_f1 = 0 29 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_ 29 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1) 30 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_ 30 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I) 31 #define _FP_FRAC_HIGH_2(X) (X##_f1) 31 #define _FP_FRAC_HIGH_2(X) (X##_f1) 32 #define _FP_FRAC_LOW_2(X) (X##_f0) 32 #define _FP_FRAC_LOW_2(X) (X##_f0) 33 #define _FP_FRAC_WORD_2(X,w) (X##_f##w) 33 #define _FP_FRAC_WORD_2(X,w) (X##_f##w) 34 #define _FP_FRAC_SLL_2(X, N) ( << 35 (void) (((N) < _FP_W_TYPE_SIZE) << 36 ? ({ << 37 if (__builtin_constant_p(N) && << 38 X##_f1 = X##_f1 + X##_ << 39 (((_FP_WS_TYPE << 40 X##_f0 += X##_f0; << 41 } else { << 42 X##_f1 = X##_f1 << (N) << 43 << 44 X##_f0 <<= (N); << 45 } << 46 0; << 47 }) << 48 : ({ << 49 X##_f1 = X##_f0 << ((N) - _FP_W_ << 50 X##_f0 = 0; << 51 }))) << 52 << 53 << 54 #define _FP_FRAC_SRL_2(X, N) ( << 55 (void) (((N) < _FP_W_TYPE_SIZE) << 56 ? ({ << 57 X##_f0 = X##_f0 >> (N) | X##_f1 << 58 X##_f1 >>= (N); << 59 }) << 60 : ({ << 61 X##_f0 = X##_f1 >> ((N) - _FP_W_ << 62 X##_f1 = 0; << 63 }))) << 64 34 >> 35 #define _FP_FRAC_SLL_2(X,N) \ >> 36 do { \ >> 37 if ((N) < _FP_W_TYPE_SIZE) \ >> 38 { \ >> 39 if (__builtin_constant_p(N) && (N) == 1) \ >> 40 { \ >> 41 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \ >> 42 X##_f0 += X##_f0; \ >> 43 } \ >> 44 else \ >> 45 { \ >> 46 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \ >> 47 X##_f0 <<= (N); \ >> 48 } \ >> 49 } \ >> 50 else \ >> 51 { \ >> 52 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ >> 53 X##_f0 = 0; \ >> 54 } \ >> 55 } while (0) >> 56 >> 57 #define _FP_FRAC_SRL_2(X,N) \ >> 58 do { \ >> 59 if ((N) < _FP_W_TYPE_SIZE) \ >> 60 { \ >> 61 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ >> 62 X##_f1 >>= (N); \ >> 63 } \ >> 64 else \ >> 65 { \ >> 66 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ >> 67 X##_f1 = 0; \ >> 68 } \ >> 69 } while (0) 65 70 66 /* Right shift with sticky-lsb. */ 71 /* Right shift with sticky-lsb. */ 67 #define _FP_FRAC_SRS_2(X, N, sz) ( !! 72 #define _FP_FRAC_SRS_2(X,N,sz) \ 68 (void) (((N) < _FP_W_TYPE_SIZE) !! 73 do { \ 69 ? ({ !! 74 if ((N) < _FP_W_TYPE_SIZE) \ 70 X##_f0 = (X##_f1 << (_FP_W_TYPE_ !! 75 { \ 71 | (__builtin_constant_ !! 76 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \ 72 ? X##_f0 & 1 !! 77 (__builtin_constant_p(N) && (N) == 1 \ 73 : (X##_f0 << (_FP_W !! 78 ? X##_f0 & 1 \ 74 X##_f1 >>= (N); !! 79 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ 75 }) !! 80 X##_f1 >>= (N); \ 76 : ({ !! 81 } \ 77 X##_f0 = (X##_f1 >> ((N) - _FP_W !! 82 else \ 78 | ((((N) == _FP_W_TYPE !! 83 { \ 79 ? 0 !! 84 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \ 80 : (X##_f1 << (2*_ !! 85 (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \ 81 | X##_f0) != 0)); !! 86 X##_f1 = 0; \ 82 X##_f1 = 0; !! 87 } \ 83 }))) !! 88 } while (0) 84 89 85 #define _FP_FRAC_ADDI_2(X,I) \ 90 #define _FP_FRAC_ADDI_2(X,I) \ 86 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) 91 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) 87 92 88 #define _FP_FRAC_ADD_2(R,X,Y) \ 93 #define _FP_FRAC_ADD_2(R,X,Y) \ 89 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_ 94 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 90 95 91 #define _FP_FRAC_SUB_2(R,X,Y) \ 96 #define _FP_FRAC_SUB_2(R,X,Y) \ 92 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_ 97 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) 93 98 94 #define _FP_FRAC_DEC_2(X,Y) \ 99 #define _FP_FRAC_DEC_2(X,Y) \ 95 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_ 100 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0) 96 101 97 #define _FP_FRAC_CLZ_2(R,X) \ 102 #define _FP_FRAC_CLZ_2(R,X) \ 98 do { \ 103 do { \ 99 if (X##_f1) \ 104 if (X##_f1) \ 100 __FP_CLZ(R,X##_f1); \ 105 __FP_CLZ(R,X##_f1); \ 101 else \ 106 else \ 102 { \ 107 { \ 103 __FP_CLZ(R,X##_f0); \ 108 __FP_CLZ(R,X##_f0); \ 104 R += _FP_W_TYPE_SIZE; \ 109 R += _FP_W_TYPE_SIZE; \ 105 } \ 110 } \ 106 } while(0) 111 } while(0) 107 112 108 /* Predicates */ 113 /* Predicates */ 109 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) 114 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0) 110 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X## 115 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) 111 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH 116 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) 112 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_F 117 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) 113 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y## 118 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0) 114 #define _FP_FRAC_GT_2(X, Y) \ 119 #define _FP_FRAC_GT_2(X, Y) \ 115 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X## 120 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0)) 116 #define _FP_FRAC_GE_2(X, Y) \ 121 #define _FP_FRAC_GE_2(X, Y) \ 117 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X## 122 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)) 118 123 119 #define _FP_ZEROFRAC_2 0, 0 124 #define _FP_ZEROFRAC_2 0, 0 120 #define _FP_MINFRAC_2 0, 1 125 #define _FP_MINFRAC_2 0, 1 121 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE 126 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) 122 127 123 /* 128 /* 124 * Internals 129 * Internals 125 */ 130 */ 126 131 127 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f 132 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1) 128 133 129 #define __FP_CLZ_2(R, xh, xl) \ 134 #define __FP_CLZ_2(R, xh, xl) \ 130 do { \ 135 do { \ 131 if (xh) \ 136 if (xh) \ 132 __FP_CLZ(R,xh); \ 137 __FP_CLZ(R,xh); \ 133 else \ 138 else \ 134 { \ 139 { \ 135 __FP_CLZ(R,xl); \ 140 __FP_CLZ(R,xl); \ 136 R += _FP_W_TYPE_SIZE; \ 141 R += _FP_W_TYPE_SIZE; \ 137 } \ 142 } \ 138 } while(0) 143 } while(0) 139 144 140 #if 0 145 #if 0 141 146 142 #ifndef __FP_FRAC_ADDI_2 147 #ifndef __FP_FRAC_ADDI_2 143 #define __FP_FRAC_ADDI_2(xh, xl, i) \ 148 #define __FP_FRAC_ADDI_2(xh, xl, i) \ 144 (xh += ((xl += i) < i)) 149 (xh += ((xl += i) < i)) 145 #endif 150 #endif 146 #ifndef __FP_FRAC_ADD_2 151 #ifndef __FP_FRAC_ADD_2 147 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl 152 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \ 148 (rh = xh + yh + ((rl = xl + yl) < xl)) 153 (rh = xh + yh + ((rl = xl + yl) < xl)) 149 #endif 154 #endif 150 #ifndef __FP_FRAC_SUB_2 155 #ifndef __FP_FRAC_SUB_2 151 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl 156 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \ 152 (rh = xh - yh - ((rl = xl - yl) > xl)) 157 (rh = xh - yh - ((rl = xl - yl) > xl)) 153 #endif 158 #endif 154 #ifndef __FP_FRAC_DEC_2 159 #ifndef __FP_FRAC_DEC_2 155 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \ 160 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \ 156 do { \ 161 do { \ 157 UWtype _t = xl; \ 162 UWtype _t = xl; \ 158 xh -= yh + ((xl -= yl) > _t); \ 163 xh -= yh + ((xl -= yl) > _t); \ 159 } while (0) 164 } while (0) 160 #endif 165 #endif 161 166 162 #else 167 #else 163 168 164 #undef __FP_FRAC_ADDI_2 169 #undef __FP_FRAC_ADDI_2 165 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ss 170 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i) 166 #undef __FP_FRAC_ADD_2 171 #undef __FP_FRAC_ADD_2 167 #define __FP_FRAC_ADD_2 add_ss 172 #define __FP_FRAC_ADD_2 add_ssaaaa 168 #undef __FP_FRAC_SUB_2 173 #undef __FP_FRAC_SUB_2 169 #define __FP_FRAC_SUB_2 sub_dd 174 #define __FP_FRAC_SUB_2 sub_ddmmss 170 #undef __FP_FRAC_DEC_2 175 #undef __FP_FRAC_DEC_2 171 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_dd 176 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl) 172 177 173 #endif 178 #endif 174 179 175 /* 180 /* 176 * Unpack the raw bits of a native fp value. 181 * Unpack the raw bits of a native fp value. Do not classify or 177 * normalize the data. 182 * normalize the data. 178 */ 183 */ 179 184 180 #define _FP_UNPACK_RAW_2(fs, X, val) 185 #define _FP_UNPACK_RAW_2(fs, X, val) \ 181 do { 186 do { \ 182 union _FP_UNION_##fs _flo; _flo.flt = (val 187 union _FP_UNION_##fs _flo; _flo.flt = (val); \ 183 188 \ 184 X##_f0 = _flo.bits.frac0; 189 X##_f0 = _flo.bits.frac0; \ 185 X##_f1 = _flo.bits.frac1; 190 X##_f1 = _flo.bits.frac1; \ 186 X##_e = _flo.bits.exp; 191 X##_e = _flo.bits.exp; \ 187 X##_s = _flo.bits.sign; 192 X##_s = _flo.bits.sign; \ 188 } while (0) 193 } while (0) 189 194 190 #define _FP_UNPACK_RAW_2_P(fs, X, val) 195 #define _FP_UNPACK_RAW_2_P(fs, X, val) \ 191 do { 196 do { \ 192 union _FP_UNION_##fs *_flo = 197 union _FP_UNION_##fs *_flo = \ 193 (union _FP_UNION_##fs *)(val); 198 (union _FP_UNION_##fs *)(val); \ 194 199 \ 195 X##_f0 = _flo->bits.frac0; 200 X##_f0 = _flo->bits.frac0; \ 196 X##_f1 = _flo->bits.frac1; 201 X##_f1 = _flo->bits.frac1; \ 197 X##_e = _flo->bits.exp; 202 X##_e = _flo->bits.exp; \ 198 X##_s = _flo->bits.sign; 203 X##_s = _flo->bits.sign; \ 199 } while (0) 204 } while (0) 200 205 201 206 202 /* 207 /* 203 * Repack the raw bits of a native fp value. 208 * Repack the raw bits of a native fp value. 204 */ 209 */ 205 210 206 #define _FP_PACK_RAW_2(fs, val, X) 211 #define _FP_PACK_RAW_2(fs, val, X) \ 207 do { 212 do { \ 208 union _FP_UNION_##fs _flo; 213 union _FP_UNION_##fs _flo; \ 209 214 \ 210 _flo.bits.frac0 = X##_f0; 215 _flo.bits.frac0 = X##_f0; \ 211 _flo.bits.frac1 = X##_f1; 216 _flo.bits.frac1 = X##_f1; \ 212 _flo.bits.exp = X##_e; 217 _flo.bits.exp = X##_e; \ 213 _flo.bits.sign = X##_s; 218 _flo.bits.sign = X##_s; \ 214 219 \ 215 (val) = _flo.flt; 220 (val) = _flo.flt; \ 216 } while (0) 221 } while (0) 217 222 218 #define _FP_PACK_RAW_2_P(fs, val, X) 223 #define _FP_PACK_RAW_2_P(fs, val, X) \ 219 do { 224 do { \ 220 union _FP_UNION_##fs *_flo = 225 union _FP_UNION_##fs *_flo = \ 221 (union _FP_UNION_##fs *)(val); 226 (union _FP_UNION_##fs *)(val); \ 222 227 \ 223 _flo->bits.frac0 = X##_f0; 228 _flo->bits.frac0 = X##_f0; \ 224 _flo->bits.frac1 = X##_f1; 229 _flo->bits.frac1 = X##_f1; \ 225 _flo->bits.exp = X##_e; 230 _flo->bits.exp = X##_e; \ 226 _flo->bits.sign = X##_s; 231 _flo->bits.sign = X##_s; \ 227 } while (0) 232 } while (0) 228 233 229 234 230 /* 235 /* 231 * Multiplication algorithms: 236 * Multiplication algorithms: 232 */ 237 */ 233 238 234 /* Given a 1W * 1W => 2W primitive, do the ext 239 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 235 240 236 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y 241 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \ 237 do { 242 do { \ 238 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); 243 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 239 244 \ 240 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_ 245 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 241 doit(_b_f1, _b_f0, X##_f0, Y##_f1); 246 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \ 242 doit(_c_f1, _c_f0, X##_f1, Y##_f0); 247 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \ 243 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_ 248 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \ 244 249 \ 245 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_ 250 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 246 _FP_FRAC_WORD_4(_z,1), 0, 251 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \ 247 _FP_FRAC_WORD_4(_z,3),_FP_ 252 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 248 _FP_FRAC_WORD_4(_z,1)); 253 _FP_FRAC_WORD_4(_z,1)); \ 249 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_ 254 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 250 _FP_FRAC_WORD_4(_z,1), 0, 255 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \ 251 _FP_FRAC_WORD_4(_z,3),_FP_ 256 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 252 _FP_FRAC_WORD_4(_z,1)); 257 _FP_FRAC_WORD_4(_z,1)); \ 253 258 \ 254 /* Normalize since we know where the msb o 259 /* Normalize since we know where the msb of the multiplicands \ 255 were (bit B), we know that the msb of t 260 were (bit B), we know that the msb of the of the product is \ 256 at either 2B or 2B-1. */ 261 at either 2B or 2B-1. */ \ 257 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbit 262 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 258 R##_f0 = _FP_FRAC_WORD_4(_z,0); 263 R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 259 R##_f1 = _FP_FRAC_WORD_4(_z,1); 264 R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 260 } while (0) 265 } while (0) 261 266 262 /* Given a 1W * 1W => 2W primitive, do the ext 267 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. 263 Do only 3 multiplications instead of four. 268 Do only 3 multiplications instead of four. This one is for machines 264 where multiplication is much more expensive 269 where multiplication is much more expensive than subtraction. */ 265 270 266 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, 271 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \ 267 do { 272 do { \ 268 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); 273 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ 269 _FP_W_TYPE _d; 274 _FP_W_TYPE _d; \ 270 int _c1, _c2; 275 int _c1, _c2; \ 271 276 \ 272 _b_f0 = X##_f0 + X##_f1; 277 _b_f0 = X##_f0 + X##_f1; \ 273 _c1 = _b_f0 < X##_f0; 278 _c1 = _b_f0 < X##_f0; \ 274 _b_f1 = Y##_f0 + Y##_f1; 279 _b_f1 = Y##_f0 + Y##_f1; \ 275 _c2 = _b_f1 < Y##_f0; 280 _c2 = _b_f1 < Y##_f0; \ 276 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y# 281 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ 277 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_ 282 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \ 278 doit(_c_f1, _c_f0, X##_f1, Y##_f1); 283 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \ 279 284 \ 280 _b_f0 &= -_c2; 285 _b_f0 &= -_c2; \ 281 _b_f1 &= -_c1; 286 _b_f1 &= -_c1; \ 282 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_ 287 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 283 _FP_FRAC_WORD_4(_z,1), (_c 288 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \ 284 0, _FP_FRAC_WORD_4(_z,2), 289 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \ 285 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP 290 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 286 _b_f0); 291 _b_f0); \ 287 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP 292 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 288 _b_f1); 293 _b_f1); \ 289 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_ 294 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 290 _FP_FRAC_WORD_4(_z,1), 295 _FP_FRAC_WORD_4(_z,1), \ 291 0, _d, _FP_FRAC_WORD_4(_z, 296 0, _d, _FP_FRAC_WORD_4(_z,0)); \ 292 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_ 297 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ 293 _FP_FRAC_WORD_4(_z,1), 0, 298 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \ 294 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP 299 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \ 295 _c_f1, _c_f0, 300 _c_f1, _c_f0, \ 296 _FP_FRAC_WORD_4(_z,3), _FP 301 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \ 297 302 \ 298 /* Normalize since we know where the msb o 303 /* Normalize since we know where the msb of the multiplicands \ 299 were (bit B), we know that the msb of t 304 were (bit B), we know that the msb of the of the product is \ 300 at either 2B or 2B-1. */ 305 at either 2B or 2B-1. */ \ 301 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbit 306 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 302 R##_f0 = _FP_FRAC_WORD_4(_z,0); 307 R##_f0 = _FP_FRAC_WORD_4(_z,0); \ 303 R##_f1 = _FP_FRAC_WORD_4(_z,1); 308 R##_f1 = _FP_FRAC_WORD_4(_z,1); \ 304 } while (0) 309 } while (0) 305 310 306 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) 311 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \ 307 do { 312 do { \ 308 _FP_FRAC_DECL_4(_z); 313 _FP_FRAC_DECL_4(_z); \ 309 _FP_W_TYPE _x[2], _y[2]; 314 _FP_W_TYPE _x[2], _y[2]; \ 310 _x[0] = X##_f0; _x[1] = X##_f1; 315 _x[0] = X##_f0; _x[1] = X##_f1; \ 311 _y[0] = Y##_f0; _y[1] = Y##_f1; 316 _y[0] = Y##_f0; _y[1] = Y##_f1; \ 312 317 \ 313 mpn_mul_n(_z_f, _x, _y, 2); 318 mpn_mul_n(_z_f, _x, _y, 2); \ 314 319 \ 315 /* Normalize since we know where the msb o 320 /* Normalize since we know where the msb of the multiplicands \ 316 were (bit B), we know that the msb of t 321 were (bit B), we know that the msb of the of the product is \ 317 at either 2B or 2B-1. */ 322 at either 2B or 2B-1. */ \ 318 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbit 323 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \ 319 R##_f0 = _z_f[0]; 324 R##_f0 = _z_f[0]; \ 320 R##_f1 = _z_f[1]; 325 R##_f1 = _z_f[1]; \ 321 } while (0) 326 } while (0) 322 327 323 /* Do at most 120x120=240 bits multiplication 328 /* Do at most 120x120=240 bits multiplication using double floating 324 point multiplication. This is useful if fl 329 point multiplication. This is useful if floating point 325 multiplication has much bigger throughput t 330 multiplication has much bigger throughput than integer multiply. 326 It is supposed to work for _FP_W_TYPE_SIZE 331 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits 327 between 106 and 120 only. 332 between 106 and 120 only. 328 Caller guarantees that X and Y has (1LLL << 333 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set. 329 SETFETZ is a macro which will disable all F 334 SETFETZ is a macro which will disable all FPU exceptions and set rounding 330 towards zero, RESETFE should optionally re 335 towards zero, RESETFE should optionally reset it back. */ 331 336 332 #define _FP_MUL_MEAT_2_120_240_double(wfracbit 337 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \ 333 do { 338 do { \ 334 static const double _const[] = { 339 static const double _const[] = { \ 335 /* 2^-24 */ 5.9604644775390625e-08, 340 /* 2^-24 */ 5.9604644775390625e-08, \ 336 /* 2^-48 */ 3.5527136788005009e-15, 341 /* 2^-48 */ 3.5527136788005009e-15, \ 337 /* 2^-72 */ 2.1175823681357508e-22, 342 /* 2^-72 */ 2.1175823681357508e-22, \ 338 /* 2^-96 */ 1.2621774483536189e-29, 343 /* 2^-96 */ 1.2621774483536189e-29, \ 339 /* 2^28 */ 2.68435456e+08, 344 /* 2^28 */ 2.68435456e+08, \ 340 /* 2^4 */ 1.600000e+01, 345 /* 2^4 */ 1.600000e+01, \ 341 /* 2^-20 */ 9.5367431640625e-07, 346 /* 2^-20 */ 9.5367431640625e-07, \ 342 /* 2^-44 */ 5.6843418860808015e-14, 347 /* 2^-44 */ 5.6843418860808015e-14, \ 343 /* 2^-68 */ 3.3881317890172014e-21, 348 /* 2^-68 */ 3.3881317890172014e-21, \ 344 /* 2^-92 */ 2.0194839173657902e-28, 349 /* 2^-92 */ 2.0194839173657902e-28, \ 345 /* 2^-116 */ 1.2037062152420224e-35}; 350 /* 2^-116 */ 1.2037062152420224e-35}; \ 346 double _a240, _b240, _c240, _d240, _e240, 351 double _a240, _b240, _c240, _d240, _e240, _f240, \ 347 _g240, _h240, _i240, _j240, _k240; 352 _g240, _h240, _i240, _j240, _k240; \ 348 union { double d; UDItype i; } _l240, _m24 353 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \ 349 _p240, _q24 354 _p240, _q240, _r240, _s240; \ 350 UDItype _t240, _u240, _v240, _w240, _x240, 355 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \ 351 356 \ 352 if (wfracbits < 106 || wfracbits > 120) 357 if (wfracbits < 106 || wfracbits > 120) \ 353 abort(); 358 abort(); \ 354 359 \ 355 setfetz; 360 setfetz; \ 356 361 \ 357 _e240 = (double)(long)(X##_f0 & 0xffffff); 362 _e240 = (double)(long)(X##_f0 & 0xffffff); \ 358 _j240 = (double)(long)(Y##_f0 & 0xffffff); 363 _j240 = (double)(long)(Y##_f0 & 0xffffff); \ 359 _d240 = (double)(long)((X##_f0 >> 24) & 0x 364 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \ 360 _i240 = (double)(long)((Y##_f0 >> 24) & 0x 365 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \ 361 _c240 = (double)(long)(((X##_f1 << 16) & 0 366 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \ 362 _h240 = (double)(long)(((Y##_f1 << 16) & 0 367 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \ 363 _b240 = (double)(long)((X##_f1 >> 8) & 0xf 368 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \ 364 _g240 = (double)(long)((Y##_f1 >> 8) & 0xf 369 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \ 365 _a240 = (double)(long)(X##_f1 >> 32); 370 _a240 = (double)(long)(X##_f1 >> 32); \ 366 _f240 = (double)(long)(Y##_f1 >> 32); 371 _f240 = (double)(long)(Y##_f1 >> 32); \ 367 _e240 *= _const[3]; 372 _e240 *= _const[3]; \ 368 _j240 *= _const[3]; 373 _j240 *= _const[3]; \ 369 _d240 *= _const[2]; 374 _d240 *= _const[2]; \ 370 _i240 *= _const[2]; 375 _i240 *= _const[2]; \ 371 _c240 *= _const[1]; 376 _c240 *= _const[1]; \ 372 _h240 *= _const[1]; 377 _h240 *= _const[1]; \ 373 _b240 *= _const[0]; 378 _b240 *= _const[0]; \ 374 _g240 *= _const[0]; 379 _g240 *= _const[0]; \ 375 _s240.d = 380 _s240.d = _e240*_j240;\ 376 _r240.d = 381 _r240.d = _d240*_j240 + _e240*_i240;\ 377 _q240.d = _c24 382 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\ 378 _p240.d = _b240*_j240 + _c24 383 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\ 379 _o240.d = _a240*_j240 + _b240*_i240 + _c24 384 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\ 380 _n240.d = _a240*_i240 + _b240*_h240 + _c24 385 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \ 381 _m240.d = _a240*_h240 + _b240*_g240 + _c24 386 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \ 382 _l240.d = _a240*_g240 + _b240*_f240; 387 _l240.d = _a240*_g240 + _b240*_f240; \ 383 _k240 = _a240*_f240; 388 _k240 = _a240*_f240; \ 384 _r240.d += _s240.d; 389 _r240.d += _s240.d; \ 385 _q240.d += _r240.d; 390 _q240.d += _r240.d; \ 386 _p240.d += _q240.d; 391 _p240.d += _q240.d; \ 387 _o240.d += _p240.d; 392 _o240.d += _p240.d; \ 388 _n240.d += _o240.d; 393 _n240.d += _o240.d; \ 389 _m240.d += _n240.d; 394 _m240.d += _n240.d; \ 390 _l240.d += _m240.d; 395 _l240.d += _m240.d; \ 391 _k240 += _l240.d; 396 _k240 += _l240.d; \ 392 _s240.d -= ((_const[10]+_s240.d)-_const[10 397 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \ 393 _r240.d -= ((_const[9]+_r240.d)-_const[9]) 398 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \ 394 _q240.d -= ((_const[8]+_q240.d)-_const[8]) 399 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \ 395 _p240.d -= ((_const[7]+_p240.d)-_const[7]) 400 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \ 396 _o240.d += _const[7]; 401 _o240.d += _const[7]; \ 397 _n240.d += _const[6]; 402 _n240.d += _const[6]; \ 398 _m240.d += _const[5]; 403 _m240.d += _const[5]; \ 399 _l240.d += _const[4]; 404 _l240.d += _const[4]; \ 400 if (_s240.d != 0.0) _y240 = 1; 405 if (_s240.d != 0.0) _y240 = 1; \ 401 if (_r240.d != 0.0) _y240 = 1; 406 if (_r240.d != 0.0) _y240 = 1; \ 402 if (_q240.d != 0.0) _y240 = 1; 407 if (_q240.d != 0.0) _y240 = 1; \ 403 if (_p240.d != 0.0) _y240 = 1; 408 if (_p240.d != 0.0) _y240 = 1; \ 404 _t240 = (DItype)_k240; 409 _t240 = (DItype)_k240; \ 405 _u240 = _l240.i; 410 _u240 = _l240.i; \ 406 _v240 = _m240.i; 411 _v240 = _m240.i; \ 407 _w240 = _n240.i; 412 _w240 = _n240.i; \ 408 _x240 = _o240.i; 413 _x240 = _o240.i; \ 409 R##_f1 = (_t240 << (128 - (wfracbits - 1)) 414 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \ 410 | ((_u240 & 0xffffff) >> ((wfracb 415 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \ 411 R##_f0 = ((_u240 & 0xffffff) << (168 - (wf 416 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \ 412 | ((_v240 & 0xffffff) << (144 - ( 417 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \ 413 | ((_w240 & 0xffffff) << (120 - ( 418 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \ 414 | ((_x240 & 0xffffff) >> ((wfracb 419 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \ 415 | _y240; 420 | _y240; \ 416 resetfe; 421 resetfe; \ 417 } while (0) 422 } while (0) 418 423 419 /* 424 /* 420 * Division algorithms: 425 * Division algorithms: 421 */ 426 */ 422 427 423 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) 428 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \ 424 do { 429 do { \ 425 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_ 430 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \ 426 if (_FP_FRAC_GT_2(X, Y)) 431 if (_FP_FRAC_GT_2(X, Y)) \ 427 { 432 { \ 428 _n_f2 = X##_f1 >> 1; 433 _n_f2 = X##_f1 >> 1; \ 429 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1 434 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ 430 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1 435 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ 431 } 436 } \ 432 else 437 else \ 433 { 438 { \ 434 R##_e--; 439 R##_e--; \ 435 _n_f2 = X##_f1; 440 _n_f2 = X##_f1; \ 436 _n_f1 = X##_f0; 441 _n_f1 = X##_f0; \ 437 _n_f0 = 0; 442 _n_f0 = 0; \ 438 } 443 } \ 439 444 \ 440 /* Normalize, i.e. make the most significa 445 /* Normalize, i.e. make the most significant bit of the \ 441 denominator set. */ 446 denominator set. */ \ 442 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); 447 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \ 443 448 \ 444 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y# 449 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \ 445 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); 450 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \ 446 _r_f0 = _n_f0; 451 _r_f0 = _n_f0; \ 447 if (_FP_FRAC_GT_2(_m, _r)) 452 if (_FP_FRAC_GT_2(_m, _r)) \ 448 { 453 { \ 449 R##_f1--; 454 R##_f1--; \ 450 _FP_FRAC_ADD_2(_r, Y, _r); 455 _FP_FRAC_ADD_2(_r, Y, _r); \ 451 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_G 456 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 452 { 457 { \ 453 R##_f1--; 458 R##_f1--; \ 454 _FP_FRAC_ADD_2(_r, Y, _r); 459 _FP_FRAC_ADD_2(_r, Y, _r); \ 455 } 460 } \ 456 } 461 } \ 457 _FP_FRAC_DEC_2(_r, _m); 462 _FP_FRAC_DEC_2(_r, _m); \ 458 463 \ 459 if (_r_f1 == Y##_f1) 464 if (_r_f1 == Y##_f1) \ 460 { 465 { \ 461 /* This is a special case, not an opti 466 /* This is a special case, not an optimization \ 462 (_r/Y##_f1 would not fit into UWtyp 467 (_r/Y##_f1 would not fit into UWtype). \ 463 As _r is guaranteed to be < Y, R## 468 As _r is guaranteed to be < Y, R##_f0 can be either \ 464 (UWtype)-1 or (UWtype)-2. But as w 469 (UWtype)-1 or (UWtype)-2. But as we know what kind \ 465 of bits it is (sticky, guard, round 470 of bits it is (sticky, guard, round), we don't care. \ 466 We also don't care what the reminde 471 We also don't care what the reminder is, because the \ 467 guard bit will be set anyway. -jj 472 guard bit will be set anyway. -jj */ \ 468 R##_f0 = -1; 473 R##_f0 = -1; \ 469 } 474 } \ 470 else 475 else \ 471 { 476 { \ 472 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0 477 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \ 473 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0 478 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \ 474 _r_f0 = 0; 479 _r_f0 = 0; \ 475 if (_FP_FRAC_GT_2(_m, _r)) 480 if (_FP_FRAC_GT_2(_m, _r)) \ 476 { 481 { \ 477 R##_f0--; 482 R##_f0--; \ 478 _FP_FRAC_ADD_2(_r, Y, _r); 483 _FP_FRAC_ADD_2(_r, Y, _r); \ 479 if (_FP_FRAC_GE_2(_r, Y) && _FP_FR 484 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ 480 { 485 { \ 481 R##_f0--; 486 R##_f0--; \ 482 _FP_FRAC_ADD_2(_r, Y, _r); 487 _FP_FRAC_ADD_2(_r, Y, _r); \ 483 } 488 } \ 484 } 489 } \ 485 if (!_FP_FRAC_EQ_2(_r, _m)) 490 if (!_FP_FRAC_EQ_2(_r, _m)) \ 486 R##_f0 |= _FP_WORK_STICKY; 491 R##_f0 |= _FP_WORK_STICKY; \ 487 } 492 } \ 488 } while (0) 493 } while (0) 489 494 490 495 491 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) 496 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \ 492 do { 497 do { \ 493 _FP_W_TYPE _x[4], _y[2], _z[4]; 498 _FP_W_TYPE _x[4], _y[2], _z[4]; \ 494 _y[0] = Y##_f0; _y[1] = Y##_f1; 499 _y[0] = Y##_f0; _y[1] = Y##_f1; \ 495 _x[0] = _x[3] = 0; 500 _x[0] = _x[3] = 0; \ 496 if (_FP_FRAC_GT_2(X, Y)) 501 if (_FP_FRAC_GT_2(X, Y)) \ 497 { 502 { \ 498 R##_e++; 503 R##_e++; \ 499 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs 504 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \ 500 X##_f1 >> (_FP_W_TYPE_SIZE - 505 X##_f1 >> (_FP_W_TYPE_SIZE - \ 501 (_FP_WFRACBITS_##f 506 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \ 502 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs- 507 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \ 503 } 508 } \ 504 else 509 else \ 505 { 510 { \ 506 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs 511 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \ 507 X##_f1 >> (_FP_W_TYPE_SIZE - 512 X##_f1 >> (_FP_W_TYPE_SIZE - \ 508 (_FP_WFRACBITS_##f 513 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \ 509 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs 514 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \ 510 } 515 } \ 511 516 \ 512 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); 517 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \ 513 R##_f1 = _z[1]; 518 R##_f1 = _z[1]; \ 514 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); 519 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \ 515 } while (0) 520 } while (0) 516 521 517 522 518 /* 523 /* 519 * Square root algorithms: 524 * Square root algorithms: 520 * We have just one right now, maybe Newton ap 525 * We have just one right now, maybe Newton approximation 521 * should be added for those machines where di 526 * should be added for those machines where division is fast. 522 */ 527 */ 523 528 524 #define _FP_SQRT_MEAT_2(R, S, T, X, q) 529 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \ 525 do { 530 do { \ 526 while (q) 531 while (q) \ 527 { 532 { \ 528 T##_f1 = S##_f1 + q; 533 T##_f1 = S##_f1 + q; \ 529 if (T##_f1 <= X##_f1) 534 if (T##_f1 <= X##_f1) \ 530 { 535 { \ 531 S##_f1 = T##_f1 + q; 536 S##_f1 = T##_f1 + q; \ 532 X##_f1 -= T##_f1; 537 X##_f1 -= T##_f1; \ 533 R##_f1 += q; 538 R##_f1 += q; \ 534 } 539 } \ 535 _FP_FRAC_SLL_2(X, 1); 540 _FP_FRAC_SLL_2(X, 1); \ 536 q >>= 1; 541 q >>= 1; \ 537 } 542 } \ 538 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1) 543 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ 539 while (q != _FP_WORK_ROUND) 544 while (q != _FP_WORK_ROUND) \ 540 { 545 { \ 541 T##_f0 = S##_f0 + q; 546 T##_f0 = S##_f0 + q; \ 542 T##_f1 = S##_f1; 547 T##_f1 = S##_f1; \ 543 if (T##_f1 < X##_f1 || 548 if (T##_f1 < X##_f1 || \ 544 (T##_f1 == X##_f1 && T##_f0 <= X## 549 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \ 545 { 550 { \ 546 S##_f0 = T##_f0 + q; 551 S##_f0 = T##_f0 + q; \ 547 S##_f1 += (T##_f0 > S##_f0); 552 S##_f1 += (T##_f0 > S##_f0); \ 548 _FP_FRAC_DEC_2(X, T); 553 _FP_FRAC_DEC_2(X, T); \ 549 R##_f0 += q; 554 R##_f0 += q; \ 550 } 555 } \ 551 _FP_FRAC_SLL_2(X, 1); 556 _FP_FRAC_SLL_2(X, 1); \ 552 q >>= 1; 557 q >>= 1; \ 553 } 558 } \ 554 if (X##_f0 | X##_f1) 559 if (X##_f0 | X##_f1) \ 555 { 560 { \ 556 if (S##_f1 < X##_f1 || 561 if (S##_f1 < X##_f1 || \ 557 (S##_f1 == X##_f1 && S##_f0 < X##_ 562 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \ 558 R##_f0 |= _FP_WORK_ROUND; 563 R##_f0 |= _FP_WORK_ROUND; \ 559 R##_f0 |= _FP_WORK_STICKY; 564 R##_f0 |= _FP_WORK_STICKY; \ 560 } 565 } \ 561 } while (0) 566 } while (0) 562 567 563 568 564 /* 569 /* 565 * Assembly/disassembly for converting to/from 570 * Assembly/disassembly for converting to/from integral types. 566 * No shifting or overflow handled here. 571 * No shifting or overflow handled here. 567 */ 572 */ 568 573 569 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) 574 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ 570 (void) (((rsize) <= _FP_W_TYPE_SIZE) !! 575 do { \ 571 ? ({ (r) = X##_f0; }) !! 576 if (rsize <= _FP_W_TYPE_SIZE) \ 572 : ({ !! 577 r = X##_f0; \ 573 (r) = X##_f1; !! 578 else \ 574 (r) <<= _FP_W_TYPE_SIZE; !! 579 { \ 575 (r) += X##_f0; !! 580 r = X##_f1; \ 576 })) !! 581 r <<= _FP_W_TYPE_SIZE; \ >> 582 r += X##_f0; \ >> 583 } \ >> 584 } while (0) 577 585 578 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) 586 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ 579 do { 587 do { \ 580 X##_f0 = r; 588 X##_f0 = r; \ 581 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r 589 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ 582 } while (0) 590 } while (0) 583 591 584 /* 592 /* 585 * Convert FP values between word sizes 593 * Convert FP values between word sizes 586 */ 594 */ 587 595 588 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) 596 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \ 589 do { 597 do { \ 590 if (S##_c != FP_CLS_NAN) 598 if (S##_c != FP_CLS_NAN) \ 591 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - 599 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ 592 _FP_WFRACBITS_##sfs); 600 _FP_WFRACBITS_##sfs); \ 593 else 601 else \ 594 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - 602 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \ 595 D##_f = S##_f0; 603 D##_f = S##_f0; \ 596 } while (0) 604 } while (0) 597 605 598 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) 606 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \ 599 do { 607 do { \ 600 D##_f0 = S##_f; 608 D##_f0 = S##_f; \ 601 D##_f1 = 0; 609 D##_f1 = 0; \ 602 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _ 610 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ 603 } while (0) 611 } while (0) 604 612 605 #endif 613 #endif 606 614
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.