1 // SPDX-License-Identifier: GPL-2.0-or-later 1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* mpihelp-div.c - MPI helper functions 2 /* mpihelp-div.c - MPI helper functions 3 * Copyright (C) 1994, 1996 Free Software 3 * Copyright (C) 1994, 1996 Free Software Foundation, Inc. 4 * Copyright (C) 1998, 1999 Free Software 4 * Copyright (C) 1998, 1999 Free Software Foundation, Inc. 5 * 5 * 6 * This file is part of GnuPG. 6 * This file is part of GnuPG. 7 * 7 * 8 * Note: This code is heavily based on the GNU 8 * Note: This code is heavily based on the GNU MP Library. 9 * Actually it's the same code with only 9 * Actually it's the same code with only minor changes in the 10 * way the data is stored; this is to su 10 * way the data is stored; this is to support the abstraction 11 * of an optional secure memory allocati 11 * of an optional secure memory allocation which may be used 12 * to avoid revealing of sensitive data 12 * to avoid revealing of sensitive data due to paging etc. 13 * The GNU MP Library itself is publishe 13 * The GNU MP Library itself is published under the LGPL; 14 * however I decided to publish this cod 14 * however I decided to publish this code under the plain GPL. 15 */ 15 */ 16 16 17 #include "mpi-internal.h" 17 #include "mpi-internal.h" 18 #include "longlong.h" 18 #include "longlong.h" 19 19 20 #ifndef UMUL_TIME 20 #ifndef UMUL_TIME 21 #define UMUL_TIME 1 21 #define UMUL_TIME 1 22 #endif 22 #endif 23 #ifndef UDIV_TIME 23 #ifndef UDIV_TIME 24 #define UDIV_TIME UMUL_TIME 24 #define UDIV_TIME UMUL_TIME 25 #endif 25 #endif 26 26 27 27 28 mpi_limb_t 28 mpi_limb_t 29 mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size 29 mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, 30 mpi_limb_t divisor_lim 30 mpi_limb_t divisor_limb) 31 { 31 { 32 mpi_size_t i; 32 mpi_size_t i; 33 mpi_limb_t n1, n0, r; 33 mpi_limb_t n1, n0, r; 34 mpi_limb_t dummy __maybe_unused; 34 mpi_limb_t dummy __maybe_unused; 35 35 36 /* Botch: Should this be handled at al 36 /* Botch: Should this be handled at all? Rely on callers? */ 37 if (!dividend_size) 37 if (!dividend_size) 38 return 0; 38 return 0; 39 39 40 /* If multiplication is much faster th 40 /* If multiplication is much faster than division, and the 41 * dividend is large, pre-invert the d 41 * dividend is large, pre-invert the divisor, and use 42 * only multiplications in the inner l 42 * only multiplications in the inner loop. 43 * 43 * 44 * This test should be read: 44 * This test should be read: 45 * Does it ever help to use udiv 45 * Does it ever help to use udiv_qrnnd_preinv? 46 * && Does what we save compen 46 * && Does what we save compensate for the inversion overhead? 47 */ 47 */ 48 if (UDIV_TIME > (2 * UMUL_TIME + 6) 48 if (UDIV_TIME > (2 * UMUL_TIME + 6) 49 && (UDIV_TIME - (2 * U 49 && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) { 50 int normalization_steps; 50 int normalization_steps; 51 51 52 normalization_steps = count_le 52 normalization_steps = count_leading_zeros(divisor_limb); 53 if (normalization_steps) { 53 if (normalization_steps) { 54 mpi_limb_t divisor_lim 54 mpi_limb_t divisor_limb_inverted; 55 55 56 divisor_limb <<= norma 56 divisor_limb <<= normalization_steps; 57 57 58 /* Compute (2**2N - 2* 58 /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The 59 * result is a (N+1)-b 59 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the 60 * most significant bi 60 * most significant bit (with weight 2**N) implicit. 61 * 61 * 62 * Special case for DI 62 * Special case for DIVISOR_LIMB == 100...000. 63 */ 63 */ 64 if (!(divisor_limb << 64 if (!(divisor_limb << 1)) 65 divisor_limb_i 65 divisor_limb_inverted = ~(mpi_limb_t)0; 66 else 66 else 67 udiv_qrnnd(div 67 udiv_qrnnd(divisor_limb_inverted, dummy, 68 68 -divisor_limb, 0, divisor_limb); 69 69 70 n1 = dividend_ptr[divi 70 n1 = dividend_ptr[dividend_size - 1]; 71 r = n1 >> (BITS_PER_MP 71 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); 72 72 73 /* Possible optimizati 73 /* Possible optimization: 74 * if (r == 0 74 * if (r == 0 75 * && divisor_limb > ( 75 * && divisor_limb > ((n1 << normalization_steps) 76 * 76 * | (dividend_ptr[dividend_size - 2] >> ...))) 77 * ...one division les 77 * ...one division less... 78 */ 78 */ 79 for (i = dividend_size 79 for (i = dividend_size - 2; i >= 0; i--) { 80 n0 = dividend_ 80 n0 = dividend_ptr[i]; 81 UDIV_QRNND_PRE 81 UDIV_QRNND_PREINV(dummy, r, r, 82 82 ((n1 << normalization_steps) 83 83 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), 84 84 divisor_limb, divisor_limb_inverted); 85 n1 = n0; 85 n1 = n0; 86 } 86 } 87 UDIV_QRNND_PREINV(dumm 87 UDIV_QRNND_PREINV(dummy, r, r, 88 n1 << 88 n1 << normalization_steps, 89 diviso 89 divisor_limb, divisor_limb_inverted); 90 return r >> normalizat 90 return r >> normalization_steps; 91 } else { 91 } else { 92 mpi_limb_t divisor_lim 92 mpi_limb_t divisor_limb_inverted; 93 93 94 /* Compute (2**2N - 2* 94 /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The 95 * result is a (N+1)-b 95 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the 96 * most significant bi 96 * most significant bit (with weight 2**N) implicit. 97 * 97 * 98 * Special case for DI 98 * Special case for DIVISOR_LIMB == 100...000. 99 */ 99 */ 100 if (!(divisor_limb << 100 if (!(divisor_limb << 1)) 101 divisor_limb_i 101 divisor_limb_inverted = ~(mpi_limb_t)0; 102 else 102 else 103 udiv_qrnnd(div 103 udiv_qrnnd(divisor_limb_inverted, dummy, 104 104 -divisor_limb, 0, divisor_limb); 105 105 106 i = dividend_size - 1; 106 i = dividend_size - 1; 107 r = dividend_ptr[i]; 107 r = dividend_ptr[i]; 108 108 109 if (r >= divisor_limb) 109 if (r >= divisor_limb) 110 r = 0; 110 r = 0; 111 else 111 else 112 i--; 112 i--; 113 113 114 for ( ; i >= 0; i--) { 114 for ( ; i >= 0; i--) { 115 n0 = dividend_ 115 n0 = dividend_ptr[i]; 116 UDIV_QRNND_PRE 116 UDIV_QRNND_PREINV(dummy, r, r, 117 117 n0, divisor_limb, divisor_limb_inverted); 118 } 118 } 119 return r; 119 return r; 120 } 120 } 121 } else { 121 } else { 122 if (UDIV_NEEDS_NORMALIZATION) 122 if (UDIV_NEEDS_NORMALIZATION) { 123 int normalization_step 123 int normalization_steps; 124 124 125 normalization_steps = 125 normalization_steps = count_leading_zeros(divisor_limb); 126 if (normalization_step 126 if (normalization_steps) { 127 divisor_limb < 127 divisor_limb <<= normalization_steps; 128 128 129 n1 = dividend_ 129 n1 = dividend_ptr[dividend_size - 1]; 130 r = n1 >> (BIT 130 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); 131 131 132 /* Possible op 132 /* Possible optimization: 133 * if (r == 0 133 * if (r == 0 134 * && divisor_ 134 * && divisor_limb > ((n1 << normalization_steps) 135 * 135 * | (dividend_ptr[dividend_size - 2] >> ...))) 136 * ...one divi 136 * ...one division less... 137 */ 137 */ 138 for (i = divid 138 for (i = dividend_size - 2; i >= 0; i--) { 139 n0 = d 139 n0 = dividend_ptr[i]; 140 udiv_q 140 udiv_qrnnd(dummy, r, r, 141 141 ((n1 << normalization_steps) 142 142 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), 143 143 divisor_limb); 144 n1 = n 144 n1 = n0; 145 } 145 } 146 udiv_qrnnd(dum 146 udiv_qrnnd(dummy, r, r, 147 147 n1 << normalization_steps, 148 148 divisor_limb); 149 return r >> no 149 return r >> normalization_steps; 150 } 150 } 151 } 151 } 152 /* No normalization needed, ei 152 /* No normalization needed, either because udiv_qrnnd doesn't require 153 * it, or because DIVISOR_LIMB 153 * it, or because DIVISOR_LIMB is already normalized. 154 */ 154 */ 155 i = dividend_size - 1; 155 i = dividend_size - 1; 156 r = dividend_ptr[i]; 156 r = dividend_ptr[i]; 157 157 158 if (r >= divisor_limb) 158 if (r >= divisor_limb) 159 r = 0; 159 r = 0; 160 else 160 else 161 i--; 161 i--; 162 162 163 for (; i >= 0; i--) { 163 for (; i >= 0; i--) { 164 n0 = dividend_ptr[i]; 164 n0 = dividend_ptr[i]; 165 udiv_qrnnd(dummy, r, r 165 udiv_qrnnd(dummy, r, r, n0, divisor_limb); 166 } 166 } 167 return r; 167 return r; 168 } 168 } 169 } 169 } 170 170 171 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and 171 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write 172 * the NSIZE-DSIZE least significant quotient 172 * the NSIZE-DSIZE least significant quotient limbs at QP 173 * and the DSIZE long remainder at NP. If QEX 173 * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is 174 * non-zero, generate that many fraction bits 174 * non-zero, generate that many fraction bits and append them after the 175 * other quotient limbs. 175 * other quotient limbs. 176 * Return the most significant limb of the quo 176 * Return the most significant limb of the quotient, this is always 0 or 1. 177 * 177 * 178 * Preconditions: 178 * Preconditions: 179 * 0. NSIZE >= DSIZE. 179 * 0. NSIZE >= DSIZE. 180 * 1. The most significant bit of the divisor 180 * 1. The most significant bit of the divisor must be set. 181 * 2. QP must either not overlap with the inpu 181 * 2. QP must either not overlap with the input operands at all, or 182 * QP + DSIZE >= NP must hold true. (This 182 * QP + DSIZE >= NP must hold true. (This means that it's 183 * possible to put the quotient in the high 183 * possible to put the quotient in the high part of NUM, right after the 184 * remainder in NUM. 184 * remainder in NUM. 185 * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is 185 * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. 186 */ 186 */ 187 187 188 mpi_limb_t 188 mpi_limb_t 189 mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra 189 mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, 190 mpi_ptr_t np, mpi_size_t nsize, 190 mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize) 191 { 191 { 192 mpi_limb_t most_significant_q_limb = 0 192 mpi_limb_t most_significant_q_limb = 0; 193 193 194 switch (dsize) { 194 switch (dsize) { 195 case 0: 195 case 0: 196 /* We are asked to divide by z 196 /* We are asked to divide by zero, so go ahead and do it! (To make 197 the compiler not remove thi 197 the compiler not remove this statement, return the value.) */ 198 /* 198 /* 199 * existing clients of this fu 199 * existing clients of this function have been modified 200 * not to call it with dsize = 200 * not to call it with dsize == 0, so this should not happen 201 */ 201 */ 202 return 1 / dsize; 202 return 1 / dsize; 203 203 204 case 1: 204 case 1: 205 { 205 { 206 mpi_size_t i; 206 mpi_size_t i; 207 mpi_limb_t n1; 207 mpi_limb_t n1; 208 mpi_limb_t d; 208 mpi_limb_t d; 209 209 210 d = dp[0]; 210 d = dp[0]; 211 n1 = np[nsize - 1]; 211 n1 = np[nsize - 1]; 212 212 213 if (n1 >= d) { 213 if (n1 >= d) { 214 n1 -= d; 214 n1 -= d; 215 most_significa 215 most_significant_q_limb = 1; 216 } 216 } 217 217 218 qp += qextra_limbs; 218 qp += qextra_limbs; 219 for (i = nsize - 2; i 219 for (i = nsize - 2; i >= 0; i--) 220 udiv_qrnnd(qp[ 220 udiv_qrnnd(qp[i], n1, n1, np[i], d); 221 qp -= qextra_limbs; 221 qp -= qextra_limbs; 222 222 223 for (i = qextra_limbs 223 for (i = qextra_limbs - 1; i >= 0; i--) 224 udiv_qrnnd(qp[ 224 udiv_qrnnd(qp[i], n1, n1, 0, d); 225 225 226 np[0] = n1; 226 np[0] = n1; 227 } 227 } 228 break; 228 break; 229 229 230 case 2: 230 case 2: 231 { 231 { 232 mpi_size_t i; 232 mpi_size_t i; 233 mpi_limb_t n1, n0, n2; 233 mpi_limb_t n1, n0, n2; 234 mpi_limb_t d1, d0; 234 mpi_limb_t d1, d0; 235 235 236 np += nsize - 2; 236 np += nsize - 2; 237 d1 = dp[1]; 237 d1 = dp[1]; 238 d0 = dp[0]; 238 d0 = dp[0]; 239 n1 = np[1]; 239 n1 = np[1]; 240 n0 = np[0]; 240 n0 = np[0]; 241 241 242 if (n1 >= d1 && (n1 > 242 if (n1 >= d1 && (n1 > d1 || n0 >= d0)) { 243 sub_ddmmss(n1, 243 sub_ddmmss(n1, n0, n1, n0, d1, d0); 244 most_significa 244 most_significant_q_limb = 1; 245 } 245 } 246 246 247 for (i = qextra_limbs 247 for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) { 248 mpi_limb_t q; 248 mpi_limb_t q; 249 mpi_limb_t r; 249 mpi_limb_t r; 250 250 251 if (i >= qextr 251 if (i >= qextra_limbs) 252 np--; 252 np--; 253 else 253 else 254 np[0] 254 np[0] = 0; 255 255 256 if (n1 == d1) 256 if (n1 == d1) { 257 /* Q s 257 /* Q should be either 111..111 or 111..110. Need special 258 * tre 258 * treatment of this rare case as normal division would 259 * giv 259 * give overflow. */ 260 q = ~( 260 q = ~(mpi_limb_t) 0; 261 261 262 r = n0 262 r = n0 + d1; 263 if (r 263 if (r < d1) { /* Carry in the addition? */ 264 264 add_ssaaaa(n1, n0, r - d0, 265 265 np[0], 0, d0); 266 266 qp[i] = q; 267 267 continue; 268 } 268 } 269 n1 = d 269 n1 = d0 - (d0 != 0 ? 1 : 0); 270 n0 = - 270 n0 = -d0; 271 } else { 271 } else { 272 udiv_q 272 udiv_qrnnd(q, r, n1, n0, d1); 273 umul_p 273 umul_ppmm(n1, n0, d0, q); 274 } 274 } 275 275 276 n2 = np[0]; 276 n2 = np[0]; 277 q_test: 277 q_test: 278 if (n1 > r || 278 if (n1 > r || (n1 == r && n0 > n2)) { 279 /* The 279 /* The estimated Q was too large. */ 280 q--; 280 q--; 281 sub_dd 281 sub_ddmmss(n1, n0, n1, n0, 0, d0); 282 r += d 282 r += d1; 283 if (r 283 if (r >= d1) /* If not carry, test Q again. */ 284 284 goto q_test; 285 } 285 } 286 286 287 qp[i] = q; 287 qp[i] = q; 288 sub_ddmmss(n1, 288 sub_ddmmss(n1, n0, r, n2, n1, n0); 289 } 289 } 290 np[1] = n1; 290 np[1] = n1; 291 np[0] = n0; 291 np[0] = n0; 292 } 292 } 293 break; 293 break; 294 294 295 default: 295 default: 296 { 296 { 297 mpi_size_t i; 297 mpi_size_t i; 298 mpi_limb_t dX, d1, n0; 298 mpi_limb_t dX, d1, n0; 299 299 300 np += nsize - dsize; 300 np += nsize - dsize; 301 dX = dp[dsize - 1]; 301 dX = dp[dsize - 1]; 302 d1 = dp[dsize - 2]; 302 d1 = dp[dsize - 2]; 303 n0 = np[dsize - 1]; 303 n0 = np[dsize - 1]; 304 304 305 if (n0 >= dX) { 305 if (n0 >= dX) { 306 if (n0 > dX 306 if (n0 > dX 307 || mpihelp 307 || mpihelp_cmp(np, dp, dsize - 1) >= 0) { 308 mpihel 308 mpihelp_sub_n(np, np, dp, dsize); 309 n0 = n 309 n0 = np[dsize - 1]; 310 most_s 310 most_significant_q_limb = 1; 311 } 311 } 312 } 312 } 313 313 314 for (i = qextra_limbs 314 for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) { 315 mpi_limb_t q; 315 mpi_limb_t q; 316 mpi_limb_t n1, 316 mpi_limb_t n1, n2; 317 mpi_limb_t cy_ 317 mpi_limb_t cy_limb; 318 318 319 if (i >= qextr 319 if (i >= qextra_limbs) { 320 np--; 320 np--; 321 n2 = n 321 n2 = np[dsize]; 322 } else { 322 } else { 323 n2 = n 323 n2 = np[dsize - 1]; 324 MPN_CO 324 MPN_COPY_DECR(np + 1, np, dsize - 1); 325 np[0] 325 np[0] = 0; 326 } 326 } 327 327 328 if (n0 == dX) 328 if (n0 == dX) { 329 /* Thi 329 /* This might over-estimate q, but it's probably not worth 330 * the 330 * the extra code here to find out. */ 331 q = ~( 331 q = ~(mpi_limb_t) 0; 332 } else { 332 } else { 333 mpi_li 333 mpi_limb_t r; 334 334 335 udiv_q 335 udiv_qrnnd(q, r, n0, np[dsize - 1], dX); 336 umul_p 336 umul_ppmm(n1, n0, d1, q); 337 337 338 while 338 while (n1 > r 339 339 || (n1 == r 340 340 && n0 > np[dsize - 2])) { 341 341 q--; 342 342 r += dX; 343 343 if (r < dX) /* I.e. "carry in previous addition?" */ 344 344 break; 345 345 n1 -= n0 < d1; 346 346 n0 -= d1; 347 } 347 } 348 } 348 } 349 349 350 /* Possible op 350 /* Possible optimization: We already have (q * n0) and (1 * n1) 351 * after the c 351 * after the calculation of q. Taking advantage of that, we 352 * could make 352 * could make this loop make two iterations less. */ 353 cy_limb = mpih 353 cy_limb = mpihelp_submul_1(np, dp, dsize, q); 354 354 355 if (n2 != cy_l 355 if (n2 != cy_limb) { 356 mpihel 356 mpihelp_add_n(np, np, dp, dsize); 357 q--; 357 q--; 358 } 358 } 359 359 360 qp[i] = q; 360 qp[i] = q; 361 n0 = np[dsize 361 n0 = np[dsize - 1]; 362 } 362 } 363 } 363 } 364 } 364 } 365 365 366 return most_significant_q_limb; 366 return most_significant_q_limb; 367 } 367 } 368 368 369 /**************** 369 /**************** 370 * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIV 370 * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. 371 * Write DIVIDEND_SIZE limbs of quotient at QU 371 * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. 372 * Return the single-limb remainder. 372 * Return the single-limb remainder. 373 * There are no constraints on the value of th 373 * There are no constraints on the value of the divisor. 374 * 374 * 375 * QUOT_PTR and DIVIDEND_PTR might point to th 375 * QUOT_PTR and DIVIDEND_PTR might point to the same limb. 376 */ 376 */ 377 377 378 mpi_limb_t 378 mpi_limb_t 379 mpihelp_divmod_1(mpi_ptr_t quot_ptr, 379 mpihelp_divmod_1(mpi_ptr_t quot_ptr, 380 mpi_ptr_t dividend_ptr, mpi_si 380 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, 381 mpi_limb_t divisor_limb) 381 mpi_limb_t divisor_limb) 382 { 382 { 383 mpi_size_t i; 383 mpi_size_t i; 384 mpi_limb_t n1, n0, r; 384 mpi_limb_t n1, n0, r; 385 mpi_limb_t dummy __maybe_unused; 385 mpi_limb_t dummy __maybe_unused; 386 386 387 if (!dividend_size) 387 if (!dividend_size) 388 return 0; 388 return 0; 389 389 390 /* If multiplication is much faster th 390 /* If multiplication is much faster than division, and the 391 * dividend is large, pre-invert the d 391 * dividend is large, pre-invert the divisor, and use 392 * only multiplications in the inner l 392 * only multiplications in the inner loop. 393 * 393 * 394 * This test should be read: 394 * This test should be read: 395 * Does it ever help to use udiv_qrnnd 395 * Does it ever help to use udiv_qrnnd_preinv? 396 * && Does what we save compensate for 396 * && Does what we save compensate for the inversion overhead? 397 */ 397 */ 398 if (UDIV_TIME > (2 * UMUL_TIME + 6) 398 if (UDIV_TIME > (2 * UMUL_TIME + 6) 399 && (UDIV_TIME - (2 * U 399 && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) { 400 int normalization_steps; 400 int normalization_steps; 401 401 402 normalization_steps = count_le 402 normalization_steps = count_leading_zeros(divisor_limb); 403 if (normalization_steps) { 403 if (normalization_steps) { 404 mpi_limb_t divisor_lim 404 mpi_limb_t divisor_limb_inverted; 405 405 406 divisor_limb <<= norma 406 divisor_limb <<= normalization_steps; 407 407 408 /* Compute (2**2N - 2* 408 /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The 409 * result is a (N+1)-b 409 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the 410 * most significant bi 410 * most significant bit (with weight 2**N) implicit. 411 */ 411 */ 412 /* Special case for DI 412 /* Special case for DIVISOR_LIMB == 100...000. */ 413 if (!(divisor_limb << 413 if (!(divisor_limb << 1)) 414 divisor_limb_i 414 divisor_limb_inverted = ~(mpi_limb_t)0; 415 else 415 else 416 udiv_qrnnd(div 416 udiv_qrnnd(divisor_limb_inverted, dummy, 417 417 -divisor_limb, 0, divisor_limb); 418 418 419 n1 = dividend_ptr[divi 419 n1 = dividend_ptr[dividend_size - 1]; 420 r = n1 >> (BITS_PER_MP 420 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); 421 421 422 /* Possible optimizati 422 /* Possible optimization: 423 * if (r == 0 423 * if (r == 0 424 * && divisor_limb > ( 424 * && divisor_limb > ((n1 << normalization_steps) 425 * 425 * | (dividend_ptr[dividend_size - 2] >> ...))) 426 * ...one division les 426 * ...one division less... 427 */ 427 */ 428 for (i = dividend_size 428 for (i = dividend_size - 2; i >= 0; i--) { 429 n0 = dividend_ 429 n0 = dividend_ptr[i]; 430 UDIV_QRNND_PRE 430 UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r, 431 431 ((n1 << normalization_steps) 432 432 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), 433 433 divisor_limb, divisor_limb_inverted); 434 n1 = n0; 434 n1 = n0; 435 } 435 } 436 UDIV_QRNND_PREINV(quot 436 UDIV_QRNND_PREINV(quot_ptr[0], r, r, 437 n1 << 437 n1 << normalization_steps, 438 diviso 438 divisor_limb, divisor_limb_inverted); 439 return r >> normalizat 439 return r >> normalization_steps; 440 } else { 440 } else { 441 mpi_limb_t divisor_lim 441 mpi_limb_t divisor_limb_inverted; 442 442 443 /* Compute (2**2N - 2* 443 /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The 444 * result is a (N+1)-b 444 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the 445 * most significant bi 445 * most significant bit (with weight 2**N) implicit. 446 */ 446 */ 447 /* Special case for DI 447 /* Special case for DIVISOR_LIMB == 100...000. */ 448 if (!(divisor_limb << 448 if (!(divisor_limb << 1)) 449 divisor_limb_i 449 divisor_limb_inverted = ~(mpi_limb_t) 0; 450 else 450 else 451 udiv_qrnnd(div 451 udiv_qrnnd(divisor_limb_inverted, dummy, 452 452 -divisor_limb, 0, divisor_limb); 453 453 454 i = dividend_size - 1; 454 i = dividend_size - 1; 455 r = dividend_ptr[i]; 455 r = dividend_ptr[i]; 456 456 457 if (r >= divisor_limb) 457 if (r >= divisor_limb) 458 r = 0; 458 r = 0; 459 else 459 else 460 quot_ptr[i--] 460 quot_ptr[i--] = 0; 461 461 462 for ( ; i >= 0; i--) { 462 for ( ; i >= 0; i--) { 463 n0 = dividend_ 463 n0 = dividend_ptr[i]; 464 UDIV_QRNND_PRE 464 UDIV_QRNND_PREINV(quot_ptr[i], r, r, 465 465 n0, divisor_limb, divisor_limb_inverted); 466 } 466 } 467 return r; 467 return r; 468 } 468 } 469 } else { 469 } else { 470 if (UDIV_NEEDS_NORMALIZATION) 470 if (UDIV_NEEDS_NORMALIZATION) { 471 int normalization_step 471 int normalization_steps; 472 472 473 normalization_steps = 473 normalization_steps = count_leading_zeros(divisor_limb); 474 if (normalization_step 474 if (normalization_steps) { 475 divisor_limb < 475 divisor_limb <<= normalization_steps; 476 476 477 n1 = dividend_ 477 n1 = dividend_ptr[dividend_size - 1]; 478 r = n1 >> (BIT 478 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); 479 479 480 /* Possible op 480 /* Possible optimization: 481 * if (r == 0 481 * if (r == 0 482 * && divisor_ 482 * && divisor_limb > ((n1 << normalization_steps) 483 * 483 * | (dividend_ptr[dividend_size - 2] >> ...))) 484 * ...one divi 484 * ...one division less... 485 */ 485 */ 486 for (i = divid 486 for (i = dividend_size - 2; i >= 0; i--) { 487 n0 = d 487 n0 = dividend_ptr[i]; 488 udiv_q 488 udiv_qrnnd(quot_ptr[i + 1], r, r, 489 489 ((n1 << normalization_steps) 490 490 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), 491 491 divisor_limb); 492 n1 = n 492 n1 = n0; 493 } 493 } 494 udiv_qrnnd(quo 494 udiv_qrnnd(quot_ptr[0], r, r, 495 495 n1 << normalization_steps, 496 496 divisor_limb); 497 return r >> no 497 return r >> normalization_steps; 498 } 498 } 499 } 499 } 500 /* No normalization needed, ei 500 /* No normalization needed, either because udiv_qrnnd doesn't require 501 * it, or because DIVISOR_LIMB 501 * it, or because DIVISOR_LIMB is already normalized. 502 */ 502 */ 503 i = dividend_size - 1; 503 i = dividend_size - 1; 504 r = dividend_ptr[i]; 504 r = dividend_ptr[i]; 505 505 506 if (r >= divisor_limb) 506 if (r >= divisor_limb) 507 r = 0; 507 r = 0; 508 else 508 else 509 quot_ptr[i--] = 0; 509 quot_ptr[i--] = 0; 510 510 511 for (; i >= 0; i--) { 511 for (; i >= 0; i--) { 512 n0 = dividend_ptr[i]; 512 n0 = dividend_ptr[i]; 513 udiv_qrnnd(quot_ptr[i] 513 udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb); 514 } 514 } 515 return r; 515 return r; 516 } 516 } 517 } 517 } 518 518
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.