~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/include/linux/reciprocal_div.h

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 */
  2 #ifndef _LINUX_RECIPROCAL_DIV_H
  3 #define _LINUX_RECIPROCAL_DIV_H
  4 
  5 #include <linux/types.h>
  6 
  7 /*
  8  * This algorithm is based on the paper "Division by Invariant
  9  * Integers Using Multiplication" by Torbjörn Granlund and Peter
 10  * L. Montgomery.
 11  *
 12  * The assembler implementation from Agner Fog, which this code is
 13  * based on, can be found here:
 14  * http://www.agner.org/optimize/asmlib.zip
 15  *
 16  * This optimization for A/B is helpful if the divisor B is mostly
 17  * runtime invariant. The reciprocal of B is calculated in the
 18  * slow-path with reciprocal_value(). The fast-path can then just use
 19  * a much faster multiplication operation with a variable dividend A
 20  * to calculate the division A/B.
 21  */
 22 
 23 struct reciprocal_value {       /* returned by reciprocal_value(d) */
 24         u32 m;          /* multiplier used by reciprocal_divide() */
 25         u8 sh1, sh2;    /* right-shift counts used by reciprocal_divide() */
 26 };
 27 
 28 /* "reciprocal_value" (slow path, precomputes the reciprocal of divisor d) and
 29  * "reciprocal_divide" (fast path, applies it to a dividend) together implement
 30  * the basic version of the algorithm described in Figure 4.1 of the paper. */
 31 struct reciprocal_value reciprocal_value(u32 d);
 32 
 33 static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
 34 {
 35         const u32 hi = ((u64)a * R.m) >> 32;    /* upper 32 bits of a * m */
 36         return (((a - hi) >> R.sh1) + hi) >> R.sh2;
 37 }
 38 
 39 struct reciprocal_value_adv {
 40         u32 m;          /* multiplier */
 41         u8 sh, exp;     /* sh: final shift count; exp: if d == 1U << exp, n/d is n >> exp */
 42         bool is_wide_m; /* when set, callers use the longer sub/shift/add sequence */
 43 };
 44 
 45 /* "reciprocal_value_adv" implements the advanced version of the algorithm
 46  * described in Figure 4.2 of the paper, except when "divisor > (1U << 31)": in
 47  * that case ceil(log2(d)) is 32, which would require a u128 divide on the host.
 48  * The exception case can easily be handled before calling "reciprocal_value_adv".
 49  *
 50  * The advanced version requires a more complex calculation to get the reciprocal
 51  * multiplier and other control variables, but can then reduce the number of
 52  * emulation operations required.
 53  *
 54  * It makes no sense to use this advanced version for host divide emulation:
 55  * the extra complexity of calculating the multiplier etc. could completely
 56  * cancel out the saving on emulation operations.
 57  *
 58  * However, it makes sense to use it for JIT divide code generation, where we
 59  * are willing to spend host time to make the JITed code faster. As shown by
 60  * the following pseudo code, the required emulation operations could go down
 61  * from 6 (the basic version) to 3 or 4.
 62  *
 63  * To use the result of "reciprocal_value_adv", suppose we want to calculate
 64  * n/d; the pseudo C code will be:
 65  *
 66  *   struct reciprocal_value_adv rvalue;
 67  *   u8 pre_shift, exp;
 68  *
 69  *   // handle exception case.
 70  *   if (d >= (1U << 31)) {
 71  *     result = n >= d;
 72  *     return;
 73  *   }
 74  *
 75  *   rvalue = reciprocal_value_adv(d, 32);
 76  *   exp = rvalue.exp;
 77  *   if (rvalue.is_wide_m && !(d & 1)) {
 78  *     // floor(log2(d & (2^32 - d)))
 79  *     pre_shift = fls(d & -d) - 1;
 80  *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
 81  *   } else {
 82  *     pre_shift = 0;
 83  *   }
 84  *
 85  *   // code generation starts; "result" is assumed to hold n on entry.
 86  *   if (d == 1U << exp) {
 87  *     result = n >> exp;
 88  *   } else if (rvalue.is_wide_m) {
 89  *     // pre_shift must be zero when reached here.
 90  *     t = ((u64)n * rvalue.m) >> 32;
 91  *     result = n - t;
 92  *     result >>= 1;
 93  *     result += t;
 94  *     result >>= rvalue.sh - 1;
 95  *   } else {
 96  *     if (pre_shift)
 97  *       result = n >> pre_shift;
 98  *     result = ((u64)result * rvalue.m) >> 32;
 99  *     result >>= rvalue.sh;
100  *   }
101  */
102 struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
103 
104 #endif /* _LINUX_RECIPROCAL_DIV_H */
105 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php