/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/divide.S
 *
 * (C) 1995 Linus Torvalds
 *
 * Alpha division..
 */

/*
 * The alpha chip doesn't provide hardware division, so we have to do it
 * by hand.  The compiler expects the functions
 *
 *	__divqu: 64-bit unsigned long divide
 *	__remqu: 64-bit unsigned long remainder
 *	__divq/__remq: signed 64-bit
 *	__divlu/__remlu: unsigned 32-bit
 *	__divl/__reml: signed 32-bit
 *
 * (The names above are exactly what the func()/ufunction/sfunction
 * macros below expand to.)
 *
 * These are not normal C functions: instead of the normal
 * calling sequence, these expect their arguments in registers
 * $24 and $25, and return the result in $27. Register $28 may
 * be clobbered (assembly temporary), anything else must be saved.
 *
 * In short: painful.
 *
 * This is a rather simple bit-at-a-time algorithm: it's very good
 * at dividing random 64-bit numbers, but the more usual case where
 * the divisor is small is handled better by the DEC algorithm
 * using lookup tables. This uses much less memory, though, and is
 * nicer on the cache.. Besides, I don't know the copyright status
 * of the DEC code.
 */

/*
 * My temporaries:
 *	$0 - current bit
 *	$1 - shifted divisor
 *	$2 - modulus/quotient
 *
 *	$23 - return address
 *	$24 - dividend
 *	$25 - divisor
 *
 *	$27 - quotient/modulus
 *	$28 - compare status
 */

#include <linux/export.h>
#define halt .long 0

/*
 * Select function type and registers
 *
 * The same source is built several times: DIV selects the quotient
 * routines (otherwise the remainder routines are generated), and
 * INTSIZE selects the 32-bit variants (otherwise 64-bit).
 */
#define mask	$0
#define divisor	$1
#define compare $28
#define tmp1	$3
#define tmp2	$4

#ifdef DIV
/* Quotient build: result register $27 holds the quotient. */
#define DIV_ONLY(x,y...) x,##y
#define MOD_ONLY(x,y...)
#define func(x) __div##x
#define modulus $2
#define quotient $27
/* Sign of a quotient: negative iff the operand signs differ. */
#define GETSIGN(x) xor $24,$25,x
/* Five 8-byte save slots are used (offsets 0..32). */
#define STACK 48
#else
/* Remainder build: result register $27 holds the modulus. */
#define DIV_ONLY(x,y...)
#define MOD_ONLY(x,y...) x,##y
#define func(x) __rem##x
#define modulus $27
#define quotient $2
/* Sign of a remainder: follows the dividend. */
#define GETSIGN(x) bis $24,$24,x
/* Four 8-byte save slots are used (offsets 0..24). */
#define STACK 32
#endif

/*
 * For 32-bit operations, we need to extend to 64-bit
 */
#ifdef INTSIZE
#define ufunction func(lu)
#define sfunction func(l)
/* Keep the low four bytes, clear the rest (zero-extend). */
#define LONGIFY(x) zapnot x,15,x
/* 32-bit add of zero: sign-extends the low 32 bits. */
#define SLONGIFY(x) addl x,0,x
#else
#define ufunction func(qu)
#define sfunction func(q)
#define LONGIFY(x)
#define SLONGIFY(x)
#endif

/*
 * Unsigned divide/remainder.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = quotient (DIV build) or remainder (otherwise)
 *
 * $0, $1, $2, tmp1 (and tmp2 in the DIV build) are saved on entry and
 * restored on exit; $28 is used as scratch and not preserved.
 *
 * A zero divisor is not trapped: the code branches straight to the
 * exit path, so the quotient is left at 0 and the modulus is left
 * equal to the (LONGIFY'd) dividend.
 */
	.set noat
	.align	3
	.globl	ufunction
	.ent	ufunction
ufunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0

	/*
	 * Label 7 is also the entry point used by sfunction when both
	 * operands are non-negative; the frame is already allocated then.
	 */
7:	stq	$1, 0($30)
	bis	$25,$25,divisor
	stq	$2, 8($30)
	bis	$24,$24,modulus
	stq	$0,16($30)
	bis	$31,$31,quotient
	LONGIFY(divisor)
	stq	tmp1,24($30)
	LONGIFY(modulus)
	bis	$31,1,mask
	DIV_ONLY(stq tmp2,32($30))
	beq	divisor, 9f			/* div by zero */

#ifdef INTSIZE
	/*
	 * shift divisor left, using 3-bit shifts for
	 * 32-bit divides as we can't overflow. Three-bit
	 * shifts will result in looping three times less
	 * here, but can result in two loops more later.
	 * Thus using a large shift isn't worth it (and
	 * s8add pairs better than a sll..)
	 */
1:	cmpult	divisor,modulus,compare
	s8addq	divisor,$31,divisor
	s8addq	mask,$31,mask
	bne	compare,1b
#else
	/*
	 * 64-bit: shift one bit at a time, and stop early once the
	 * top bit of the divisor is set so the shift cannot overflow.
	 */
1:	cmpult	divisor,modulus,compare
	blt     divisor, 2f
	addq	divisor,divisor,divisor
	addq	mask,mask,mask
	bne	compare,1b
	unop
#endif

	/* ok, start to go right again.. */
	/*
	 * For each mask bit: if divisor <= modulus, subtract the divisor
	 * from the modulus and (DIV build) add the mask bit to the
	 * quotient.  Then shift divisor and mask right; stop when the
	 * mask has been shifted out.
	 */
2:	DIV_ONLY(addq quotient,mask,tmp2)
	srl	mask,1,mask
	cmpule	divisor,modulus,compare
	subq	modulus,divisor,tmp1
	DIV_ONLY(cmovne compare,tmp2,quotient)
	srl	divisor,1,divisor
	cmovne	compare,tmp1,modulus
	bne	mask,2b

	/* restore the saved registers, pop the frame and return via $23 */
9:	ldq	$1, 0($30)
	ldq	$2, 8($30)
	ldq	$0,16($30)
	ldq	tmp1,24($30)
	DIV_ONLY(ldq tmp2,32($30))
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	ufunction
EXPORT_SYMBOL(ufunction)

/*
 * Uhh.. Ugly signed division. I'd rather not have it at all, but
 * it's needed in some circumstances. There are different ways to
 * handle this, really. This does:
 * 	-a / b = a / -b = -(a / b)
 *	-a % b = -(a % b)
 *	a % -b = a % b
 * which is probably not the best solution, but at least should
 * have the property that (x/y)*y + (x%y) = x.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = quotient (DIV build) or remainder (otherwise)
 *
 * $24, $25, $23 and tmp1 are saved and restored around the call into
 * ufunction; $28 is used as scratch.
 */
	.align 3
	.globl	sfunction
	.ent	sfunction
sfunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0
	/*
	 * If neither operand has its sign bit set, the unsigned code
	 * gives the right answer: jump into ufunction past its prologue,
	 * reusing the frame allocated above.
	 */
	bis	$24,$25,$28
	SLONGIFY($28)
	bge	$28,7b
	stq	$24,0($30)
	subq	$31,$24,$28
	stq	$25,8($30)
	cmovlt	$24,$28,$24	/* abs($24) */
	stq	$23,16($30)
	subq	$31,$25,$28
	stq	tmp1,24($30)
	cmovlt	$25,$28,$25	/* abs($25) */
	unop
	/* unsigned operation on the absolute values; result in $27 */
	bsr	$23,ufunction
	/* reload the original operands to work out the result's sign */
	ldq	$24,0($30)
	ldq	$25,8($30)
	GETSIGN($28)
	subq	$31,$27,tmp1
	SLONGIFY($28)
	ldq	$23,16($30)
	/* negate the result when GETSIGN produced a negative value */
	cmovlt	$28,tmp1,$27
	ldq	tmp1,24($30)
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	sfunction
EXPORT_SYMBOL(sfunction)
/*
 * Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
 * TOMOYO® is a registered trademark of NTT DATA CORPORATION.
 */