/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/divide.S
 *
 * (C) 1995 Linus Torvalds
 *
 * Alpha division..
 */

/*
 * The alpha chip doesn't provide hardware division, so we have to do it
 * by hand.  The compiler expects the functions
 *
 *	__divqu: 64-bit unsigned long divide
 *	__remqu: 64-bit unsigned long remainder
 *	__divq/__remq: signed 64-bit
 *	__divlu/__remlu: unsigned 32-bit
 *	__divl/__reml: signed 32-bit
 *
 * (The names are generated below by func(): "qu"/"q" without INTSIZE,
 * "lu"/"l" with INTSIZE; "__div" when DIV is defined, "__rem" otherwise.
 * DIV and INTSIZE are not defined in this file, so they are presumably
 * supplied by the build.)
 *
 * These are not normal C functions: instead of the normal
 * calling sequence, these expect their arguments in registers
 * $24 and $25, and return the result in $27. Register $28 may
 * be clobbered (assembly temporary), anything else must be saved.
 *
 * In short: painful.
 *
 * This is a rather simple bit-at-a-time algorithm: it's very good
 * at dividing random 64-bit numbers, but the more usual case where
 * the divisor is small is handled better by the DEC algorithm
 * using lookup tables. This uses much less memory, though, and is
 * nicer on the cache.. Besides, I don't know the copyright status
 * of the DEC code.
 */

/*
 * My temporaries:
 *	$0 - current bit (mask)
 *	$1 - shifted divisor
 *	$2 - modulus in the DIV build, quotient in the REM build
 *	$3 - tmp1: candidate modulus (modulus - divisor)
 *	$4 - tmp2: candidate quotient (quotient + mask), DIV build only
 *
 *	$23 - return address
 *	$24 - dividend
 *	$25 - divisor
 *
 *	$27 - quotient in the DIV build, modulus in the REM build
 *	$28 - compare status
 */

#include <linux/export.h>
#define halt .long 0

/*
 * Select function type and registers
 */
#define mask	$0
#define divisor	$1
#define compare $28
#define tmp1	$3
#define tmp2	$4

#ifdef DIV
#define DIV_ONLY(x,y...) x,##y
#define MOD_ONLY(x,y...)
#define func(x) __div##x
#define modulus $2
#define quotient $27
/* Sign of a quotient: negative when the operand signs differ. */
#define GETSIGN(x) xor $24,$25,x
/* Five saved registers (40 bytes); 48 is the size the code reserves. */
#define STACK 48
#else
#define DIV_ONLY(x,y...)
#define MOD_ONLY(x,y...) x,##y
#define func(x) __rem##x
#define modulus $27
#define quotient $2
/* Sign of a remainder: follows the dividend. */
#define GETSIGN(x) bis $24,$24,x
/* Four saved registers (32 bytes); tmp2 is not used in this build. */
#define STACK 32
#endif

/*
 * For 32-bit operations, we need to extend to 64-bit
 *
 * LONGIFY keeps only the low four bytes of x (zapnot with byte mask 15);
 * SLONGIFY uses a longword add of zero to sign-extend the low 32 bits.
 */
#ifdef INTSIZE
#define ufunction func(lu)
#define sfunction func(l)
#define LONGIFY(x) zapnot x,15,x
#define SLONGIFY(x) addl x,0,x
#else
#define ufunction func(qu)
#define sfunction func(q)
#define LONGIFY(x)
#define SLONGIFY(x)
#endif

/*
 * ufunction: unsigned divide (DIV build) or remainder (REM build).
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address.
 * Out:  $27 = result.  $28 is used as scratch; $0-$3 (and $4 in the
 *       DIV build) are saved on the stack and restored before return.
 *
 * A zero divisor does not trap: the code branches straight to the
 * epilogue, so the DIV build returns 0 and the REM build returns the
 * (LONGIFY'd) dividend.  The "halt" macro above is not used anywhere
 * in the visible code.
 *
 * Label 7 is also the entry point used by sfunction for operands that
 * need no sign handling; sfunction has already allocated an identical
 * STACK-sized frame by then.
 */
	.set noat
	.align	3
	.globl	ufunction
	.ent	ufunction
ufunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0

	/* Register saves are interleaved with the working-value setup. */
7:	stq	$1, 0($30)
	bis	$25,$25,divisor
	stq	$2, 8($30)
	bis	$24,$24,modulus			/* modulus starts as the dividend */
	stq	$0,16($30)
	bis	$31,$31,quotient		/* quotient = 0 */
	LONGIFY(divisor)
	stq	tmp1,24($30)
	LONGIFY(modulus)
	bis	$31,1,mask			/* mask = 1 */
	DIV_ONLY(stq tmp2,32($30))
	beq	divisor, 9f			/* div by zero */

#ifdef INTSIZE
	/*
	 * shift divisor left, using 3-bit shifts for
	 * 32-bit divides as we can't overflow. Three-bit
	 * shifts will result in looping three times less
	 * here, but can result in two loops more later.
	 * Thus using a large shift isn't worth it (and
	 * s8add pairs better than a sll..)
	 */
1:	cmpult	divisor,modulus,compare
	s8addq	divisor,$31,divisor
	s8addq	mask,$31,mask
	bne	compare,1b
#else
	/*
	 * 64-bit case: double divisor and mask while the divisor is
	 * still below the modulus, but stop as soon as the divisor's
	 * top bit is set, since another doubling would overflow.
	 */
1:	cmpult	divisor,modulus,compare
	blt	divisor, 2f
	addq	divisor,divisor,divisor
	addq	mask,mask,mask
	bne	compare,1b
	unop
#endif

	/*
	 * ok, start to go right again..
	 *
	 * Each pass: if divisor <= modulus, subtract it from the modulus
	 * and (DIV build) add the current mask bit to the quotient; then
	 * move divisor and mask one bit right.  tmp2 is computed from the
	 * mask before it is shifted; the loop ends when mask reaches 0.
	 */
2:	DIV_ONLY(addq quotient,mask,tmp2)
	srl	mask,1,mask
	cmpule	divisor,modulus,compare
	subq	modulus,divisor,tmp1
	DIV_ONLY(cmovne compare,tmp2,quotient)
	srl	divisor,1,divisor
	cmovne	compare,tmp1,modulus
	bne	mask,2b

	/* Restore the saved registers, drop the frame and return. */
9:	ldq	$1, 0($30)
	ldq	$2, 8($30)
	ldq	$0,16($30)
	ldq	tmp1,24($30)
	DIV_ONLY(ldq tmp2,32($30))
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	ufunction
	EXPORT_SYMBOL(ufunction)

/*
 * Uhh.. Ugly signed division. I'd rather not have it at all, but
 * it's needed in some circumstances. There are different ways to
 * handle this, really. This does:
 * 	-a / b = a / -b = -(a / b)
 *	-a % b = -(a % b)
 *	a % -b = a % b
 * which is probably not the best solution, but at least should
 * have the property that (x/y)*y + (x%y) = x.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address.
 * Out:  $27 = result.  $28 is used as scratch.
 *
 * If the OR of both operands is non-negative (after SLONGIFY in the
 * 32-bit build), neither operand is negative and control jumps into
 * ufunction at label 7, which then returns directly to our caller.
 * Otherwise the original operands, $23 and tmp1 are saved, ufunction
 * is called on the absolute values, and the result is negated when
 * GETSIGN of the original operands is negative.
 */
	.align 3
	.globl	sfunction
	.ent	sfunction
sfunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0
	bis	$24,$25,$28
	SLONGIFY($28)
	bge	$28,7b
	stq	$24,0($30)
	subq	$31,$24,$28
	stq	$25,8($30)
	cmovlt	$24,$28,$24	/* abs($24) */
	stq	$23,16($30)	/* the bsr below overwrites $23 */
	subq	$31,$25,$28
	stq	tmp1,24($30)
	cmovlt	$25,$28,$25	/* abs($25) */
	unop
	bsr	$23,ufunction
	ldq	$24,0($30)	/* reload the original signed operands */
	ldq	$25,8($30)
	GETSIGN($28)
	subq	$31,$27,tmp1	/* tmp1 = -result */
	SLONGIFY($28)
	ldq	$23,16($30)
	cmovlt	$28,tmp1,$27	/* use -result when the sign is negative */
	ldq	tmp1,24($30)
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	sfunction
	EXPORT_SYMBOL(sfunction)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.