/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/*
 * Conventions used throughout this file:
 *   - 32-bit ARM, GNU as syntax, run through the C preprocessor.
 *   - Every entry point takes the dividend in r0 and the divisor in r1
 *     and returns its primary result in r0.
 *   - A zero divisor is routed to Ldiv0 at the bottom of the file.
 *   - "ret", "reteq" and "retls" are not ARM mnemonics; presumably they
 *     are return macros supplied by <asm/assembler.h> -- confirm there.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *
 * In:   dividend, divisor -- unsigned operands.  Every expansion in this
 *       file is reached only after the caller has branched away for a
 *       zero divisor, for dividend <= divisor and for power-of-two
 *       divisors.
 * Out:  result = quotient.
 * Clobbers: dividend, divisor, curbit and the condition flags.
 *       The dividend register is NOT guaranteed to hold the remainder
 *       on exit: the unrolled steps still subtract (divisor >> n) when
 *       the matching (curbit >> n) has already been shifted out to zero.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Align the divisor with the dividend in one go: the shift count
	@ is the difference between their leading-zero counts.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result	@ result = shift count (temporary use)
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result	@ quotient bit matching the shifted divisor
	mov	\result, #0			@ start accumulating the quotient

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per iteration, at
	@ divisor, divisor >> 1, divisor >> 2 and divisor >> 3, each one
	@ setting the corresponding quotient bit when the subtraction fits.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * In:   divisor -- the callers in this file only expand this macro after
 *       "tst divisor, divisor - 1" reported a power of two.
 * Out:  order = bit index of the set bit, i.e. the right-shift count
 *       that divides by divisor.
 * Clobbers: condition flags and divisor on the pre-ARMv5 path (the
 *       ARMv5+ path leaves both untouched).  The last step of the
 *       pre-ARMv5 path relies on the value being a power of two.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search for the set bit: fold the divisor down by
	@ 16, 8 and 4 bits, adding each fold to the order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now 1, 2, 4 or 8: add 3 for 8, else add divisor >> 1
	@ (0, 1 or 2).
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned shift-and-subtract remainder core.
 *
 * In:   dividend, divisor -- unsigned operands.  Every expansion in this
 *       file is reached only when dividend > divisor and the divisor is
 *       neither zero, one nor a power of two.
 * Out:  dividend = remainder.
 * Clobbers: divisor, order, the condition flags, and spare (spare is
 *       only written on the ARMv5+ path).
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = number of bit positions the divisor is shifted up so
	@ that its top set bit lines up with that of the dividend.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f				@ fewer than 4 steps in total

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ anything left to reduce?
	mov	\divisor, \divisor, lsr #4	@ plain mov: flags are preserved
	subsge	\order, \order, #4		@ count the batch only if dividend >= 1
	bge	1b

	@ Finished if no partial batch remains (low two bits of order
	@ are clear) or the dividend has already dropped to zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2			@ compare order with -2
	blt	4f				@ order < -2: one step left
	beq	3f				@ order == -2: two steps left
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * NOTE(review): the 8-byte alignment under CONFIG_ARM_PATCH_IDIV is
 * presumably required by code elsewhere that patches this entry point;
 * nothing in this file depends on it -- confirm against that code.
 */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = r0 / r1
 * Clobbers: r1, r2, r3, condition flags.
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: quotient is r0 as is
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor: quotient 0 or 1
	tst	r1, r2
	beq	12f				@ divisor is a power of 2: just shift

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2				@ quotient was accumulated in r2
	ret	lr

	@ Flags still come from "cmp r0, r1": equal -> 1, lower -> 0.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = r0 % r1
 * Clobbers: r1, r2, r3, condition flags.
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0				@ divisor == 1 or operands equal
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: mask with divisor - 1
	retls	lr				@ done unless the full loop is needed

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

#ifdef CONFIG_ARM_PATCH_IDIV
	.align 3
#endif

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = r0 / r1; the magnitude is computed unsigned and the result
 *       is negated when the operand signs differ.
 * Clobbers: r1, r2, r3, ip, condition flags.
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ |dividend| <= |divisor|
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ apply the saved sign
	ret	lr

	@ |divisor| == 1: ip ^ r0 has the sign of the original divisor,
	@ so negate the dividend only when dividing by -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ Flags still come from "cmp r3, r1".
11:	movlo	r0, #0				@ |dividend| < |divisor|: quotient 0
	moveq	r0, ip, asr #31			@ equal magnitudes: 0 or -1 from sign
	orreq	r0, r0, #1			@ ... turned into +1 or -1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = r0 % r1; the result carries the sign of the dividend.
 * Clobbers: r1, r2, r3, ip, condition flags.
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f				@ answered without the full loop

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ give the result the dividend sign
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient (from __aeabi_uidiv),
 *       r1 = dividend - quotient * divisor
 * Clobbers: r2, r3, condition flags.  ip is saved and restored around
 *       the call; together with r0, r1 and lr that makes a 16-byte frame.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient (from __aeabi_idiv),
 *       r1 = dividend - quotient * divisor
 * Clobbers: r2, r3, condition flags.  ip is saved and restored around
 *       the call; together with r0, r1 and lr that makes a 16-byte frame.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common divide-by-zero tail for the routines above.
 *
 * Saves lr, calls __div0 (defined outside this file), then returns 0 in
 * r0 to the original caller.  sp is moved by 8 bytes although only lr
 * is stored; the ".pad #4" unwind annotation accounts for the extra word.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ return via the saved lr, releasing the frame
UNWIND(.fnend)
ENDPROC(Ldiv0)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.