1 /* SPDX-License-Identifier: GPL-2.0 */ 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* strlen.S: Sparc optimized strlen code !! 2 /* 3 * Hand optimized from GNU libc's strlen !! 3 * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) 4 * Copyright (C) 1991,1996 Free Software Found !! 4 * 5 * Copyright (C) 1996,2008 David S. Miller (da !! 5 * Finds length of a 0-terminated string. Optimized for the 6 * Copyright (C) 1996, 1997 Jakub Jelinek (jj@ !! 6 * Alpha architecture: >> 7 * >> 8 * - memory accessed as aligned quadwords only >> 9 * - uses bcmpge to compare 8 bytes in parallel >> 10 * - does binary search to find 0 byte in last >> 11 * quadword (HAKMEM needed 12 instructions to >> 12 * do this instead of the 9 instructions that >> 13 * binary search needs). 7 */ 14 */ 8 << 9 #include <linux/export.h> 15 #include <linux/export.h> 10 #include <linux/linkage.h> !! 16 .set noreorder 11 #include <asm/asm.h> !! 17 .set noat >> 18 >> 19 .align 3 >> 20 >> 21 .globl strlen >> 22 .ent strlen >> 23 >> 24 strlen: >> 25 ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) >> 26 lda $2, -1($31) >> 27 insqh $2, $16, $2 >> 28 andnot $16, 7, $0 >> 29 or $2, $1, $1 >> 30 cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 >> 31 bne $2, found >> 32 >> 33 loop: ldq $1, 8($0) >> 34 addq $0, 8, $0 # addr += 8 >> 35 nop # helps dual issue last two insns >> 36 cmpbge $31, $1, $2 >> 37 beq $2, loop >> 38 >> 39 found: blbs $2, done # make aligned case fast >> 40 negq $2, $3 >> 41 and $2, $3, $2 >> 42 >> 43 and $2, 0x0f, $1 >> 44 addq $0, 4, $3 >> 45 cmoveq $1, $3, $0 >> 46 >> 47 and $2, 0x33, $1 >> 48 addq $0, 2, $3 >> 49 cmoveq $1, $3, $0 >> 50 >> 51 and $2, 0x55, $1 >> 52 addq $0, 1, $3 >> 53 cmoveq $1, $3, $0 12 54 13 #define LO_MAGIC 0x01010101 !! 55 done: subq $0, $16, $0 14 #define HI_MAGIC 0x80808080 !! 56 ret $31, ($26) 15 57 16 .text !! 58 .end strlen 17 ENTRY(strlen) !! 59 EXPORT_SYMBOL(strlen) 18 mov %o0, %o1 << 19 andcc %o0, 3, %g0 << 20 BRANCH32(be, pt, 9f) << 21 sethi %hi(HI_MAGIC), %o4 << 22 ldub [%o0], %o5 << 23 BRANCH_REG_ZERO(pn, %o5, 11f) << 24 add %o0, 1, %o0 << 25 andcc %o0, 3, %g0 << 26 BRANCH32(be, pn, 4f) << 27 or %o4, %lo(HI_MAGIC), %o3 << 28 ldub [%o0], %o5 << 29 BRANCH_REG_ZERO(pn, %o5, 12f) << 30 add %o0, 1, %o0 << 31 andcc %o0, 3, %g0 << 32 BRANCH32(be, pt, 5f) << 33 sethi %hi(LO_MAGIC), %o4 << 34 ldub [%o0], %o5 << 35 BRANCH_REG_ZERO(pn, %o5, 13f) << 36 add %o0, 1, %o0 << 37 BRANCH32(ba, pt, 8f) << 38 or %o4, %lo(LO_MAGIC), %o2 << 39 9: << 40 or %o4, %lo(HI_MAGIC), %o3 << 41 4: << 42 sethi %hi(LO_MAGIC), %o4 << 43 5: << 44 or %o4, %lo(LO_MAGIC), %o2 << 45 8: << 46 ld [%o0], %o5 << 47 2: << 48 sub %o5, %o2, %o4 << 49 andcc %o4, %o3, %g0 << 50 BRANCH32(be, pt, 8b) << 51 add %o0, 4, %o0 << 52 << 53 /* Check every byte. */ << 54 srl %o5, 24, %g7 << 55 andcc %g7, 0xff, %g0 << 56 BRANCH32(be, pn, 1f) << 57 add %o0, -4, %o4 << 58 srl %o5, 16, %g7 << 59 andcc %g7, 0xff, %g0 << 60 BRANCH32(be, pn, 1f) << 61 add %o4, 1, %o4 << 62 srl %o5, 8, %g7 << 63 andcc %g7, 0xff, %g0 << 64 BRANCH32(be, pn, 1f) << 65 add %o4, 1, %o4 << 66 andcc %o5, 0xff, %g0 << 67 BRANCH32_ANNUL(bne, pt, 2b) << 68 ld [%o0], %o5 << 69 add %o4, 1, %o4 << 70 1: << 71 retl << 72 sub %o4, %o1, %o0 << 73 11: << 74 retl << 75 mov 0, %o0 << 76 12: << 77 retl << 78 mov 1, %o0 << 79 13: << 80 retl << 81 mov 2, %o0 << 82 ENDPROC(strlen) << 83 EXPORT_SYMBOL(strlen) <<
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.