~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/strcmp.S

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-only */
  2 /*
  3  * Copyright (c) 2012-2022, Arm Limited.
  4  *
  5  * Adapted from the original at:
  6  * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
  7  */
  8 
  9 #include <linux/linkage.h>
 10 #include <asm/assembler.h>
 11 
 12 /* Assumptions:
 13  *
 14  * ARMv8-a, AArch64.
 15  * MTE compatible.
 16  */
 17 
 18 #define L(label) .L ## label    /* L(foo) expands to the label name .Lfoo.  */
 19 
    /* 64-bit constants holding the same byte value in all eight byte lanes;
       both are operands of the word-at-a-time NUL test.  */
 20 #define REP8_01 0x0101010101010101
 21 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 22 
    /* Incoming string pointers and the return value.  result shares x0 with
       src1, so src1 is no longer available once result has been written.  */
 23 #define src1            x0
 24 #define src2            x1
 25 #define result          x0
 26 
    /* Scratch registers (x2-x10).  Several names share one register and must
       therefore not be live at the same time:
         diff / off1     -> x5
         syndrome / tmp  -> x6
       data1w and data2w are the 32-bit views of data1 and data2.  */
 27 #define data1           x2
 28 #define data1w          w2
 29 #define data2           x3
 30 #define data2w          w3
 31 #define has_nul         x4      /* NUL markers produced by the NUL test.  */
 32 #define diff            x5      /* data1 ^ data2.  */
 33 #define off1            x5      /* Aligned string-2 read position minus src1.  */
 34 #define syndrome        x6      /* diff combined with the NUL markers.  */
 35 #define tmp             x6
 36 #define data3           x7      /* Aligned word read from string 2.  */
 37 #define zeroones        x8      /* Holds REP8_01.  */
 38 #define shift           x9
 39 #define off2            x10     /* src2 - src1, computed on entry.  */
 40 
 41 /* On big-endian early bytes are at MSB and on little-endian LSB.
 42    LS_FW means shifting towards early bytes.  */
 43 #ifdef __AARCH64EB__
 44 # define LS_FW lsl
 45 #else
 46 # define LS_FW lsr
 47 #endif
 48 
 49 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
 50    (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
 51    can be done in parallel across the entire word.
 52    Since carry propagation makes 0x1 bytes before a NUL byte appear
 53    NUL too in big-endian, byte-reverse the data before the NUL check.  */
 54 
 55 
 56 SYM_FUNC_START(__pi_strcmp)
            /* Entry: src1 (x0) and src2 (x1) point at the two strings.
               Exit:  result (x0) is zero if the strings compare equal;
                      otherwise it is non-zero and its sign is that of the
                      first mismatch, with bytes compared as unsigned values.
               Writes only x0-x10 and the condition flags; nothing is stored
               to memory.

               Dispatch on the low three bits of the two pointers:
                 - both 8-byte aligned           -> L(loop_aligned)
                 - equal, non-zero offsets       -> L(mutual_align)
                 - different offsets             -> L(misaligned8)  */
 57         sub     off2, src2, src1        /* [src1, off2] now addresses string 2.  */
 58         mov     zeroones, REP8_01
 59         and     tmp, src1, 7            /* Offset of src1 within its 8-byte word.  */
 60         tst     off2, 7                 /* Non-zero if the two offsets differ.  */
 61         b.ne    L(misaligned8)
 62         cbnz    tmp, L(mutual_align)
 63 
 64         .p2align 4
 65 
 66 L(loop_aligned):
            /* Fetch the next 8 bytes of each string.  src1 is post-incremented;
               string 2 is addressed relative to it through off2.  */
 67         ldr     data2, [src1, off2]
 68         ldr     data1, [src1], 8
 69 L(start_realigned):
            /* has_nul = (data1 - REP8_01) & ~(data1 | REP8_7f).  On big-endian
               the test runs on a byte-reversed copy of data1 and the markers
               are reversed back after the loop.  */
 70 #ifdef __AARCH64EB__
 71         rev     tmp, data1
 72         sub     has_nul, tmp, zeroones
 73         orr     tmp, tmp, REP8_7f
 74 #else
 75         sub     has_nul, data1, zeroones
 76         orr     tmp, data1, REP8_7f
 77 #endif
 78         bics    has_nul, has_nul, tmp   /* Non-zero if NUL terminator.  */
            /* If bics left Z set (no NUL) compare the two words; otherwise
               load NZCV with 0 so that the b.eq below is not taken.  */
 79         ccmp    data1, data2, 0, eq
 80         b.eq    L(loop_aligned)
 81 #ifdef __AARCH64EB__
 82         rev     has_nul, has_nul
 83 #endif
            /* syndrome has a bit set wherever the words differ or a NUL marker
               was raised.  */
 84         eor     diff, data1, data2
 85         orr     syndrome, diff, has_nul
 86 L(end):
            /* Common exit for the word-at-a-time paths.  Expects data1, data2
               and syndrome in memory byte order.  On little-endian all three
               are byte-reversed first so that the earliest byte is the most
               significant one and clz locates the first event.  */
 87 #ifndef __AARCH64EB__
 88         rev     syndrome, syndrome
 89         rev     data1, data1
 90         rev     data2, data2
 91 #endif
 92         clz     shift, syndrome
 93         /* The most-significant-non-zero bit of the syndrome marks either the
 94            first bit that is different, or the top bit of the first zero byte.
 95            Shifting left now will bring the critical information into the
 96            top bits.  */
 97         lsl     data1, data1, shift
 98         lsl     data2, data2, shift
 99         /* But we need to zero-extend (char is unsigned) the value and then
100            perform a signed 32-bit subtraction.  */
101         lsr     data1, data1, 56
102         sub     result, data1, data2, lsr 56
103         ret
104 
105         .p2align 4
106 
107 L(mutual_align):
108         /* Sources are mutually aligned, but are not currently at an
109            alignment boundary.  Round down the addresses and then mask off
110            the bytes that precede the start point.  */
111         bic     src1, src1, 7
            /* off2 is a multiple of 8 on this path, so [src1, off2] is the
               rounded-down address of string 2.  */
112         ldr     data2, [src1, off2]
113         ldr     data1, [src1], 8
114         neg     shift, src2, lsl 3      /* Bits to alignment -64.  */
            /* Shift an all-ones value towards the early bytes so that ones
               remain only in the byte lanes that precede the start of the
               strings, then OR the same mask into both words: those lanes
               become equal and non-zero.  NOTE(review): this relies on the
               register shift amount being taken modulo 64.  */
115         mov     tmp, -1
116         LS_FW   tmp, tmp, shift
117         orr     data1, data1, tmp
118         orr     data2, data2, tmp
119         b       L(start_realigned)
120 
121 L(misaligned8):
122         /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
123            checking to make sure that we don't access beyond the end of SRC2.  */
            /* tmp still holds src1 & 7 from the entry sequence.  */
124         cbz     tmp, L(src1_aligned)
125 L(do_misaligned):
            /* Byte-at-a-time loop, run until src1 reaches an 8-byte boundary.
               Leaves through L(done) on a NUL in string 1 or on a mismatch.  */
126         ldrb    data1w, [src1], 1
127         ldrb    data2w, [src2], 1
128         cmp     data1w, 0
129         ccmp    data1w, data2w, 0, ne   /* NZCV = 0b0000.  */
130         b.ne    L(done)
131         tst     src1, 7
132         b.ne    L(do_misaligned)
133 
134 L(src1_aligned):
            /* src1 is 8-byte aligned, src2 is not.  Read the aligned word that
               contains the current string-2 position into data3 and advance
               src2 to the following aligned word.  */
135         neg     shift, src2, lsl 3
136         bic     src2, src2, 7
137         ldr     data3, [src2], 8
138 #ifdef __AARCH64EB__
139         rev     data3, data3
140 #endif
            /* Set the low bit of every byte lane of data3 that precedes the
               current string-2 position, so that those lanes cannot be taken
               for a NUL, then run the NUL test on data3.  */
141         lsr     tmp, zeroones, shift
142         orr     data3, data3, tmp
143         sub     has_nul, data3, zeroones
144         orr     tmp, data3, REP8_7f
145         bics    has_nul, has_nul, tmp
146         b.ne    L(tail)                 /* NUL within this first aligned word.  */
147 
            /* [src1, off1] addresses the next aligned word of string 2.  */
148         sub     off1, src2, src1
149 
150         .p2align 4
151 
152 L(loop_unaligned):
            /* data3: next aligned word of string 2, used for the NUL test.
               data2: unaligned 8 bytes of string 2 matching data1; they end
                      inside the word just read into data3.
               data1: aligned 8 bytes of string 1 (src1 post-incremented).
               Loop while data3 has no NUL and data1 == data2.  */
153         ldr     data3, [src1, off1]
154         ldr     data2, [src1, off2]
155 #ifdef __AARCH64EB__
156         rev     data3, data3
157 #endif
158         sub     has_nul, data3, zeroones
159         orr     tmp, data3, REP8_7f
160         ldr     data1, [src1], 8
161         bics    has_nul, has_nul, tmp
162         ccmp    data1, data2, 0, eq
163         b.eq    L(loop_unaligned)
164 
            /* Keep only the NUL markers for the bytes of data3 that data2 also
               covers, moved to the lanes those bytes occupy in data2.  */
165         lsl     tmp, has_nul, shift
166 #ifdef __AARCH64EB__
167         rev     tmp, tmp
168 #endif
169         eor     diff, data1, data2
170         orr     syndrome, diff, tmp
            /* A difference or a NUL inside the 8 bytes just compared: finish
               at L(end).  Otherwise the NUL lies in the part of data3 that has
               not been compared yet; fall through.  */
171         cbnz    syndrome, L(end)
172 L(tail):
            /* Compare the not-yet-compared bytes of data3 against the next 8
               bytes of string 1.  Negating shift makes the right shifts drop
               the lanes of data3 and has_nul that precede those bytes.  */
173         ldr     data1, [src1]
174         neg     shift, shift
175         lsr     data2, data3, shift
176         lsr     has_nul, has_nul, shift
177 #ifdef __AARCH64EB__
178         rev     data2, data2
179         rev     has_nul, has_nul
180 #endif
181         eor     diff, data1, data2
182         orr     syndrome, diff, has_nul
183         b       L(end)
184 
185 L(done):
            /* Exit of the byte loop: data1 and data2 hold the two bytes last
               loaded by ldrb; return their difference.  */
186         sub     result, data1, data2
187         ret
188 SYM_FUNC_END(__pi_strcmp)
    /* The routine is defined under the name __pi_strcmp.  The two macros below
       are defined outside this file; going by their names they presumably
       make strcmp a weak alias of it and export that symbol - confirm against
       their definitions.  */
189 SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
190 EXPORT_SYMBOL_NOKASAN(strcmp)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php