
TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/strncmp.S


Diff markup

Differences between /arch/arm64/lib/strncmp.S (Version linux-6.12-rc7) and /arch/sparc64/lib/strncmp.S (Version linux-2.6.0)


  1 /* SPDX-License-Identifier: GPL-2.0-only */    !!   1 /* $Id: strncmp.S,v 1.2 1997/03/11 17:51:44 jj Exp $
  2 /*                                             !!   2  * Sparc64 optimized strncmp code.
  3  * Copyright (c) 2013-2022, Arm Limited.       << 
  4  *                                                  3  *
  5  * Adapted from the original at:               !!   4  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  6  * https://github.com/ARM-software/optimized-r << 
  7  */                                                 5  */
  8                                                     6 
  9 #include <linux/linkage.h>                     !!   7 #include <asm/asi.h>
 10 #include <asm/assembler.h>                     << 
 11                                                     8 
 12 /* Assumptions:                                !!   9         .text
 13  *                                             !!  10         .align 4
 14  * ARMv8-a, AArch64.                           !!  11         .global __strncmp, strncmp
 15  * MTE compatible.                             !!  12 __strncmp:
 16  */                                            !!  13 strncmp:
 17                                                !!  14         brlez,pn %o2, 3f
 18 #define L(label) .L ## label                   !!  15          lduba  [%o0] (ASI_PNF), %o3
 19                                                << 
 20 #define REP8_01 0x0101010101010101             << 
 21 #define REP8_7f 0x7f7f7f7f7f7f7f7f             << 
 22                                                << 
 23 /* Parameters and result.  */                  << 
 24 #define src1            x0                     << 
 25 #define src2            x1                     << 
 26 #define limit           x2                     << 
 27 #define result          x0                     << 
 28                                                << 
 29 /* Internal variables.  */                     << 
 30 #define data1           x3                     << 
 31 #define data1w          w3                     << 
 32 #define data2           x4                     << 
 33 #define data2w          w4                     << 
 34 #define has_nul         x5                     << 
 35 #define diff            x6                     << 
 36 #define syndrome        x7                     << 
 37 #define tmp1            x8                     << 
 38 #define tmp2            x9                     << 
 39 #define tmp3            x10                    << 
 40 #define zeroones        x11                    << 
 41 #define pos             x12                    << 
 42 #define mask            x13                    << 
 43 #define endloop         x14                    << 
 44 #define count           mask                   << 
 45 #define offset          pos                    << 
 46 #define neg_offset      x15                    << 
 47                                                << 
 48 /* Define endian dependent shift operations.   << 
 49    On big-endian early bytes are at MSB and on little-endian LSB.  <<
 50    LS_FW means shifting towards early bytes.   << 
 51    LS_BK means shifting towards later bytes.   << 
 52    */                                          << 
 53 #ifdef __AARCH64EB__                           << 
 54 #define LS_FW lsl                              << 
 55 #define LS_BK lsr                              << 
 56 #else                                          << 
 57 #define LS_FW lsr                              << 
 58 #define LS_BK lsl                              << 
 59 #endif                                         << 
 60                                                << 
 61 SYM_FUNC_START(__pi_strncmp)                   << 
 62         cbz     limit, L(ret0)                 << 
 63         eor     tmp1, src1, src2               << 
 64         mov     zeroones, #REP8_01             << 
 65         tst     tmp1, #7                       << 
 66         and     count, src1, #7                << 
 67         b.ne    L(misaligned8)                 << 
 68         cbnz    count, L(mutual_align)         << 
 69                                                << 
 70         /* NUL detection works on the principle that (X - 1) & (~X) & 0x80  <<
 71            (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and    <<
 72            can be done in parallel across the entire word.  */               <<
 73         .p2align 4                             << 
 74 L(loop_aligned):                               << 
 75         ldr     data1, [src1], #8              << 
 76         ldr     data2, [src2], #8              << 
 77 L(start_realigned):                            << 
 78         subs    limit, limit, #8               << 
 79         sub     tmp1, data1, zeroones          << 
 80         orr     tmp2, data1, #REP8_7f          << 
 81         eor     diff, data1, data2      /* Non-zero if differences found.  */  <<
 82         csinv   endloop, diff, xzr, hi  /* Last Dword or differences.  */      <<
 83         bics    has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */     <<
 84         ccmp    endloop, #0, #0, eq            << 
 85         b.eq    L(loop_aligned)                << 
 86         /* End of main loop */                 << 
 87                                                << 
 88 L(full_check):                                 << 
 89 #ifndef __AARCH64EB__                          << 
 90         orr     syndrome, diff, has_nul        << 
 91         add     limit, limit, 8 /* Rewind limit to before last subs. */  <<
 92 L(syndrome_check):                             << 
 93         /* Limit was reached. Check if the NUL byte or the difference  <<
 94            is before the limit. */             << 
 95         rev     syndrome, syndrome             << 
 96         rev     data1, data1                   << 
 97         clz     pos, syndrome                  << 
 98         rev     data2, data2                   << 
 99         lsl     data1, data1, pos              << 
100         cmp     limit, pos, lsr #3             << 
101         lsl     data2, data2, pos              << 
102         /* But we need to zero-extend (char is unsigned) the value and then  <<
103            perform a signed 32-bit subtraction.  */                           <<
104         lsr     data1, data1, #56              << 
105         sub     result, data1, data2, lsr #56  << 
106         csel result, result, xzr, hi           << 
107         ret                                    << 
108 #else                                          << 
109         /* Not reached the limit, must have found the end or a diff.  */  <<
110         tbz     limit, #63, L(not_limit)       << 
111         add     tmp1, limit, 8                 << 
112         cbz     limit, L(not_limit)            << 
113                                                << 
114         lsl     limit, tmp1, #3 /* Bits -> bytes.  */  <<
115         mov     mask, #~0                      << 
116         lsr     mask, mask, limit              << 
117         bic     data1, data1, mask             << 
118         bic     data2, data2, mask             << 
119                                                << 
120         /* Make sure that the NUL byte is marked in the syndrome.  */  <<
121         orr     has_nul, has_nul, mask         << 
122                                                << 
123 L(not_limit):                                  << 
124         /* For big-endian we cannot use the trick with the syndrome value   <<
125            as carry-propagation can corrupt the upper bits if the trailing  <<
126            bytes in the string contain 0x01.  */                            <<
127         /* However, if there is no NUL byte in the dword, we can generate   <<
128            the result directly.  We can't just subtract the bytes as the    <<
129            MSB might be significant.  */       << 
130         cbnz    has_nul, 1f                    << 
131         cmp     data1, data2                   << 
132         cset    result, ne                     << 
133         cneg    result, result, lo             << 
134         ret                                    << 
135 1:                                                 16 1:
136         /* Re-compute the NUL-byte detection, using a byte-reversed value.  */  !!  17         add     %o0, 1, %o0
137         rev     tmp3, data1                    !!  18         ldub    [%o1], %o4
138         sub     tmp1, tmp3, zeroones           !!  19         brz,pn  %o3, 2f
139         orr     tmp2, tmp3, #REP8_7f           !!  20          add    %o1, 1, %o1
140         bic     has_nul, tmp1, tmp2            !!  21         cmp     %o3, %o4
141         rev     has_nul, has_nul               !!  22         bne,pn  %icc, 2f
142         orr     syndrome, diff, has_nul        !!  23          subcc  %o2, 1, %o2
143         clz     pos, syndrome                  !!  24         bne,a,pt %xcc, 1b
144         /* The most-significant-non-zero bit of the syndrome marks either the   !!  25          ldub   [%o0], %o3
145            first bit that is different, or the top bit of the first zero byte.  !!  26 2:
146            Shifting left now will bring the critical information into the       !!  27         retl
147            top bits.  */                       !!  28          sub    %o3, %o4, %o0
148 L(end_quick):                                  !!  29 3:
149         lsl     data1, data1, pos              !!  30         retl
150         lsl     data2, data2, pos              !!  31          clr    %o0
151         /* But we need to zero-extend (char is unsigned) the value and then  <<
152            perform a signed 32-bit subtraction.  */                           <<
153         lsr     data1, data1, #56              << 
154         sub     result, data1, data2, lsr #56  << 
155         ret                                    << 
156 #endif                                         << 
157                                                << 
158 L(mutual_align):                               << 
159         /* Sources are mutually aligned, but are not currently at an        <<
160            alignment boundary.  Round down the addresses and then mask off  <<
161            the bytes that precede the start point.                          <<
162            We also need to adjust the limit calculations, but without       <<
163            overflowing if the limit is near ULONG_MAX.  */                  <<
164         bic     src1, src1, #7                 << 
165         bic     src2, src2, #7                 << 
166         ldr     data1, [src1], #8              << 
167         neg     tmp3, count, lsl #3     /* 64 - bits(bytes beyond align). */  <<
168         ldr     data2, [src2], #8              << 
169         mov     tmp2, #~0                      << 
170         LS_FW   tmp2, tmp2, tmp3        /* Shift (count & 63).  */  <<
171         /* Adjust the limit and ensure it doesn't overflow.  */     <<
172         adds    limit, limit, count            << 
173         csinv   limit, limit, xzr, lo          << 
174         orr     data1, data1, tmp2             << 
175         orr     data2, data2, tmp2             << 
176         b       L(start_realigned)             << 
177                                                << 
178         .p2align 4                             << 
179         /* Don't bother with dwords for up to 16 bytes.  */  <<
180 L(misaligned8):                                << 
181         cmp     limit, #16                     << 
182         b.hs    L(try_misaligned_words)        << 
183                                                << 
184 L(byte_loop):                                  << 
185         /* Perhaps we can do better than this.  */  <<
186         ldrb    data1w, [src1], #1             << 
187         ldrb    data2w, [src2], #1             << 
188         subs    limit, limit, #1               << 
189         ccmp    data1w, #1, #0, hi      /* NZCV = 0b0000.  */  <<
190         ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */  <<
191         b.eq    L(byte_loop)                   << 
192 L(done):                                       << 
193         sub     result, data1, data2           << 
194         ret                                    << 
195         /* Align the SRC1 to a dword by doing a bytewise compare and then do  <<
196            the dword loop.  */                 << 
197 L(try_misaligned_words):                       << 
198         cbz     count, L(src1_aligned)         << 
199                                                << 
200         neg     count, count                   << 
201         and     count, count, #7               << 
202         sub     limit, limit, count            << 
203                                                << 
204 L(page_end_loop):                              << 
205         ldrb    data1w, [src1], #1             << 
206         ldrb    data2w, [src2], #1             << 
207         cmp     data1w, #1                     << 
208         ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */  <<
209         b.ne    L(done)                        << 
210         subs    count, count, #1               << 
211         b.hi    L(page_end_loop)               << 
212                                                << 
213         /* The following diagram explains the comparison of misaligned strings.  <<
214            The bytes are shown in natural order. For little-endian, it is        <<
215            reversed in the registers. The "x" bytes are before the string.       <<
216            The "|" separates data that is loaded at one time.                    <<
217            src1     | a a a a a a a a | b b b c c c c c | . . .                  <<
218            src2     | x x x x x a a a   a a a a a b b b | c c c c c . . .        <<
219                                                                                  <<
220            After shifting in each step, the data looks like this:                <<
221                         STEP_A              STEP_B              STEP_C           <<
222            data1    a a a a a a a a     b b b c c c c c     b b b c c c c c      <<
223            data2    a a a a a a a a     b b b 0 0 0 0 0     0 0 0 c c c c c      <<
224                                                                                  <<
225            The bytes with "0" are eliminated from the syndrome via mask.         <<
226                                                                                  <<
227            Align SRC2 down to 16 bytes. This way we can read 16 bytes at a       <<
228            time from SRC2. The comparison happens in 3 steps. After each step    <<
229            the loop can exit, or read from SRC1 or SRC2. */                      <<
230 L(src1_aligned):                               << 
231         /* Calculate offset from 8 byte alignment to string start in bits. No  <<
232            need to mask offset since shifts are ignoring upper bits. */         <<
233         lsl     offset, src2, #3               << 
234         bic     src2, src2, #0xf               << 
235         mov     mask, -1                       << 
236         neg     neg_offset, offset             << 
237         ldr     data1, [src1], #8              << 
238         ldp     tmp1, tmp2, [src2], #16        << 
239         LS_BK   mask, mask, neg_offset         << 
240         and     neg_offset, neg_offset, #63    << 
241         /* Skip the first compare if data in tmp1 is irrelevant. */  <<
242         tbnz    offset, 6, L(misaligned_mid_loop)                    <<
243                                                << 
244 L(loop_misaligned):                            << 
245         /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/  <<
246         LS_FW   data2, tmp1, offset            << 
247         LS_BK   tmp1, tmp2, neg_offset         << 
248         subs    limit, limit, #8               << 
249         orr     data2, data2, tmp1      /* 8 bytes from SRC2 combined from two regs.*/  <<
250         sub     has_nul, data1, zeroones                                                <<
251         eor     diff, data1, data2      /* Non-zero if differences found.  */           <<
252         orr     tmp3, data1, #REP8_7f                                                   <<
253         csinv   endloop, diff, xzr, hi  /* If limit, set to all ones. */                <<
254         bic     has_nul, has_nul, tmp3  /* Non-zero if NUL byte found in SRC1. */       <<
255         orr     tmp3, endloop, has_nul         << 
256         cbnz    tmp3, L(full_check)            << 
257                                                << 
258         ldr     data1, [src1], #8              << 
259 L(misaligned_mid_loop):                        << 
260         /* STEP_B: Compare first part of data1 to second part of tmp2. */  <<
261         LS_FW   data2, tmp2, offset            << 
262 #ifdef __AARCH64EB__                           << 
263         /* For big-endian we do a byte reverse to avoid carry-propagation  <<
264         problem described above. This way we can reuse the has_nul in the  <<
265         next step and also use syndrome value trick at the end. */         <<
266         rev     tmp3, data1                    << 
267         #define data1_fixed tmp3               << 
268 #else                                          << 
269         #define data1_fixed data1              << 
270 #endif                                         << 
271         sub     has_nul, data1_fixed, zeroones << 
272         orr     tmp3, data1_fixed, #REP8_7f    << 
273         eor     diff, data2, data1      /* Non-zero if differences found.  */  <<
274         bic     has_nul, has_nul, tmp3  /* Non-zero if NUL terminator.  */      <<
275 #ifdef __AARCH64EB__                           << 
276         rev     has_nul, has_nul               << 
277 #endif                                         << 
278         cmp     limit, neg_offset, lsr #3      << 
279         orr     syndrome, diff, has_nul        << 
280         bic     syndrome, syndrome, mask       << 
281         csinv   tmp3, syndrome, xzr, hi /* If limit, set to all ones. */  <<
282         cbnz    tmp3, L(syndrome_check)        << 
283                                                << 
284         /* STEP_C: Compare second part of data << 
285         ldp     tmp1, tmp2, [src2], #16        << 
286         cmp     limit, #8                      << 
287         LS_BK   data2, tmp1, neg_offset        << 
288         eor     diff, data2, data1      /* Non-zero if differences found.  */  <<
289         orr     syndrome, diff, has_nul        << 
290         and     syndrome, syndrome, mask       << 
291         csinv   tmp3, syndrome, xzr, hi /* If limit, set to all ones. */  <<
292         cbnz    tmp3, L(syndrome_check)        << 
293                                                << 
294         ldr     data1, [src1], #8              << 
295         sub     limit, limit, #8               << 
296         b       L(loop_misaligned)             << 
297                                                << 
298 #ifdef  __AARCH64EB__                          << 
299 L(syndrome_check):                             << 
300         clz     pos, syndrome                  << 
301         cmp     pos, limit, lsl #3             << 
302         b.lo    L(end_quick)                   << 
303 #endif                                         << 
304                                                << 
305 L(ret0):                                       << 
306         mov     result, #0                     << 
307         ret                                    << 
308 SYM_FUNC_END(__pi_strncmp)                     << 
309 SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp)     << 
310 EXPORT_SYMBOL_NOKASAN(strncmp)                 << 
                                                      

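Note on the arm64 code above: its aligned loop rests on the word-at-a-time NUL test described in its comments, namely that (X - 1) & ~(X | 0x7f7f7f7f7f7f7f7f) is non-zero iff some byte of X is zero. The following is only an illustrative C sketch of that identity; it is not part of either kernel file, and the helper name has_zero_byte() is invented for the example.

/* Illustration only: the zero-byte detection identity used by the arm64
 * strncmp loop above.  has_zero_byte() is a made-up helper name.  */
#include <stdint.h>
#include <stdio.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_7f 0x7f7f7f7f7f7f7f7fULL

/* Non-zero iff at least one byte of x is 0x00; ~(x | 0x7f..7f) keeps only
 * the top bit of each byte of ~x, and (x - 0x01..01) sets that top bit for
 * the lowest zero byte without any false positives.  */
static int has_zero_byte(uint64_t x)
{
        return ((x - REP8_01) & ~(x | REP8_7f)) != 0;
}

int main(void)
{
        printf("%d\n", has_zero_byte(0x1122334455667788ULL)); /* no zero byte  -> 0 */
        printf("%d\n", has_zero_byte(0x1122334400667788ULL)); /* one zero byte -> 1 */
        return 0;
}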