~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/strlen.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm64/lib/strlen.S (Version linux-6.12-rc7) and /arch/sparc/lib/strlen.S (Version linux-2.6.0)


  1 /* SPDX-License-Identifier: GPL-2.0-only */    !!   1 /* strlen.S: Sparc optimized strlen code
  2 /*                                             !!   2  * Hand optimized from GNU libc's strlen
  3  * Copyright (c) 2013-2021, Arm Limited.       !!   3  * Copyright (C) 1991,1996 Free Software Foundation
  4  *                                             !!   4  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  5  * Adapted from the original at:               !!   5  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  6  * https://github.com/ARM-software/optimized-r << 
  7  */                                                 6  */
  8                                                     7 
  9 #include <linux/linkage.h>                     !!   8 #include <asm/cprefix.h>
 10 #include <asm/assembler.h>                     << 
 11 #include <asm/mte-def.h>                       << 
 12                                                     9 
 13 /* Assumptions:                                !!  10 #define LO_MAGIC 0x01010101
 14  *                                             !!  11 #define HI_MAGIC 0x80808080
 15  * ARMv8-a, AArch64, unaligned accesses, min p << 
 16  */                                            << 
 17                                                << 
 18 #define L(label) .L ## label                   << 
 19                                                << 
 20 /* Arguments and results.  */                  << 
 21 #define srcin           x0                     << 
 22 #define len             x0                     << 
 23                                                << 
 24 /* Locals and temporaries.  */                 << 
 25 #define src             x1                     << 
 26 #define data1           x2                     << 
 27 #define data2           x3                     << 
 28 #define has_nul1        x4                     << 
 29 #define has_nul2        x5                     << 
 30 #define tmp1            x4                     << 
 31 #define tmp2            x5                     << 
 32 #define tmp3            x6                     << 
 33 #define tmp4            x7                     << 
 34 #define zeroones        x8                     << 
 35                                                << 
 36         /* NUL detection works on the principl << 
 37            (=> (X - 1) & ~(X | 0x7f)) is non-z << 
 38            can be done in parallel across the  << 
 39            (X - 1) & 0x80 is zero for non-NUL  << 
 40            false hits for characters 129..255. << 
 41                                                << 
 42 #define REP8_01 0x0101010101010101             << 
 43 #define REP8_7f 0x7f7f7f7f7f7f7f7f             << 
 44 #define REP8_80 0x8080808080808080             << 
 45                                                << 
 46 /*                                             << 
 47  * When KASAN_HW_TAGS is in use, memory is che << 
 48  * (16-byte) granularity, and we must ensure t << 
 49  * alignment boundary.                         << 
 50  */                                            << 
 51 #ifdef CONFIG_KASAN_HW_TAGS                    << 
 52 #define MIN_PAGE_SIZE MTE_GRANULE_SIZE         << 
 53 #else                                          << 
 54 #define MIN_PAGE_SIZE 4096                     << 
 55 #endif                                         << 
 56                                                << 
 57         /* Since strings are short on average, << 
 58            of the string for a NUL character.  << 
 59            safely we have to do a page cross c << 
 60            byte we calculate the length from t << 
 61            conditional select to reduce branch << 
 62            strlen will be repeatedly called on << 
 63                                                    12 
 64            If the string is longer than 16 byt !!  13 0:
 65            further page cross checks, and proc !!  14         ldub    [%o0], %o5
 66            using the fast NUL check.  If we en !!  15         cmp     %o5, 0
 67            fallback to a second loop using the !!  16         be      1f
 68                                                !!  17          add    %o0, 1, %o0
 69            If the page cross check fails, we r !!  18         andcc   %o0, 3, %g0
 70            address, remove any characters befo !!  19         be      4f
 71            in the main loop using aligned load !!  20          or     %o4, %lo(HI_MAGIC), %o3
 72            page in the first 16 bytes are rare !!  21         ldub    [%o0], %o5
 73            16/MIN_PAGE_SIZE ~= 0.4%), this cas !!  22         cmp     %o5, 0
 74                                                !!  23         be      2f
 75            AArch64 systems have a minimum page !!  24          add    %o0, 1, %o0
 76            checking for larger page sizes - th !!  25         andcc   %o0, 3, %g0
 77            page size is just not worth the ext !!  26         be      5f
 78            the cases taking the slow path.  No !!  27          sethi  %hi(LO_MAGIC), %o4
 79            whether the first fetch, which may  !!  28         ldub    [%o0], %o5
 80            boundary.  */                       !!  29         cmp     %o5, 0
 81                                                !!  30         be      3f
 82 SYM_FUNC_START(__pi_strlen)                    !!  31          add    %o0, 1, %o0
 83         and     tmp1, srcin, MIN_PAGE_SIZE - 1 !!  32         b       8f
 84         mov     zeroones, REP8_01              !!  33          or     %o4, %lo(LO_MAGIC), %o2
 85         cmp     tmp1, MIN_PAGE_SIZE - 16       << 
 86         b.gt    L(page_cross)                  << 
 87         ldp     data1, data2, [srcin]          << 
 88 #ifdef __AARCH64EB__                           << 
 89         /* For big-endian, carry propagation ( << 
 90            string is 0x01) means we cannot use << 
 91            Since we expect strings to be small << 
 92            byte-swap the data now so has_null1 << 
 93         rev     data1, data1                   << 
 94         rev     data2, data2                   << 
 95 #endif                                         << 
 96         sub     tmp1, data1, zeroones          << 
 97         orr     tmp2, data1, REP8_7f           << 
 98         sub     tmp3, data2, zeroones          << 
 99         orr     tmp4, data2, REP8_7f           << 
100         bics    has_nul1, tmp1, tmp2           << 
101         bic     has_nul2, tmp3, tmp4           << 
102         ccmp    has_nul2, 0, 0, eq             << 
103         beq     L(main_loop_entry)             << 
104                                                << 
105         /* Enter with C = has_nul1 == 0.  */   << 
106         csel    has_nul1, has_nul1, has_nul2,  << 
107         mov     len, 8                         << 
108         rev     has_nul1, has_nul1             << 
109         clz     tmp1, has_nul1                 << 
110         csel    len, xzr, len, cc              << 
111         add     len, len, tmp1, lsr 3          << 
112         ret                                    << 
113                                                << 
114         /* The inner loop processes 32 bytes p << 
115            NUL check.  If we encounter non-ASC << 
116            loop with the accurate NUL check.   << 
117         .p2align 4                             << 
118 L(main_loop_entry):                            << 
119         bic     src, srcin, 15                 << 
120         sub     src, src, 16                   << 
121 L(main_loop):                                  << 
122         ldp     data1, data2, [src, 32]!       << 
123 L(page_cross_entry):                           << 
124         sub     tmp1, data1, zeroones          << 
125         sub     tmp3, data2, zeroones          << 
126         orr     tmp2, tmp1, tmp3               << 
127         tst     tmp2, zeroones, lsl 7          << 
128         bne     1f                             << 
129         ldp     data1, data2, [src, 16]        << 
130         sub     tmp1, data1, zeroones          << 
131         sub     tmp3, data2, zeroones          << 
132         orr     tmp2, tmp1, tmp3               << 
133         tst     tmp2, zeroones, lsl 7          << 
134         beq     L(main_loop)                   << 
135         add     src, src, 16                   << 
136 1:                                                 34 1:
137         /* The fast check failed, so do the sl !!  35         retl
138         orr     tmp2, data1, REP8_7f           !!  36          mov    0, %o0
139         orr     tmp4, data2, REP8_7f           !!  37 2:
140         bics    has_nul1, tmp1, tmp2           !!  38         retl
141         bic     has_nul2, tmp3, tmp4           !!  39          mov    1, %o0
142         ccmp    has_nul2, 0, 0, eq             !!  40 3:
143         beq     L(nonascii_loop)               !!  41         retl
144                                                !!  42          mov    2, %o0
145         /* Enter with C = has_nul1 == 0.  */   !!  43 
146 L(tail):                                       !!  44         .align 4
147 #ifdef __AARCH64EB__                           !!  45         .global C_LABEL(strlen)
148         /* For big-endian, carry propagation ( !!  46 C_LABEL(strlen):
149            string is 0x01) means we cannot use !!  47         mov     %o0, %o1
150            easiest way to get the correct byte !!  48         andcc   %o0, 3, %g0
151            and calculate the syndrome a second !!  49         bne     0b
152         csel    data1, data1, data2, cc        !!  50          sethi  %hi(HI_MAGIC), %o4
153         rev     data1, data1                   !!  51         or      %o4, %lo(HI_MAGIC), %o3
154         sub     tmp1, data1, zeroones          !!  52 4:
155         orr     tmp2, data1, REP8_7f           !!  53         sethi   %hi(LO_MAGIC), %o4
156         bic     has_nul1, tmp1, tmp2           !!  54 5:
157 #else                                          !!  55         or      %o4, %lo(LO_MAGIC), %o2
158         csel    has_nul1, has_nul1, has_nul2,  !!  56 8:
159 #endif                                         !!  57         ld      [%o0], %o5
160         sub     len, src, srcin                !!  58 2:
161         rev     has_nul1, has_nul1             !!  59         sub     %o5, %o2, %o4
162         add     tmp2, len, 8                   !!  60         andcc   %o4, %o3, %g0
163         clz     tmp1, has_nul1                 !!  61         be      8b
164         csel    len, len, tmp2, cc             !!  62          add    %o0, 4, %o0
165         add     len, len, tmp1, lsr 3          !!  63 
166         ret                                    !!  64         /* Check every byte. */
167                                                !!  65         srl     %o5, 24, %g5
168 L(nonascii_loop):                              !!  66         andcc   %g5, 0xff, %g0
169         ldp     data1, data2, [src, 16]!       !!  67         be      1f
170         sub     tmp1, data1, zeroones          !!  68          add    %o0, -4, %o4
171         orr     tmp2, data1, REP8_7f           !!  69         srl     %o5, 16, %g5
172         sub     tmp3, data2, zeroones          !!  70         andcc   %g5, 0xff, %g0
173         orr     tmp4, data2, REP8_7f           !!  71         be      1f
174         bics    has_nul1, tmp1, tmp2           !!  72          add    %o4, 1, %o4
175         bic     has_nul2, tmp3, tmp4           !!  73         srl     %o5, 8, %g5
176         ccmp    has_nul2, 0, 0, eq             !!  74         andcc   %g5, 0xff, %g0
177         bne     L(tail)                        !!  75         be      1f
178         ldp     data1, data2, [src, 16]!       !!  76          add    %o4, 1, %o4
179         sub     tmp1, data1, zeroones          !!  77         andcc   %o5, 0xff, %g0
180         orr     tmp2, data1, REP8_7f           !!  78         bne,a   2b
181         sub     tmp3, data2, zeroones          !!  79          ld     [%o0], %o5
182         orr     tmp4, data2, REP8_7f           !!  80         add     %o4, 1, %o4
183         bics    has_nul1, tmp1, tmp2           !!  81 1:
184         bic     has_nul2, tmp3, tmp4           !!  82         retl
185         ccmp    has_nul2, 0, 0, eq             !!  83          sub    %o4, %o1, %o0
186         beq     L(nonascii_loop)               << 
187         b       L(tail)                        << 
188                                                << 
189         /* Load 16 bytes from [srcin & ~15] an << 
190            srcin to 0x7f, so we ignore any NUL << 
191            Then continue in the aligned loop.  << 
192 L(page_cross):                                 << 
193         bic     src, srcin, 15                 << 
194         ldp     data1, data2, [src]            << 
195         lsl     tmp1, srcin, 3                 << 
196         mov     tmp4, -1                       << 
197 #ifdef __AARCH64EB__                           << 
198         /* Big-endian.  Early bytes are at MSB << 
199         lsr     tmp1, tmp4, tmp1        /* Shi << 
200 #else                                          << 
201         /* Little-endian.  Early bytes are at  << 
202         lsl     tmp1, tmp4, tmp1        /* Shi << 
203 #endif                                         << 
204         orr     tmp1, tmp1, REP8_80            << 
205         orn     data1, data1, tmp1             << 
206         orn     tmp2, data2, tmp1              << 
207         tst     srcin, 8                       << 
208         csel    data1, data1, tmp4, eq         << 
209         csel    data2, data2, tmp2, eq         << 
210         b       L(page_cross_entry)            << 
211 SYM_FUNC_END(__pi_strlen)                      << 
212 SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen)       << 
213 EXPORT_SYMBOL_NOKASAN(strlen)                  << 
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php