~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/strlen.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm64/lib/strlen.S (Version linux-6.12-rc7) and /arch/sparc64/lib/strlen.S (Version linux-2.6.0)


  1 /* SPDX-License-Identifier: GPL-2.0-only */    !!   1 /* strlen.S: Sparc64 optimized strlen code
  2 /*                                             !!   2  * Hand optimized from GNU libc's strlen
  3  * Copyright (c) 2013-2021, Arm Limited.       !!   3  * Copyright (C) 1991,1996 Free Software Foundation
  4  *                                             !!   4  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  5  * Adapted from the original at:               !!   5  * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  6  * https://github.com/ARM-software/optimized-r << 
  7  */                                                 6  */
  8                                                     7 
  9 #include <linux/linkage.h>                     !!   8 #define LO_MAGIC 0x01010101
 10 #include <asm/assembler.h>                     !!   9 #define HI_MAGIC 0x80808080
 11 #include <asm/mte-def.h>                       << 
 12                                                    10 
 13 /* Assumptions:                                !!  11         .align  32
 14  *                                             !!  12         .global strlen, __strlen
 15  * ARMv8-a, AArch64, unaligned accesses, min p !!  13 strlen:
 16  */                                            !!  14 __strlen:
 17                                                !!  15         mov     %o0, %o1
 18 #define L(label) .L ## label                   !!  16         andcc   %o0, 3, %g0
 19                                                !!  17         be,pt   %icc, 9f
 20 /* Arguments and results.  */                  !!  18          sethi  %hi(HI_MAGIC), %o4
 21 #define srcin           x0                     !!  19         ldub    [%o0], %o5
 22 #define len             x0                     !!  20         brz,pn  %o5, 11f
 23                                                !!  21          add    %o0, 1, %o0
 24 /* Locals and temporaries.  */                 !!  22         andcc   %o0, 3, %g0
 25 #define src             x1                     !!  23         be,pn   %icc, 4f
 26 #define data1           x2                     !!  24          or     %o4, %lo(HI_MAGIC), %o3
 27 #define data2           x3                     !!  25         ldub    [%o0], %o5
 28 #define has_nul1        x4                     !!  26         brz,pn  %o5, 12f
 29 #define has_nul2        x5                     !!  27          add    %o0, 1, %o0
 30 #define tmp1            x4                     !!  28         andcc   %o0, 3, %g0
 31 #define tmp2            x5                     !!  29         be,pt   %icc, 5f
 32 #define tmp3            x6                     !!  30          sethi  %hi(LO_MAGIC), %o4
 33 #define tmp4            x7                     !!  31         ldub    [%o0], %o5
 34 #define zeroones        x8                     !!  32         brz,pn  %o5, 13f
 35                                                !!  33          add    %o0, 1, %o0
 36         /* NUL detection works on the principl !!  34         ba,pt   %icc, 8f
 37            (=> (X - 1) & ~(X | 0x7f)) is non-z !!  35          or     %o4, %lo(LO_MAGIC), %o2
 38            can be done in parallel across the  !!  36 9:
 39            (X - 1) & 0x80 is zero for non-NUL  !!  37         or      %o4, %lo(HI_MAGIC), %o3
 40            false hits for characters 129..255. !!  38 4:
 41                                                !!  39         sethi   %hi(LO_MAGIC), %o4
 42 #define REP8_01 0x0101010101010101             !!  40 5:
 43 #define REP8_7f 0x7f7f7f7f7f7f7f7f             !!  41         or      %o4, %lo(LO_MAGIC), %o2
 44 #define REP8_80 0x8080808080808080             !!  42 8:
 45                                                !!  43         ld      [%o0], %o5
 46 /*                                             !!  44 2:
 47  * When KASAN_HW_TAGS is in use, memory is che !!  45         sub     %o5, %o2, %o4
 48  * (16-byte) granularity, and we must ensure t !!  46         andcc   %o4, %o3, %g0
 49  * alignment boundary.                         !!  47         be,pt   %icc, 8b
 50  */                                            !!  48          add    %o0, 4, %o0
 51 #ifdef CONFIG_KASAN_HW_TAGS                    !!  49 
 52 #define MIN_PAGE_SIZE MTE_GRANULE_SIZE         !!  50         /* Check every byte. */
 53 #else                                          !!  51         srl     %o5, 24, %g5
 54 #define MIN_PAGE_SIZE 4096                     !!  52         andcc   %g5, 0xff, %g0
 55 #endif                                         !!  53         be,pn   %icc, 1f
 56                                                !!  54          add    %o0, -4, %o4
 57         /* Since strings are short on average, !!  55         srl     %o5, 16, %g5
 58            of the string for a NUL character.  !!  56         andcc   %g5, 0xff, %g0
 59            safely we have to do a page cross c !!  57         be,pn   %icc, 1f
 60            byte we calculate the length from t !!  58          add    %o4, 1, %o4
 61            conditional select to reduce branch !!  59         srl     %o5, 8, %g5
 62            strlen will be repeatedly called on !!  60         andcc   %g5, 0xff, %g0
 63                                                !!  61         be,pn   %icc, 1f
 64            If the string is longer than 16 byt !!  62          add    %o4, 1, %o4
 65            further page cross checks, and proc !!  63         andcc   %o5, 0xff, %g0
 66            using the fast NUL check.  If we en !!  64         bne,a,pt %icc, 2b
 67            fallback to a second loop using the !!  65          ld     [%o0], %o5
 68                                                !!  66         add     %o4, 1, %o4
 69            If the page cross check fails, we r << 
 70            address, remove any characters befo << 
 71            in the main loop using aligned load << 
 72            page in the first 16 bytes are rare << 
 73            16/MIN_PAGE_SIZE ~= 0.4%), this cas << 
 74                                                << 
 75            AArch64 systems have a minimum page << 
 76            checking for larger page sizes - th << 
 77            page size is just not worth the ext << 
 78            the cases taking the slow path.  No << 
 79            whether the first fetch, which may  << 
 80            boundary.  */                       << 
 81                                                << 
 82 SYM_FUNC_START(__pi_strlen)                    << 
 83         and     tmp1, srcin, MIN_PAGE_SIZE - 1 << 
 84         mov     zeroones, REP8_01              << 
 85         cmp     tmp1, MIN_PAGE_SIZE - 16       << 
 86         b.gt    L(page_cross)                  << 
 87         ldp     data1, data2, [srcin]          << 
 88 #ifdef __AARCH64EB__                           << 
 89         /* For big-endian, carry propagation ( << 
 90            string is 0x01) means we cannot use << 
 91            Since we expect strings to be small << 
 92            byte-swap the data now so has_null1 << 
 93         rev     data1, data1                   << 
 94         rev     data2, data2                   << 
 95 #endif                                         << 
 96         sub     tmp1, data1, zeroones          << 
 97         orr     tmp2, data1, REP8_7f           << 
 98         sub     tmp3, data2, zeroones          << 
 99         orr     tmp4, data2, REP8_7f           << 
100         bics    has_nul1, tmp1, tmp2           << 
101         bic     has_nul2, tmp3, tmp4           << 
102         ccmp    has_nul2, 0, 0, eq             << 
103         beq     L(main_loop_entry)             << 
104                                                << 
105         /* Enter with C = has_nul1 == 0.  */   << 
106         csel    has_nul1, has_nul1, has_nul2,  << 
107         mov     len, 8                         << 
108         rev     has_nul1, has_nul1             << 
109         clz     tmp1, has_nul1                 << 
110         csel    len, xzr, len, cc              << 
111         add     len, len, tmp1, lsr 3          << 
112         ret                                    << 
113                                                << 
114         /* The inner loop processes 32 bytes p << 
115            NUL check.  If we encounter non-ASC << 
116            loop with the accurate NUL check.   << 
117         .p2align 4                             << 
118 L(main_loop_entry):                            << 
119         bic     src, srcin, 15                 << 
120         sub     src, src, 16                   << 
121 L(main_loop):                                  << 
122         ldp     data1, data2, [src, 32]!       << 
123 L(page_cross_entry):                           << 
124         sub     tmp1, data1, zeroones          << 
125         sub     tmp3, data2, zeroones          << 
126         orr     tmp2, tmp1, tmp3               << 
127         tst     tmp2, zeroones, lsl 7          << 
128         bne     1f                             << 
129         ldp     data1, data2, [src, 16]        << 
130         sub     tmp1, data1, zeroones          << 
131         sub     tmp3, data2, zeroones          << 
132         orr     tmp2, tmp1, tmp3               << 
133         tst     tmp2, zeroones, lsl 7          << 
134         beq     L(main_loop)                   << 
135         add     src, src, 16                   << 
136 1:                                                 67 1:
137         /* The fast check failed, so do the sl !!  68         retl
138         orr     tmp2, data1, REP8_7f           !!  69          sub    %o4, %o1, %o0
139         orr     tmp4, data2, REP8_7f           !!  70 11:
140         bics    has_nul1, tmp1, tmp2           !!  71         retl
141         bic     has_nul2, tmp3, tmp4           !!  72          mov    0, %o0
142         ccmp    has_nul2, 0, 0, eq             !!  73 12:
143         beq     L(nonascii_loop)               !!  74         retl
144                                                !!  75          mov    1, %o0
145         /* Enter with C = has_nul1 == 0.  */   !!  76 13:
146 L(tail):                                       !!  77         retl
147 #ifdef __AARCH64EB__                           !!  78          mov    2, %o0
148         /* For big-endian, carry propagation ( << 
149            string is 0x01) means we cannot use << 
150            easiest way to get the correct byte << 
151            and calculate the syndrome a second << 
152         csel    data1, data1, data2, cc        << 
153         rev     data1, data1                   << 
154         sub     tmp1, data1, zeroones          << 
155         orr     tmp2, data1, REP8_7f           << 
156         bic     has_nul1, tmp1, tmp2           << 
157 #else                                          << 
158         csel    has_nul1, has_nul1, has_nul2,  << 
159 #endif                                         << 
160         sub     len, src, srcin                << 
161         rev     has_nul1, has_nul1             << 
162         add     tmp2, len, 8                   << 
163         clz     tmp1, has_nul1                 << 
164         csel    len, len, tmp2, cc             << 
165         add     len, len, tmp1, lsr 3          << 
166         ret                                    << 
167                                                << 
168 L(nonascii_loop):                              << 
169         ldp     data1, data2, [src, 16]!       << 
170         sub     tmp1, data1, zeroones          << 
171         orr     tmp2, data1, REP8_7f           << 
172         sub     tmp3, data2, zeroones          << 
173         orr     tmp4, data2, REP8_7f           << 
174         bics    has_nul1, tmp1, tmp2           << 
175         bic     has_nul2, tmp3, tmp4           << 
176         ccmp    has_nul2, 0, 0, eq             << 
177         bne     L(tail)                        << 
178         ldp     data1, data2, [src, 16]!       << 
179         sub     tmp1, data1, zeroones          << 
180         orr     tmp2, data1, REP8_7f           << 
181         sub     tmp3, data2, zeroones          << 
182         orr     tmp4, data2, REP8_7f           << 
183         bics    has_nul1, tmp1, tmp2           << 
184         bic     has_nul2, tmp3, tmp4           << 
185         ccmp    has_nul2, 0, 0, eq             << 
186         beq     L(nonascii_loop)               << 
187         b       L(tail)                        << 
188                                                << 
189         /* Load 16 bytes from [srcin & ~15] an << 
190            srcin to 0x7f, so we ignore any NUL << 
191            Then continue in the aligned loop.  << 
192 L(page_cross):                                 << 
193         bic     src, srcin, 15                 << 
194         ldp     data1, data2, [src]            << 
195         lsl     tmp1, srcin, 3                 << 
196         mov     tmp4, -1                       << 
197 #ifdef __AARCH64EB__                           << 
198         /* Big-endian.  Early bytes are at MSB << 
199         lsr     tmp1, tmp4, tmp1        /* Shi << 
200 #else                                          << 
201         /* Little-endian.  Early bytes are at  << 
202         lsl     tmp1, tmp4, tmp1        /* Shi << 
203 #endif                                         << 
204         orr     tmp1, tmp1, REP8_80            << 
205         orn     data1, data1, tmp1             << 
206         orn     tmp2, data2, tmp1              << 
207         tst     srcin, 8                       << 
208         csel    data1, data1, tmp4, eq         << 
209         csel    data2, data2, tmp2, eq         << 
210         b       L(page_cross_entry)            << 
211 SYM_FUNC_END(__pi_strlen)                      << 
212 SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen)       << 
213 EXPORT_SYMBOL_NOKASAN(strlen)                  << 
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php