~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/strlen.S

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm64/lib/strlen.S (Architecture sparc64) and /arch/sparc/lib/strlen.S (Architecture sparc)


  1 /* SPDX-License-Identifier: GPL-2.0-only */    !!   1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*                                             !!   2 /* strlen.S: Sparc optimized strlen code
  3  * Copyright (c) 2013-2021, Arm Limited.       !!   3  * Hand optimized from GNU libc's strlen
  4  *                                             !!   4  * Copyright (C) 1991,1996 Free Software Foundation
  5  * Adapted from the original at:               !!   5  * Copyright (C) 1996,2008 David S. Miller (davem@davemloft.net)
  6  * https://github.com/ARM-software/optimized-r !!   6  * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  7  */                                                 7  */
  8                                                     8 
                                                   >>   9 #include <linux/export.h>
  9 #include <linux/linkage.h>                         10 #include <linux/linkage.h>
 10 #include <asm/assembler.h>                     !!  11 #include <asm/asm.h>
 11 #include <asm/mte-def.h>                       << 
 12                                                    12 
 13 /* Assumptions:                                !!  13 #define LO_MAGIC 0x01010101
 14  *                                             !!  14 #define HI_MAGIC 0x80808080
 15  * ARMv8-a, AArch64, unaligned accesses, min p << 
 16  */                                            << 
 17                                                << 
 18 #define L(label) .L ## label                   << 
 19                                                << 
 20 /* Arguments and results.  */                  << 
 21 #define srcin           x0                     << 
 22 #define len             x0                     << 
 23                                                << 
 24 /* Locals and temporaries.  */                 << 
 25 #define src             x1                     << 
 26 #define data1           x2                     << 
 27 #define data2           x3                     << 
 28 #define has_nul1        x4                     << 
 29 #define has_nul2        x5                     << 
 30 #define tmp1            x4                     << 
 31 #define tmp2            x5                     << 
 32 #define tmp3            x6                     << 
 33 #define tmp4            x7                     << 
 34 #define zeroones        x8                     << 
 35                                                << 
 36         /* NUL detection works on the principl << 
 37            (=> (X - 1) & ~(X | 0x7f)) is non-z << 
 38            can be done in parallel across the  << 
 39            (X - 1) & 0x80 is zero for non-NUL  << 
 40            false hits for characters 129..255. << 
 41                                                << 
 42 #define REP8_01 0x0101010101010101             << 
 43 #define REP8_7f 0x7f7f7f7f7f7f7f7f             << 
 44 #define REP8_80 0x8080808080808080             << 
 45                                                << 
 46 /*                                             << 
 47  * When KASAN_HW_TAGS is in use, memory is che << 
 48  * (16-byte) granularity, and we must ensure t << 
 49  * alignment boundary.                         << 
 50  */                                            << 
 51 #ifdef CONFIG_KASAN_HW_TAGS                    << 
 52 #define MIN_PAGE_SIZE MTE_GRANULE_SIZE         << 
 53 #else                                          << 
 54 #define MIN_PAGE_SIZE 4096                     << 
 55 #endif                                         << 
 56                                                << 
 57         /* Since strings are short on average, << 
 58            of the string for a NUL character.  << 
 59            safely we have to do a page cross c << 
 60            byte we calculate the length from t << 
 61            conditional select to reduce branch << 
 62            strlen will be repeatedly called on << 
 63                                                    15 
 64            If the string is longer than 16 byt !!  16         .text
 65            further page cross checks, and proc !!  17 ENTRY(strlen)
 66            using the fast NUL check.  If we en !!  18         mov     %o0, %o1
 67            fallback to a second loop using the !!  19         andcc   %o0, 3, %g0
 68                                                !!  20         BRANCH32(be, pt, 9f)
 69            If the page cross check fails, we r !!  21          sethi  %hi(HI_MAGIC), %o4
 70            address, remove any characters befo !!  22         ldub    [%o0], %o5
 71            in the main loop using aligned load !!  23         BRANCH_REG_ZERO(pn, %o5, 11f)
 72            page in the first 16 bytes are rare !!  24          add    %o0, 1, %o0
 73            16/MIN_PAGE_SIZE ~= 0.4%), this cas !!  25         andcc   %o0, 3, %g0
 74                                                !!  26         BRANCH32(be, pn, 4f)
 75            AArch64 systems have a minimum page !!  27          or     %o4, %lo(HI_MAGIC), %o3
 76            checking for larger page sizes - th !!  28         ldub    [%o0], %o5
 77            page size is just not worth the ext !!  29         BRANCH_REG_ZERO(pn, %o5, 12f)
 78            the cases taking the slow path.  No !!  30          add    %o0, 1, %o0
 79            whether the first fetch, which may  !!  31         andcc   %o0, 3, %g0
 80            boundary.  */                       !!  32         BRANCH32(be, pt, 5f)
 81                                                !!  33          sethi  %hi(LO_MAGIC), %o4
 82 SYM_FUNC_START(__pi_strlen)                    !!  34         ldub    [%o0], %o5
 83         and     tmp1, srcin, MIN_PAGE_SIZE - 1 !!  35         BRANCH_REG_ZERO(pn, %o5, 13f)
 84         mov     zeroones, REP8_01              !!  36          add    %o0, 1, %o0
 85         cmp     tmp1, MIN_PAGE_SIZE - 16       !!  37         BRANCH32(ba, pt, 8f)
 86         b.gt    L(page_cross)                  !!  38          or     %o4, %lo(LO_MAGIC), %o2
 87         ldp     data1, data2, [srcin]          !!  39 9:
 88 #ifdef __AARCH64EB__                           !!  40         or      %o4, %lo(HI_MAGIC), %o3
 89         /* For big-endian, carry propagation ( !!  41 4:
 90            string is 0x01) means we cannot use !!  42         sethi   %hi(LO_MAGIC), %o4
 91            Since we expect strings to be small !!  43 5:
 92            byte-swap the data now so has_null1 !!  44         or      %o4, %lo(LO_MAGIC), %o2
 93         rev     data1, data1                   !!  45 8:
 94         rev     data2, data2                   !!  46         ld      [%o0], %o5
 95 #endif                                         !!  47 2:
 96         sub     tmp1, data1, zeroones          !!  48         sub     %o5, %o2, %o4
 97         orr     tmp2, data1, REP8_7f           !!  49         andcc   %o4, %o3, %g0
 98         sub     tmp3, data2, zeroones          !!  50         BRANCH32(be, pt, 8b)
 99         orr     tmp4, data2, REP8_7f           !!  51          add    %o0, 4, %o0
100         bics    has_nul1, tmp1, tmp2           !!  52 
101         bic     has_nul2, tmp3, tmp4           !!  53         /* Check every byte. */
102         ccmp    has_nul2, 0, 0, eq             !!  54         srl     %o5, 24, %g7
103         beq     L(main_loop_entry)             !!  55         andcc   %g7, 0xff, %g0
104                                                !!  56         BRANCH32(be, pn, 1f)
105         /* Enter with C = has_nul1 == 0.  */   !!  57          add    %o0, -4, %o4
106         csel    has_nul1, has_nul1, has_nul2,  !!  58         srl     %o5, 16, %g7
107         mov     len, 8                         !!  59         andcc   %g7, 0xff, %g0
108         rev     has_nul1, has_nul1             !!  60         BRANCH32(be, pn, 1f)
109         clz     tmp1, has_nul1                 !!  61          add    %o4, 1, %o4
110         csel    len, xzr, len, cc              !!  62         srl     %o5, 8, %g7
111         add     len, len, tmp1, lsr 3          !!  63         andcc   %g7, 0xff, %g0
112         ret                                    !!  64         BRANCH32(be, pn, 1f)
113                                                !!  65          add    %o4, 1, %o4
114         /* The inner loop processes 32 bytes p !!  66         andcc   %o5, 0xff, %g0
115            NUL check.  If we encounter non-ASC !!  67         BRANCH32_ANNUL(bne, pt, 2b)
116            loop with the accurate NUL check.   !!  68          ld     [%o0], %o5
117         .p2align 4                             !!  69         add     %o4, 1, %o4
118 L(main_loop_entry):                            << 
119         bic     src, srcin, 15                 << 
120         sub     src, src, 16                   << 
121 L(main_loop):                                  << 
122         ldp     data1, data2, [src, 32]!       << 
123 L(page_cross_entry):                           << 
124         sub     tmp1, data1, zeroones          << 
125         sub     tmp3, data2, zeroones          << 
126         orr     tmp2, tmp1, tmp3               << 
127         tst     tmp2, zeroones, lsl 7          << 
128         bne     1f                             << 
129         ldp     data1, data2, [src, 16]        << 
130         sub     tmp1, data1, zeroones          << 
131         sub     tmp3, data2, zeroones          << 
132         orr     tmp2, tmp1, tmp3               << 
133         tst     tmp2, zeroones, lsl 7          << 
134         beq     L(main_loop)                   << 
135         add     src, src, 16                   << 
136 1:                                                 70 1:
137         /* The fast check failed, so do the sl !!  71         retl
138         orr     tmp2, data1, REP8_7f           !!  72          sub    %o4, %o1, %o0
139         orr     tmp4, data2, REP8_7f           !!  73 11:
140         bics    has_nul1, tmp1, tmp2           !!  74         retl
141         bic     has_nul2, tmp3, tmp4           !!  75          mov    0, %o0
142         ccmp    has_nul2, 0, 0, eq             !!  76 12:
143         beq     L(nonascii_loop)               !!  77         retl
144                                                !!  78          mov    1, %o0
145         /* Enter with C = has_nul1 == 0.  */   !!  79 13:
146 L(tail):                                       !!  80         retl
147 #ifdef __AARCH64EB__                           !!  81          mov    2, %o0
148         /* For big-endian, carry propagation ( !!  82 ENDPROC(strlen)
149            string is 0x01) means we cannot use !!  83 EXPORT_SYMBOL(strlen)
150            easiest way to get the correct byte << 
151            and calculate the syndrome a second << 
152         csel    data1, data1, data2, cc        << 
153         rev     data1, data1                   << 
154         sub     tmp1, data1, zeroones          << 
155         orr     tmp2, data1, REP8_7f           << 
156         bic     has_nul1, tmp1, tmp2           << 
157 #else                                          << 
158         csel    has_nul1, has_nul1, has_nul2,  << 
159 #endif                                         << 
160         sub     len, src, srcin                << 
161         rev     has_nul1, has_nul1             << 
162         add     tmp2, len, 8                   << 
163         clz     tmp1, has_nul1                 << 
164         csel    len, len, tmp2, cc             << 
165         add     len, len, tmp1, lsr 3          << 
166         ret                                    << 
167                                                << 
168 L(nonascii_loop):                              << 
169         ldp     data1, data2, [src, 16]!       << 
170         sub     tmp1, data1, zeroones          << 
171         orr     tmp2, data1, REP8_7f           << 
172         sub     tmp3, data2, zeroones          << 
173         orr     tmp4, data2, REP8_7f           << 
174         bics    has_nul1, tmp1, tmp2           << 
175         bic     has_nul2, tmp3, tmp4           << 
176         ccmp    has_nul2, 0, 0, eq             << 
177         bne     L(tail)                        << 
178         ldp     data1, data2, [src, 16]!       << 
179         sub     tmp1, data1, zeroones          << 
180         orr     tmp2, data1, REP8_7f           << 
181         sub     tmp3, data2, zeroones          << 
182         orr     tmp4, data2, REP8_7f           << 
183         bics    has_nul1, tmp1, tmp2           << 
184         bic     has_nul2, tmp3, tmp4           << 
185         ccmp    has_nul2, 0, 0, eq             << 
186         beq     L(nonascii_loop)               << 
187         b       L(tail)                        << 
188                                                << 
189         /* Load 16 bytes from [srcin & ~15] an << 
190            srcin to 0x7f, so we ignore any NUL << 
191            Then continue in the aligned loop.  << 
192 L(page_cross):                                 << 
193         bic     src, srcin, 15                 << 
194         ldp     data1, data2, [src]            << 
195         lsl     tmp1, srcin, 3                 << 
196         mov     tmp4, -1                       << 
197 #ifdef __AARCH64EB__                           << 
198         /* Big-endian.  Early bytes are at MSB << 
199         lsr     tmp1, tmp4, tmp1        /* Shi << 
200 #else                                          << 
201         /* Little-endian.  Early bytes are at  << 
202         lsl     tmp1, tmp4, tmp1        /* Shi << 
203 #endif                                         << 
204         orr     tmp1, tmp1, REP8_80            << 
205         orn     data1, data1, tmp1             << 
206         orn     tmp2, data2, tmp1              << 
207         tst     srcin, 8                       << 
208         csel    data1, data1, tmp4, eq         << 
209         csel    data2, data2, tmp2, eq         << 
210         b       L(page_cross_entry)            << 
211 SYM_FUNC_END(__pi_strlen)                      << 
212 SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen)       << 
213 EXPORT_SYMBOL_NOKASAN(strlen)                  << 
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php