~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/strnlen.S

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-only */
  2 /*
  3  * Copyright (C) 2013 ARM Ltd.
  4  * Copyright (C) 2013 Linaro.
  5  *
  6  * This code is based on glibc cortex strings work originally authored by Linaro,
  7  * which can be found @
  8  *
  9  * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 10  * files/head:/src/aarch64/
 11  */
 12 
 13 #include <linux/linkage.h>
 14 #include <asm/assembler.h>
 15 
 16 /*
 17  * Determine the length of a string, up to a given maximal length
 18  *
 19  * Parameters:
 20  *      x0 - const string pointer
 21  *      x1 - maximal string length
 22  * Returns:
 23  *      x0 - the length of the string, or the maximal length if that is smaller
 24  */
 25 
 26 /* Arguments and results.  */
 27 srcin           .req    x0      /* in: string pointer */
 28 len             .req    x0      /* out: result; same register as srcin */
 29 limit           .req    x1      /* in: maximal string length */
 30 
 31 /* Locals and temporaries.  */
 32 src             .req    x2      /* read cursor; advanced by 16 on every ldp */
 33 data1           .req    x3      /* first Dword of the pair loaded by ldp */
 34 data2           .req    x4      /* second Dword of the pair loaded by ldp */
 35 data2a          .req    x5      /* data2 with leading bytes masked (misaligned start) */
 36 has_nul1        .req    x6      /* NUL syndrome for data1; non-zero iff it holds a zero byte */
 37 has_nul2        .req    x7      /* NUL syndrome for data2 */
 38 tmp1            .req    x8
 39 tmp2            .req    x9
 40 tmp3            .req    x10
 41 tmp4            .req    x11
 42 zeroones        .req    x12     /* holds REP8_01 */
 43 pos             .req    x13     /* clz result: bit position of the first NUL */
 44 limit_wd        .req    x14     /* 16-byte blocks still to examine, minus one */
 45 
 46 #define REP8_01 0x0101010101010101      /* 0x01 replicated in every byte */
 47 #define REP8_7f 0x7f7f7f7f7f7f7f7f      /* 0x7f replicated in every byte */
 48 #define REP8_80 0x8080808080808080      /* 0x80 replicated in every byte; not referenced in the code shown here */
 49 
 50 SYM_FUNC_START(__pi_strnlen)    /* x0 = min(length of string at x0, x1) */
 51         cbz     limit, .Lhit_limit      /* limit == 0: return it without loading anything */
 52         mov     zeroones, #REP8_01
 53         bic     src, srcin, #15         /* src = srcin rounded down to a 16-byte boundary */
 54         ands    tmp1, srcin, #15        /* tmp1 = offset of srcin within that 16-byte block */
 55         b.ne    .Lmisaligned            /* non-zero offset: leading bytes must be masked first */
 56         /* Calculate the number of full and partial words -1.  */
 57         sub     limit_wd, limit, #1 /* Limit != 0, so no underflow.  */
 58         lsr     limit_wd, limit_wd, #4  /* Convert to Qwords.  */
 59 
 60         /*
 61         * NUL detection works on the principle that (X - 1) & (~X) & 0x80
 62         * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
 63         * can be done in parallel across the entire word.
 64         */
 65         /*
 66         * The inner loop deals with two Dwords at a time.  This has a
 67         * slightly higher start-up cost, but we should win quite quickly,
 68         * especially on cores with a high number of issue slots per
 69         * cycle, as we get much better parallelism out of the operations.
 70         */
 71 .Lloop:
 72         ldp     data1, data2, [src], #16        /* load 16 bytes, then src += 16 */
 73 .Lrealigned:
 74         sub     tmp1, data1, zeroones
 75         orr     tmp2, data1, #REP8_7f
 76         sub     tmp3, data2, zeroones
 77         orr     tmp4, data2, #REP8_7f
 78         bic     has_nul1, tmp1, tmp2            /* (data1 - 0x01..) & ~(data1 | 0x7f..) */
 79         bic     has_nul2, tmp3, tmp4            /* same syndrome for data2 */
 80         subs    limit_wd, limit_wd, #1          /* sets N once the block count goes negative */
 81         orr     tmp1, has_nul1, has_nul2        /* non-zero if either Dword flagged a NUL */
 82         ccmp    tmp1, #0, #0, pl    /* If count >= 0 compare tmp1 with 0, else NZCV = 0000 (ne).  */
 83         b.eq    .Lloop                          /* loop while blocks remain and no NUL was seen */
 84 
 85         cbz     tmp1, .Lhit_limit   /* No null in final Qword.  */
 86 
 87         /*
 88         * We know there's a null in the final Qword. The easiest thing
 89         * to do now is work out the length of the string and return
 90         * MIN (len, limit).
 91         */
 92         sub     len, src, srcin                 /* bytes from srcin to end of the block just read; overwrites srcin (same register) */
 93         cbz     has_nul1, .Lnul_in_data2
 94 CPU_BE( mov     data2, data1 )  /* prepare data to re-calculate the syndrome */
 95 
 96         sub     len, len, #8                    /* NUL is in data1: step back over data2's 8 bytes */
 97         mov     has_nul2, has_nul1              /* the common tail below works on has_nul2 */
 98 .Lnul_in_data2:
 99         /*
100         * For big-endian, carry propagation (if the final byte in the
101         * string is 0x01) means we cannot use has_nul directly.  The
102         * easiest way to get the correct byte is to byte-swap the data
103         * and calculate the syndrome a second time.
104         */
105 CPU_BE( rev     data2, data2 )
106 CPU_BE( sub     tmp1, data2, zeroones )
107 CPU_BE( orr     tmp2, data2, #REP8_7f )
108 CPU_BE( bic     has_nul2, tmp1, tmp2 )
109 
110         sub     len, len, #8                    /* len = offset of the Dword that holds the NUL */
111         rev     has_nul2, has_nul2              /* byte-reverse the syndrome so clz counts from the earliest byte */
112         clz     pos, has_nul2
113         add     len, len, pos, lsr #3       /* Bits to bytes.  */
114         cmp     len, limit
115         csel    len, len, limit, ls     /* Return the lower value.  */
116         ret
117 
118 .Lmisaligned:
119         /*
120         * Deal with a partial first word.
121         * We're doing two things in parallel here;
122         * 1) Calculate the number of words (but avoiding overflow if
123         * limit is near ULONG_MAX) - to do this we need to work out
124         * limit + tmp1 - 1 as a 65-bit value before shifting it;
125         * 2) Load and mask the initial data words - we force the bytes
126         * before the ones we are interested in to 0xff - this ensures
127         * early bytes will not hit any zero detection.
128         */
129         ldp     data1, data2, [src], #16        /* load the aligned 16-byte block containing srcin */
130 
131         sub     limit_wd, limit, #1
132         and     tmp3, limit_wd, #15             /* low 4 bits of (limit - 1) */
133         lsr     limit_wd, limit_wd, #4          /* high bits of (limit - 1), as a block count */
134 
135         add     tmp3, tmp3, tmp1                /* low bits plus the start offset */
136         add     limit_wd, limit_wd, tmp3, lsr #4        /* add the carry out of the low 4 bits */
137 
138         neg     tmp4, tmp1
139         lsl     tmp4, tmp4, #3  /* Bytes beyond alignment -> bits.  */
140 
141         mov     tmp2, #~0                       /* all ones; shifted below into the leading-byte mask */
142         /* Big-endian.  Early bytes are at MSB.  */
143 CPU_BE( lsl     tmp2, tmp2, tmp4 )      /* Shift (tmp1 & 63).  */
144         /* Little-endian.  Early bytes are at LSB.  */
145 CPU_LE( lsr     tmp2, tmp2, tmp4 )      /* Shift (tmp1 & 63).  */
146 
147         cmp     tmp1, #8                        /* flags are consumed by csinv/csel below */
148 
149         orr     data1, data1, tmp2              /* data1 with the mask applied */
150         orr     data2a, data2, tmp2             /* data2 with the mask applied */
151 
152         csinv   data1, data1, xzr, le           /* offset > 8: all of data1 precedes srcin, force it to all ones */
153         csel    data2, data2, data2a, le        /* offset > 8: use masked data2; otherwise data2 is left as loaded */
154         b       .Lrealigned
155 
156 .Lhit_limit:
157         mov     len, limit                      /* no NUL found within limit (or limit == 0): return limit */
158         ret
159 SYM_FUNC_END(__pi_strnlen)
160 
161 SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
162 EXPORT_SYMBOL_NOKASAN(strnlen)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php