1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * Copyright (c) 2012-2022, Arm Limited. 4 * 5 * Adapted from the original at: 6 * https://github.com/ARM-software/optimized-r 7 */ 8 9 #include <linux/linkage.h> 10 #include <asm/assembler.h> 11 12 /* Assumptions: 13 * 14 * ARMv8-a, AArch64. 15 * MTE compatible. 16 */ 17 18 #define L(label) .L ## label 19 20 #define REP8_01 0x0101010101010101 21 #define REP8_7f 0x7f7f7f7f7f7f7f7f 22 23 #define src1 x0 24 #define src2 x1 25 #define result x0 26 27 #define data1 x2 28 #define data1w w2 29 #define data2 x3 30 #define data2w w3 31 #define has_nul x4 32 #define diff x5 33 #define off1 x5 34 #define syndrome x6 35 #define tmp x6 36 #define data3 x7 37 #define zeroones x8 38 #define shift x9 39 #define off2 x10 40 41 /* On big-endian early bytes are at MSB and on 42 LS_FW means shifting towards early bytes. 43 #ifdef __AARCH64EB__ 44 # define LS_FW lsl 45 #else 46 # define LS_FW lsr 47 #endif 48 49 /* NUL detection works on the principle that ( 50 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff 51 can be done in parallel across the entire w 52 Since carry propagation makes 0x1 bytes bef 53 NUL too in big-endian, byte-reverse the dat 54 55 56 SYM_FUNC_START(__pi_strcmp) 57 sub off2, src2, src1 58 mov zeroones, REP8_01 59 and tmp, src1, 7 60 tst off2, 7 61 b.ne L(misaligned8) 62 cbnz tmp, L(mutual_align) 63 64 .p2align 4 65 66 L(loop_aligned): 67 ldr data2, [src1, off2] 68 ldr data1, [src1], 8 69 L(start_realigned): 70 #ifdef __AARCH64EB__ 71 rev tmp, data1 72 sub has_nul, tmp, zeroones 73 orr tmp, tmp, REP8_7f 74 #else 75 sub has_nul, data1, zeroones 76 orr tmp, data1, REP8_7f 77 #endif 78 bics has_nul, has_nul, tmp /* Non 79 ccmp data1, data2, 0, eq 80 b.eq L(loop_aligned) 81 #ifdef __AARCH64EB__ 82 rev has_nul, has_nul 83 #endif 84 eor diff, data1, data2 85 orr syndrome, diff, has_nul 86 L(end): 87 #ifndef __AARCH64EB__ 88 rev syndrome, syndrome 89 rev data1, data1 90 rev data2, data2 91 #endif 92 clz shift, syndrome 93 /* The most-significant-non-zero bit o 94 first bit that is different, or the 95 Shifting left now will bring the cr 96 top bits. */ 97 lsl data1, data1, shift 98 lsl data2, data2, shift 99 /* But we need to zero-extend (char is 100 perform a signed 32-bit subtraction 101 lsr data1, data1, 56 102 sub result, data1, data2, lsr 56 103 ret 104 105 .p2align 4 106 107 L(mutual_align): 108 /* Sources are mutually aligned, but a 109 alignment boundary. Round down the 110 the bytes that precede the start po 111 bic src1, src1, 7 112 ldr data2, [src1, off2] 113 ldr data1, [src1], 8 114 neg shift, src2, lsl 3 /* Bit 115 mov tmp, -1 116 LS_FW tmp, tmp, shift 117 orr data1, data1, tmp 118 orr data2, data2, tmp 119 b L(start_realigned) 120 121 L(misaligned8): 122 /* Align SRC1 to 8 bytes and then comp 123 checking to make sure that we don't 124 cbz tmp, L(src1_aligned) 125 L(do_misaligned): 126 ldrb data1w, [src1], 1 127 ldrb data2w, [src2], 1 128 cmp data1w, 0 129 ccmp data1w, data2w, 0, ne /* NZC 130 b.ne L(done) 131 tst src1, 7 132 b.ne L(do_misaligned) 133 134 L(src1_aligned): 135 neg shift, src2, lsl 3 136 bic src2, src2, 7 137 ldr data3, [src2], 8 138 #ifdef __AARCH64EB__ 139 rev data3, data3 140 #endif 141 lsr tmp, zeroones, shift 142 orr data3, data3, tmp 143 sub has_nul, data3, zeroones 144 orr tmp, data3, REP8_7f 145 bics has_nul, has_nul, tmp 146 b.ne L(tail) 147 148 sub off1, src2, src1 149 150 .p2align 4 151 152 L(loop_unaligned): 153 ldr data3, [src1, off1] 154 ldr data2, [src1, off2] 155 #ifdef __AARCH64EB__ 156 rev data3, data3 157 #endif 158 sub has_nul, data3, zeroones 159 orr tmp, data3, REP8_7f 160 ldr data1, [src1], 8 161 bics has_nul, has_nul, tmp 162 ccmp data1, data2, 0, eq 163 b.eq L(loop_unaligned) 164 165 lsl tmp, has_nul, shift 166 #ifdef __AARCH64EB__ 167 rev tmp, tmp 168 #endif 169 eor diff, data1, data2 170 orr syndrome, diff, tmp 171 cbnz syndrome, L(end) 172 L(tail): 173 ldr data1, [src1] 174 neg shift, shift 175 lsr data2, data3, shift 176 lsr has_nul, has_nul, shift 177 #ifdef __AARCH64EB__ 178 rev data2, data2 179 rev has_nul, has_nul 180 #endif 181 eor diff, data1, data2 182 orr syndrome, diff, has_nul 183 b L(end) 184 185 L(done): 186 sub result, data1, data2 187 ret 188 SYM_FUNC_END(__pi_strcmp) 189 SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp) 190 EXPORT_SYMBOL_NOKASAN(strcmp)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.