/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#define L(label) .L ## label

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

SYM_FUNC_START(__pi_memcmp)
	subs	limit, limit, 8
	b.lo	L(less8)

	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	subs	limit, limit, 8
	b.gt	L(more16)

	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	bne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	bne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
L(ret_eq):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	adds	limit, limit, 4
	b.lo	L(less4)
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	adds	limit, limit, 4
	beq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret
SYM_FUNC_END(__pi_memcmp)
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)
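The AArch64 routine above boils down to: compare 8 (then 16) bytes at a time while the buffers match, then locate the result in the final chunk. Below is a rough C sketch of that word-then-byte strategy. It is an illustration written for this text (the name memcmp_sketch is made up), not code from the kernel tree, and it deliberately omits the alignment, ccmp and endianness handling the assembly uses.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Word-then-byte comparison in the spirit of the AArch64 routine above.
 * memcpy() stands in for the (possibly unaligned) 8-byte loads; the
 * exact differing byte is then found with a plain byte loop.
 */
static int memcmp_sketch(const void *p1, const void *p2, size_t n)
{
	const unsigned char *s1 = p1, *s2 = p2;

	/* Bulk phase: 8 bytes per iteration while everything matches. */
	while (n >= 8) {
		uint64_t a, b;

		memcpy(&a, s1, 8);
		memcpy(&b, s2, 8);
		if (a != b)
			break;	/* first difference is within these 8 bytes */
		s1 += 8;
		s2 += 8;
		n -= 8;
	}

	/* Tail phase: at most 8 bytes left to inspect byte by byte. */
	if (n > 8)
		n = 8;
	while (n--) {
		int diff = *s1++ - *s2++;

		if (diff)
			return diff;
	}
	return 0;
}

The real routine additionally aligns src1 before the 16-byte loop and folds the two 8-byte comparisons into ccmp instructions, so the hot loop carries a single conditional branch.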
For comparison, the much older sparc32 memcmp.S reads as follows (note the #if 1 at the top: only the simple byte-by-byte loop is actually built; the word-at-a-time variant under #else is dead code):

#include <asm/cprefix.h>

	.text
	.align 4
	.global C_LABEL(__memcmp), C_LABEL(memcmp)
C_LABEL(__memcmp):
C_LABEL(memcmp):
#if 1
	cmp	%o2, 0
	ble	L3
	 mov	0, %g3
L5:
	ldub	[%o0], %g2
	ldub	[%o1], %g3
	sub	%g2, %g3, %g2
	mov	%g2, %g3
	sll	%g2, 24, %g2

	cmp	%g2, 0
	bne	L3
	 add	%o0, 1, %o0

	add	%o2, -1, %o2

	cmp	%o2, 0
	bg	L5
	 add	%o1, 1, %o1
L3:
	sll	%g3, 24, %o0
	sra	%o0, 24, %o0

	retl
	 nop
#else
	save	%sp, -104, %sp
	mov	%i2, %o4
	mov	%i0, %o0

	cmp	%o4, 15
	ble	L72
	 mov	%i1, %i2

	andcc	%i2, 3, %g0
	be	L161
	 andcc	%o0, 3, %g2
L75:
	ldub	[%o0], %g3
	ldub	[%i2], %g2
	add	%o0, 1, %o0

	subcc	%g3, %g2, %i0
	bne	L156
	 add	%i2, 1, %i2

	andcc	%i2, 3, %g0
	bne	L75
	 add	%o4, -1, %o4

	andcc	%o0, 3, %g2
L161:
	bne,a	L78
	 mov	%i2, %i1

	mov	%o0, %i5
	mov	%i2, %i3
	srl	%o4, 2, %i4

	cmp	%i4, 0
	bge	L93
	 mov	%i4, %g2

	add	%i4, 3, %g2
L93:
	sra	%g2, 2, %g2
	sll	%g2, 2, %g2
	sub	%i4, %g2, %g2

	cmp	%g2, 1
	be,a	L88
	 add	%o0, 4, %i5

	bg	L94
	 cmp	%g2, 2

	cmp	%g2, 0
	be,a	L86
	 ld	[%o0], %g3

	b	L162
	 ld	[%i5], %g3
L94:
	be	L81
	 cmp	%g2, 3

	be,a	L83
	 add	%o0, -4, %i5

	b	L162
	 ld	[%i5], %g3
L81:
	add	%o0, -8, %i5
	ld	[%o0], %g3
	add	%i2, -8, %i3
	ld	[%i2], %g2

	b	L82
	 add	%i4, 2, %i4
L83:
	ld	[%o0], %g4
	add	%i2, -4, %i3
	ld	[%i2], %g1

	b	L84
	 add	%i4, 1, %i4
L86:
	b	L87
	 ld	[%i2], %g2
L88:
	add	%i2, 4, %i3
	ld	[%o0], %g4
	add	%i4, -1, %i4
	ld	[%i2], %g1
L95:
	ld	[%i5], %g3
L162:
	cmp	%g4, %g1
	be	L87
	 ld	[%i3], %g2

	cmp	%g4, %g1
L163:
	bleu	L114
	 mov	-1, %i0

	b	L114
	 mov	1, %i0
L87:
	ld	[%i5 + 4], %g4
	cmp	%g3, %g2
	bne	L163
	 ld	[%i3 + 4], %g1
L84:
	ld	[%i5 + 8], %g3

	cmp	%g4, %g1
	bne	L163
	 ld	[%i3 + 8], %g2
L82:
	ld	[%i5 + 12], %g4
	cmp	%g3, %g2
	bne	L163
	 ld	[%i3 + 12], %g1

	add	%i5, 16, %i5

	addcc	%i4, -4, %i4
	bne	L95
	 add	%i3, 16, %i3

	cmp	%g4, %g1
	bne	L163
	 nop

	b	L114
	 mov	0, %i0
L78:
	srl	%o4, 2, %i0
	and	%o0, -4, %i3
	orcc	%i0, %g0, %g3
	sll	%g2, 3, %o7
	mov	32, %g2

	bge	L129
	 sub	%g2, %o7, %o1

	add	%i0, 3, %g3
L129:
	sra	%g3, 2, %g2
	sll	%g2, 2, %g2
	sub	%i0, %g2, %g2

	cmp	%g2, 1
	be,a	L124
	 ld	[%i3], %o3

	bg	L130
	 cmp	%g2, 2

	cmp	%g2, 0
	be,a	L122
	 ld	[%i3], %o2

	b	L164
	 sll	%o3, %o7, %g3
L130:
	be	L117
	 cmp	%g2, 3

	be,a	L119
	 ld	[%i3], %g1

	b	L164
	 sll	%o3, %o7, %g3
L117:
	ld	[%i3], %g4
	add	%i2, -8, %i1
	ld	[%i3 + 4], %o3
	add	%i0, 2, %i0
	ld	[%i2], %i4

	b	L118
	 add	%i3, -4, %i3
L119:
	ld	[%i3 + 4], %g4
	add	%i2, -4, %i1
	ld	[%i2], %i5

	b	L120
	 add	%i0, 1, %i0
L122:
	ld	[%i3 + 4], %g1
	ld	[%i2], %i4

	b	L123
	 add	%i3, 4, %i3
L124:
	add	%i2, 4, %i1
	ld	[%i3 + 4], %o2
	add	%i0, -1, %i0
	ld	[%i2], %i5
	add	%i3, 8, %i3
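	/*
	 * The L131/L164 loop below compares a word-misaligned buffer
	 * against an aligned one: each 32-bit chunk of the misaligned
	 * buffer is rebuilt from two aligned loads, shifting the earlier
	 * word left by %o7 (8 * the byte offset, set up at L78) and the
	 * later word right by %o1 (32 - %o7), then OR-ing the halves
	 * together before the compare.
	 */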
L131:
	sll	%o3, %o7, %g3
L164:
	srl	%o2, %o1, %g2
	ld	[%i3], %g1
	or	%g3, %g2, %g3

	cmp	%g3, %i5
	bne	L163
	 ld	[%i1], %i4
L123:
	sll	%o2, %o7, %g3
	srl	%g1, %o1, %g2
	ld	[%i3 + 4], %g4
	or	%g3, %g2, %g3

	cmp	%g3, %i4
	bne	L163
	 ld	[%i1 + 4], %i5
L120:
	sll	%g1, %o7, %g3
	srl	%g4, %o1, %g2
	ld	[%i3 + 8], %o3
	or	%g3, %g2, %g3

	cmp	%g3, %i5
	bne	L163
	 ld	[%i1 + 8], %i4
L118:
	sll	%g4, %o7, %g3
	srl	%o3, %o1, %g2
	ld	[%i3 + 12], %o2
	or	%g3, %g2, %g3

	cmp	%g3, %i4
	bne	L163
	 ld	[%i1 + 12], %i5

	add	%i3, 16, %i3
	addcc	%i0, -4, %i0
	bne	L131
	 add	%i1, 16, %i1

	sll	%o3, %o7, %g3
	srl	%o2, %o1, %g2
	or	%g3, %g2, %g3

	cmp	%g3, %i5
	be,a	L114
	 mov	0, %i0

	b,a	L163
L114:
	cmp	%i0, 0
	bne	L156
	 and	%o4, -4, %g2

	add	%o0, %g2, %o0
	add	%i2, %g2, %i2
	and	%o4, 3, %o4
L72:
	cmp	%o4, 0
	be	L156
	 mov	0, %i0

	ldub	[%o0], %g3
L165:
	ldub	[%i2], %g2
	add	%o0, 1, %o0

	subcc	%g3, %g2, %i0
	bne	L156
	 add	%i2, 1, %i2

	addcc	%o4, -1, %o4
	bne,a	L165
	 ldub	[%o0], %g3

	mov	0, %i0
L156:
	ret
	restore
#endif
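Both listings implement the usual memcmp() contract: bytes are compared as unsigned char, and the sign of a non-zero result reflects the first differing byte. A small self-contained check of that contract (the test values are arbitrary examples chosen here) could look like:

#include <assert.h>
#include <string.h>

int main(void)
{
	unsigned char a[] = { 0x01, 0x7f, 0x00 };
	unsigned char b[] = { 0x01, 0x80, 0x00 };

	/* 0x7f < 0x80 as unsigned char, so a compares below b. */
	assert(memcmp(a, b, sizeof(a)) < 0);
	assert(memcmp(b, a, sizeof(b)) > 0);
	assert(memcmp(a, a, sizeof(a)) == 0);

	return 0;
}

The sparc32 byte loop returns a sign-extended byte difference (sll then sra by 24), while the AArch64 word path normalises the result to -1/0/1 with cset/cneg; both satisfy the sign-only contract checked above.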