/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines
 * (string/aarch64/memcmp.S)
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#define L(label) .L ## label

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8	/* Not referenced by the code below.  */

/*
 * memcmp: compare two byte ranges of equal length.
 *
 * In:      src1  (x0) = first buffer
 *          src2  (x1) = second buffer
 *          limit (x2) = number of bytes to compare
 * Out:     result (w0) = 0 if all bytes are equal.  Otherwise the sign
 *          of the result gives the unsigned ordering of the first
 *          differing byte pair: the word-compare paths return -1 or 1,
 *          the trailing byte loop returns the byte difference.
 * Clobber: x0-x7 and the condition flags.  The stack is not used.
 *
 * Strategy by length:
 *   0..7   bytes: optional 4-byte compare, then a byte loop.
 *   8..16  bytes: two (possibly overlapping) 8-byte compares.
 *   17..32 bytes: two 8-byte compares plus an overlapping 16-byte tail.
 *   33+    bytes: 16 bytes per loop iteration plus an overlapping tail;
 *                 src1 is first aligned to 16 bytes when more than 128
 *                 bytes are being compared.
 */
SYM_FUNC_START(__pi_memcmp)
	subs	limit, limit, 8
	b.lo	L(less8)

	/* At least 8 bytes: compare the first 8.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	subs	limit, limit, 8
	b.gt	L(more16)

	/* 8..16 bytes in total.  Limit is [-8..0] here, so this loads the
	   last 8 bytes of each buffer, overlapping the first load when the
	   length is below 16.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* Aligning src1 makes the loop re-read bytes that were already
	   compared, so only do it for strings larger than 128 bytes
	   (limit is the length minus 32 here).  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Round src1 down to a 16-byte boundary, move src2 back by the same
	   amount and add those bytes back onto limit.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration.
	   Limit is pre-decremented by 16 and must be larger than zero on
	   entry.  The first ccmp compares the low words only while limit is
	   still above zero after the subs and otherwise forces NZCV = 0
	   ("ne"); the second compares the high words only if the low words
	   matched.  So the loop exits if <= 16 bytes are left to do or if
	   the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* Work out whether the loop stopped on a mismatch.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  Limit is <= 0
	   here, so adding it moves both pointers back to 16 bytes before
	   the end of the buffers.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.
	   On little-endian the words are byte-reversed first so that the
	   byte at the lowest address becomes the most significant one and
	   an unsigned compare yields memory order.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Entered with Z set and C set when the length is 0 or the inputs
	   were exhausted, which produces a result of 0.  */
L(ret_eq):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	adds	limit, limit, 4
	b.lo	L(less4)
	/* 4..7 bytes: compare the first 4 as a word.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
	/* After the adds below, limit is the number of bytes left [0..3].  */
L(less4):
	adds	limit, limit, 4
	b.eq	L(ret_eq)
	/* Compare one byte at a time until a mismatch or limit reaches 0.  */
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret
SYM_FUNC_END(__pi_memcmp)
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.