/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

#define L(label) .L ## label

#define dstin	x0
#define src	x1
#define count	x2
#define dst	x3
#define srcend	x4
#define dstend	x5
#define A_l	x6
#define A_lw	w6
#define A_h	x7
#define B_l	x8
#define B_lw	w8
#define B_h	x9
#define C_l	x10
#define C_lw	w10
#define C_h	x11
#define D_l	x12
#define D_h	x13
#define E_l	x14
#define E_h	x15
#define F_l	x16
#define F_h	x17
#define G_l	count
#define G_h	dst
#define H_l	src
#define H_h	srcend
#define tmp1	x14

/* This implementation handles overlaps and supports both memcpy and memmove
   from a single entry point.  It uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies.  The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per
   iteration.  The destination pointer is 16-byte aligned to minimize
   unaligned accesses.  The loop tail is handled by always copying 64 bytes
   from the end.

   An illustrative C sketch of this dispatch appears in a comment at the end
   of this file.
*/

SYM_FUNC_START(__pi_memcpy)
	add	srcend, src, count
	add	dstend, dstin, count
	cmp	count, 128
	b.hi	L(copy_long)
	cmp	count, 32
	b.hi	L(copy32_128)

	/* Small copies: 0..32 bytes.  */
	cmp	count, 16
	b.lo	L(copy16)
	ldp	A_l, A_h, [src]
	ldp	D_l, D_h, [srcend, -16]
	stp	A_l, A_h, [dstin]
	stp	D_l, D_h, [dstend, -16]
	ret

	/* Copy 8-15 bytes.  */
L(copy16):
	tbz	count, 3, L(copy8)
	ldr	A_l, [src]
	ldr	A_h, [srcend, -8]
	str	A_l, [dstin]
	str	A_h, [dstend, -8]
	ret

	.p2align 3
	/* Copy 4-7 bytes.  */
L(copy8):
	tbz	count, 2, L(copy4)
	ldr	A_lw, [src]
	ldr	B_lw, [srcend, -4]
	str	A_lw, [dstin]
	str	B_lw, [dstend, -4]
	ret

	/* Copy 0..3 bytes using a branchless sequence.  */
L(copy4):
	cbz	count, L(copy0)
	lsr	tmp1, count, 1
	ldrb	A_lw, [src]
	ldrb	C_lw, [srcend, -1]
	ldrb	B_lw, [src, tmp1]
	strb	A_lw, [dstin]
	strb	B_lw, [dstin, tmp1]
	strb	C_lw, [dstend, -1]
L(copy0):
	ret

	.p2align 4
	/* Medium copies: 33..128 bytes.  */
L(copy32_128):
	ldp	A_l, A_h, [src]
	ldp	B_l, B_h, [src, 16]
	ldp	C_l, C_h, [srcend, -32]
	ldp	D_l, D_h, [srcend, -16]
	cmp	count, 64
	b.hi	L(copy128)
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy 65..128 bytes.  */
L(copy128):
	ldp	E_l, E_h, [src, 32]
	ldp	F_l, F_h, [src, 48]
	cmp	count, 96
	b.ls	L(copy96)
	ldp	G_l, G_h, [srcend, -64]
	ldp	H_l, H_h, [srcend, -48]
	stp	G_l, G_h, [dstend, -64]
	stp	H_l, H_h, [dstend, -48]
L(copy96):
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	E_l, E_h, [dstin, 32]
	stp	F_l, F_h, [dstin, 48]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy more than 128 bytes.  */
L(copy_long):
	/* Use backwards copy if there is an overlap.  */
	sub	tmp1, dstin, src
	cbz	tmp1, L(copy0)
	cmp	tmp1, count
	b.lo	L(copy_long_backwards)

	/* Copy 16 bytes and then align dst to 16-byte alignment.  */

	ldp	D_l, D_h, [src]
	and	tmp1, dstin, 15
	bic	dst, dstin, 15
	sub	src, src, tmp1
	add	count, count, tmp1	/* Count is now 16 too large.  */
	ldp	A_l, A_h, [src, 16]
	stp	D_l, D_h, [dstin]
	ldp	B_l, B_h, [src, 32]
	ldp	C_l, C_h, [src, 48]
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 128 + 16	/* Test and readjust count.  */
	b.ls	L(copy64_from_end)

L(loop64):
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [src, 16]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [src, 32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [src, 48]
	stp	D_l, D_h, [dst, 64]!
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 64
	b.hi	L(loop64)

	/* Write the last iteration and copy 64 bytes from the end.  */
L(copy64_from_end):
	ldp	E_l, E_h, [srcend, -64]
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [srcend, -48]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [srcend, -16]
	stp	D_l, D_h, [dst, 64]
	stp	E_l, E_h, [dstend, -64]
	stp	A_l, A_h, [dstend, -48]
	stp	B_l, B_h, [dstend, -32]
	stp	C_l, C_h, [dstend, -16]
	ret

	.p2align 4

	/* Large backwards copy for overlapping copies.
	   Copy 16 bytes and then align dst to 16-byte alignment.  */
L(copy_long_backwards):
	ldp	D_l, D_h, [srcend, -16]
	and	tmp1, dstend, 15
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, -16]
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!
	sub	dstend, dstend, tmp1
	subs	count, count, 128
	b.ls	L(copy64_from_start)

L(loop64_backwards):
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	L(loop64_backwards)

	/* Write the last iteration and copy 64 bytes from the start.  */
L(copy64_from_start):
	ldp	G_l, G_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	G_l, G_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
	ret
SYM_FUNC_END(__pi_memcpy)

SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)

SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)

SYM_FUNC_ALIAS(__memmove, __pi_memmove)
EXPORT_SYMBOL(__memmove)
SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)
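
/*
 * Illustrative sketch (not assembled, not part of the build): the size
 * dispatch and the overlap check above, expressed as plain C for readers.
 * This is a simplified rendering, not the routine's implementation; the
 * function name memcpy_sketch and the byte-at-a-time loops are hypothetical
 * stand-ins for the tuned ldp/stp sequences.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static void *memcpy_sketch(void *dstin, const void *src, size_t count)
 *	{
 *		unsigned char *d = dstin;
 *		const unsigned char *s = src;
 *		size_t i;
 *
 *		// Same unsigned test as "sub tmp1, dstin, src; cmp tmp1, count;
 *		// b.lo copy_long_backwards": a forward copy is safe unless the
 *		// destination starts inside [src, src + count).
 *		if ((uintptr_t)dstin - (uintptr_t)src >= count) {
 *			for (i = 0; i < count; i++)	// forward copy
 *				d[i] = s[i];
 *		} else {
 *			for (i = count; i > 0; i--)	// backwards copy
 *				d[i - 1] = s[i - 1];
 *		}
 *		return dstin;
 *	}
 */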