/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/memcpy.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

#define L(label) .L ## label

#define dstin	x0
#define src	x1
#define count	x2
#define dst	x3
#define srcend	x4
#define dstend	x5
#define A_l	x6
#define A_lw	w6
#define A_h	x7
#define B_l	x8
#define B_lw	w8
#define B_h	x9
#define C_l	x10
#define C_lw	w10
#define C_h	x11
#define D_l	x12
#define D_h	x13
#define E_l	x14
#define E_h	x15
#define F_l	x16
#define F_h	x17
#define G_l	count
#define G_h	dst
#define H_l	src
#define H_h	srcend
#define tmp1	x14
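/* A_l/A_h through H_l/H_h name the low and high halves of the 16-byte
   chunks moved with ldp/stp.  Note that G_l, G_h, H_l and H_h are not
   fresh registers: they alias count, dst, src and srcend, which are
   already dead by the time the paths that use them need two more pairs
   of temporaries.  */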
/* This implementation handles overlaps and supports both memcpy and memmove
   from a single entry point.  It uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies.  The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per
   iteration.  The destination pointer is 16-byte aligned to minimize
   unaligned accesses.  The loop tail is handled by always copying 64 bytes
   from the end.
*/
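/* The size dispatch below is roughly the following C.  This is only an
   illustrative sketch; memcpy_small/memcpy_medium/memcpy_large are
   made-up names standing in for the labelled paths that follow:

	void *memcpy(void *dstin, const void *src, size_t count)
	{
		if (count > 128)
			return memcpy_large(dstin, src, count);  // L(copy_long)
		if (count > 32)
			return memcpy_medium(dstin, src, count); // L(copy32_128)
		return memcpy_small(dstin, src, count);          // 0..32 bytes
	}
*/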
SYM_FUNC_START(__pi_memcpy)
	add	srcend, src, count
	add	dstend, dstin, count
	cmp	count, 128
	b.hi	L(copy_long)
	cmp	count, 32
	b.hi	L(copy32_128)

	/* Small copies: 0..32 bytes.  */
	cmp	count, 16
	b.lo	L(copy16)
	ldp	A_l, A_h, [src]
	ldp	D_l, D_h, [srcend, -16]
	stp	A_l, A_h, [dstin]
	stp	D_l, D_h, [dstend, -16]
	ret

	/* Copy 8-15 bytes.  */
L(copy16):
	tbz	count, 3, L(copy8)
	ldr	A_l, [src]
	ldr	A_h, [srcend, -8]
	str	A_l, [dstin]
	str	A_h, [dstend, -8]
	ret

	.p2align 3
	/* Copy 4-7 bytes.  */
L(copy8):
	tbz	count, 2, L(copy4)
	ldr	A_lw, [src]
	ldr	B_lw, [srcend, -4]
	str	A_lw, [dstin]
	str	B_lw, [dstend, -4]
	ret

	/* Copy 0..3 bytes using a branchless sequence.  */
L(copy4):
	cbz	count, L(copy0)
	lsr	tmp1, count, 1
	ldrb	A_lw, [src]
	ldrb	C_lw, [srcend, -1]
	ldrb	B_lw, [src, tmp1]
	strb	A_lw, [dstin]
	strb	B_lw, [dstin, tmp1]
	strb	C_lw, [dstend, -1]
L(copy0):
	ret
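/* The L(copy4) sequence above copies 1..3 bytes without branching on the
   exact length: byte 0, byte count/2 and byte count - 1 are moved
   unconditionally, and for count == 1 or count == 2 some of the three
   stores simply land on the same address.  */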
	.p2align 4
	/* Medium copies: 33..128 bytes.  */
L(copy32_128):
	ldp	A_l, A_h, [src]
	ldp	B_l, B_h, [src, 16]
	ldp	C_l, C_h, [srcend, -32]
	ldp	D_l, D_h, [srcend, -16]
	cmp	count, 64
	b.hi	L(copy128)
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy 65..128 bytes.  */
L(copy128):
	ldp	E_l, E_h, [src, 32]
	ldp	F_l, F_h, [src, 48]
	cmp	count, 96
	b.ls	L(copy96)
	ldp	G_l, G_h, [srcend, -64]
	ldp	H_l, H_h, [srcend, -48]
	stp	G_l, G_h, [dstend, -64]
	stp	H_l, H_h, [dstend, -48]
L(copy96):
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	E_l, E_h, [dstin, 32]
	stp	F_l, F_h, [dstin, 48]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret
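/* In the 33..128 byte paths above, every load is issued before any store:
   32 bytes from each end of the buffer, plus up to 64 bytes from the
   middle.  The stores may overlap one another in the middle of the buffer,
   which is harmless once all the loads have completed, so no further
   size-dependent branching is needed.  */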
	.p2align 4
	/* Copy more than 128 bytes.  */
L(copy_long):
	/* Use backwards copy if there is an overlap.  */
	sub	tmp1, dstin, src
	cbz	tmp1, L(copy0)
	cmp	tmp1, count
	b.lo	L(copy_long_backwards)

	/* Copy 16 bytes and then align dst to 16-byte alignment.  */

	ldp	D_l, D_h, [src]
	and	tmp1, dstin, 15
	bic	dst, dstin, 15
	sub	src, src, tmp1
	add	count, count, tmp1	/* Count is now 16 too large.  */
	ldp	A_l, A_h, [src, 16]
	stp	D_l, D_h, [dstin]
	ldp	B_l, B_h, [src, 32]
	ldp	C_l, C_h, [src, 48]
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 128 + 16	/* Test and readjust count.  */
	b.ls	L(copy64_from_end)

L(loop64):
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [src, 16]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [src, 32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [src, 48]
	stp	D_l, D_h, [dst, 64]!
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 64
	b.hi	L(loop64)

	/* Write the last iteration and copy 64 bytes from the end.  */
L(copy64_from_end):
	ldp	E_l, E_h, [srcend, -64]
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [srcend, -48]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [srcend, -16]
	stp	D_l, D_h, [dst, 64]
	stp	E_l, E_h, [dstend, -64]
	stp	A_l, A_h, [dstend, -48]
	stp	B_l, B_h, [dstend, -32]
	stp	C_l, C_h, [dstend, -16]
	ret
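/* The overlap check in L(copy_long) relies on unsigned wraparound:
   dstin - src is below count only when the destination starts inside the
   source buffer, the one case a forward copy would corrupt.  A destination
   below the source wraps to a huge value and safely stays on the forward
   path.  Within L(loop64), each iteration stores the 64 bytes loaded by
   the previous one while fetching the next 64, and the ragged tail is
   covered by unconditionally rewriting the last 64 bytes relative to
   srcend/dstend.  */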
	.p2align 4

	/* Large backwards copy for overlapping copies.
	   Copy 16 bytes and then align dst to 16-byte alignment.  */
L(copy_long_backwards):
	ldp	D_l, D_h, [srcend, -16]
	and	tmp1, dstend, 15
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, -16]
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!
	sub	dstend, dstend, tmp1
	subs	count, count, 128
	b.ls	L(copy64_from_start)

L(loop64_backwards):
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	L(loop64_backwards)

	/* Write the last iteration and copy 64 bytes from the start.  */
L(copy64_from_start):
	ldp	G_l, G_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	G_l, G_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
	ret
SYM_FUNC_END(__pi_memcpy)

SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)

SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)

SYM_FUNC_ALIAS(__memmove, __pi_memmove)
EXPORT_SYMBOL(__memmove)
SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)
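/* All the exported names above resolve to the single __pi_ ("position
   independent") implementation: memmove can alias memcpy because the
   overlap check sends overlapping forward copies down the backwards
   path.  The weak aliases leave room for instrumented wrappers (e.g.
   KASAN builds) to take over the plain memcpy/memmove names.  */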