/* SPDX-License-Identifier: GPL-2.0 */
/* memcpy.S: Sparc optimized memcpy and memmove code
 * Hand optimized from GNU libc's memcpy and memmove
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */

#include <linux/export.h>

#define FUNC(x) 		\
	.globl	x;		\
	.type	x,@function;	\
	.align	4;		\
x:

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	std	%t0, [%dst - (offset) - 0x10]; \
	std	%t2, [%dst - (offset) - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];
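/*
 * The "start with exactly the same insn" rule matters because the main
 * loop below enters the doubleword-aligned variant via "be 82f + 4":
 * MOVE_BIGCHUNK's first ldd has already issued in the branch delay
 * slot, so skipping MOVE_BIGALIGNCHUNK's first insn is only safe while
 * both macros begin with that identical load.
 *
 * Rough C equivalent of one MOVE_BIGCHUNK expansion, as an illustrative
 * sketch only (not part of the build): each ldd fills an even/odd
 * register pair, so four ldd's pull in 32 bytes which are written back
 * as eight 32-bit word stores.
 *
 *	static void move_bigchunk(unsigned int *dst, const unsigned int *src)
 *	{
 *		unsigned int t[8];
 *		int i;
 *
 *		for (i = 0; i < 8; i++)		// 4 x ldd, 8 words in
 *			t[i] = src[i];
 *		for (i = 0; i < 8; i++)		// 8 x st, 8 words out
 *			dst[i] = t[i];
 *	}
 */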
	.text
	.align	4

FUNC(memmove)
EXPORT_SYMBOL(memmove)
	cmp	%o0, %o1
	mov	%o0, %g7
	bleu	9f
	 sub	%o0, %o1, %o4

	add	%o1, %o2, %o3
	cmp	%o3, %o0
	bleu	0f
	 andcc	%o4, 3, %o5

	add	%o1, %o2, %o1
	add	%o0, %o2, %o0
	sub	%o1, 1, %o1
	sub	%o0, 1, %o0

1:	/* reverse_bytes */

	ldub	[%o1], %o4
	subcc	%o2, 1, %o2
	stb	%o4, [%o0]
	sub	%o1, 1, %o1
	bne	1b
	 sub	%o0, 1, %o0

	retl
	 mov	%g7, %o0
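/*
 * The tests above implement the usual memmove policy; roughly, in C
 * (a sketch for orientation, not the generated code - the two "bleu"
 * branches jump into the memcpy body at 9:/0: for the cases where a
 * forward copy is safe):
 *
 *	void *memmove(void *dst, const void *src, size_t n)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		if (d <= s || s + n <= d)	// no harmful overlap
 *			return memcpy(dst, src, n);
 *		while (n--)			// dst overlaps above src:
 *			d[n] = s[n];		// copy backwards, bytewise
 *		return dst;
 *	}
 */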
/* NOTE: This code is executed just for the cases,
         where %src (=%o1) & 3 is != 0.
	 We need to align it to 4. So, for (%src & 3)
	 1 we need to do ldub,lduh
	 2 lduh
	 3 just ldub
         so even if it looks weird, the branches
         are correct here. -jj
 */
78:	/* dword_align */

	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	ldub	[%o1], %g2
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0
4:
	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
EXPORT_SYMBOL(memcpy)

	sub	%o0, %o1, %o4
	mov	%o0, %g7
9:
	andcc	%o4, 3, %o5
0:
	bne	86f
	 cmp	%o2, 15

	bleu	90f
	 andcc	%o1, 3, %g0

	bne	78b
3:
	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	ld	[%o1], %o4
	sub	%g1, 4, %g1
	st	%o4, [%o0]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g0
	be	3f
	 andcc	%o0, 4, %g0

	be	82f + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	sub	%g1, 128, %g1
	add	%o1, 128, %o1
	cmp	%g1, 128
	bge	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g4
	be	80f
	 andcc	%g1, 8, %g0

	sethi	%hi(80f), %o5
	srl	%g4, 1, %o4
	add	%g4, %o4, %o4
	add	%o1, %g4, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(80f), %g0
	 add	%o0, %g4, %o0
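/*
 * The sethi/jmpl pair above is a computed jump into the table below.
 * %g4 = %g1 & 0x70 is the remaining 16-byte blocks (scaled by 16), and
 * each MOVE_LASTCHUNK entry is 6 insns = 24 bytes of code per 16 bytes
 * of data - hence %o4 = %g4 + %g4/2.  Note that the "andcc %g1, 8, %g0"
 * in the delay slot above executes whether or not the branch is taken,
 * and nothing in the dispatch sequence touches the condition codes, so
 * 80f still sees the 8-byte test on arrival.  Conceptually this is a
 * Duff's device; a C sketch (copy16() standing in for one
 * MOVE_LASTCHUNK expansion):
 *
 *	src += g4; dst += g4;
 *	switch (g4 >> 4) {			// jmpl lands mid-table
 *	case 7: copy16(dst - 0x70, src - 0x70);	// and falls through...
 *	case 6: copy16(dst - 0x60, src - 0x60);
 *	case 5: copy16(dst - 0x50, src - 0x50);
 *	case 4: copy16(dst - 0x40, src - 0x40);
 *	case 3: copy16(dst - 0x30, src - 0x30);
 *	case 2: copy16(dst - 0x20, src - 0x20);
 *	case 1: copy16(dst - 0x10, src - 0x10);
 *	}
 */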
79:	/* memcpy_table */

	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80:	/* memcpy_table_end */
	be	81f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	st	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
	st	%g3, [%o0 - 0x04]

81:	/* memcpy_last7 */

	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

82:	/* ldd_std */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g1, 128, %g1
	add	%o1, 128, %o1
	cmp	%g1, 128
	bge	82b
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g4
	be	84f
	 andcc	%g1, 8, %g0

	sethi	%hi(84f), %o5
	add	%o1, %g4, %o1
	sub	%o5, %g4, %o5
	jmpl	%o5 + %lo(84f), %g0
	 add	%o0, %g4, %o0

83:	/* amemcpy_table */

	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84:	/* amemcpy_table_end */
	be	85f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	std	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
85:	/* amemcpy_last7 */
	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

86:	/* non_aligned */
	cmp	%o2, 6
	bleu	88f
	 nop

	save	%sp, -96, %sp
	andcc	%i0, 3, %g0
	be	61f
	 andcc	%i0, 1, %g0
	be	60f
	 andcc	%i0, 2, %g0

	ldub	[%i1], %g5
	add	%i1, 1, %i1
	stb	%g5, [%i0]
	sub	%i2, 1, %i2
	bne	61f
	 add	%i0, 1, %i0
60:
	ldub	[%i1], %g3
	add	%i1, 2, %i1
	stb	%g3, [%i0]
	sub	%i2, 2, %i2
	ldub	[%i1 - 1], %g3
	add	%i0, 2, %i0
	stb	%g3, [%i0 - 1]
61:
	and	%i1, 3, %g2
	and	%i2, 0xc, %g3
	and	%i1, -4, %i1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	 sub	%g2, %g4, %l0

	blu	3f
	 cmp	%g3, 0x8

	be	2f
	 srl	%i2, 2, %g3

	ld	[%i1], %i3
	add	%i0, -8, %i0
	ld	[%i1 + 4], %i4
	b	8f
	 add	%g3, 1, %g3
2:
	ld	[%i1], %i4
	add	%i0, -12, %i0
	ld	[%i1 + 4], %i5
	add	%g3, 2, %g3
	b	9f
	 add	%i1, -4, %i1
3:
	ld	[%i1], %g1
	add	%i0, -4, %i0
	ld	[%i1 + 4], %i3
	srl	%i2, 2, %g3
	b	7f
	 add	%i1, 4, %i1
4:
	ld	[%i1], %i5
	cmp	%i2, 7
	ld	[%i1 + 4], %g1
	srl	%i2, 2, %g3
	bleu	10f
	 add	%i1, 8, %i1

	ld	[%i1], %i3
	add	%g3, -1, %g3
5:
	sll	%i5, %g4, %g2
	srl	%g1, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0]
7:
	ld	[%i1 + 4], %i4
	sll	%g1, %g4, %g2
	srl	%i3, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0 + 4]
8:
	ld	[%i1 + 8], %i5
	sll	%i3, %g4, %g2
	srl	%i4, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0 + 8]
9:
	ld	[%i1 + 12], %g1
	sll	%i4, %g4, %g2
	srl	%i5, %l0, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%i1, 16, %i1
	st	%g2, [%i0 + 12]
	add	%i0, 16, %i0
	bne,a	5b
	 ld	[%i1], %i3
10:
	sll	%i5, %g4, %g2
	srl	%g1, %l0, %g5
	srl	%l0, 3, %g3
	or	%g2, %g5, %g2
	sub	%i1, %g3, %i1
	andcc	%i2, 2, %g0
	st	%g2, [%i0]
	be	1f
	 andcc	%i2, 1, %g0

	ldub	[%i1], %g2
	add	%i1, 2, %i1
	stb	%g2, [%i0 + 4]
	add	%i0, 2, %i0
	ldub	[%i1 - 1], %g2
	stb	%g2, [%i0 + 3]
1:
	be	1f
	 nop
	ldub	[%i1], %g2
	stb	%g2, [%i0 + 4]
1:
	ret
	 restore %g7, %g0, %o0
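/*
 * The 5:/7:/8:/9: loop above is the classic shift-and-merge copy for
 * the case where src and dst disagree mod 4: only word-aligned ld/st
 * are issued, and every output word is stitched together from two
 * adjacent input words.  %g4 holds (src & 3) * 8 and %l0 = 32 - %g4.
 * Roughly, in C (a sketch only - the real loop is unrolled four ways
 * and entered mid-body through the 2:/3:/4: preambles above):
 *
 *	static void copy_shifted(unsigned int *dst, const unsigned int *src,
 *				 int nwords, unsigned int shl)
 *	{
 *		unsigned int shr = 32 - shl;	// shl = 8 * (orig_src & 3)
 *		unsigned int prev = *src++;
 *
 *		while (nwords--) {
 *			unsigned int cur = *src++;
 *			// big-endian merge: earlier bytes are high bits
 *			*dst++ = (prev << shl) | (cur >> shr);
 *			prev = cur;
 *		}
 *	}
 */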
88:	/* short_end */

	and	%o2, 0xe, %o3
20:
	sethi	%hi(89f), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(89f), %g0
	 andcc	%o2, 1, %g0

	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89:	/* short_table_end */

	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

90:	/* short_aligned_end */
	bne	88b
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	ld	[%o1 + 0x00], %g2
	ld	[%o1 + 0x04], %g3
	add	%o1, 8, %o1
	st	%g2, [%o0 + 0x00]
	st	%g3, [%o0 + 0x04]
	add	%o0, 8, %o0
1:
	b	81b
	 mov	%o2, %g1
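/*
 * short_end plays the computed-jump trick at byte granularity: each
 * MOVE_SHORTCHUNK entry is 4 insns = 16 bytes of code for 2 bytes of
 * data, so "sll %o3, 3" scales the remainder into a table offset and
 * the jmpl lands so that exactly (len & 0xe) / 2 entries run.  The
 * "andcc %o2, 1" in the jmpl delay slot primes the odd-byte test that
 * 89f consumes.  Net effect, as a C sketch (illustrative only):
 *
 *	for (i = 0; i < (n & 0xe); i += 2) {	// one MOVE_SHORTCHUNK each
 *		dst[i] = src[i];
 *		dst[i + 1] = src[i + 1];
 *	}
 *	if (n & 1)				// trailing odd byte
 *		dst[n - 1] = src[n - 1];
 */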