/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/memcpy.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */
#define L(label) .L ## label

#define dstin   x0
#define src     x1
#define count   x2
#define dst     x3
#define srcend  x4
#define dstend  x5
#define A_l     x6
#define A_lw    w6
#define A_h     x7
#define B_l     x8
#define B_lw    w8
#define B_h     x9
#define C_l     x10
#define C_lw    w10
#define C_h     x11
#define D_l     x12
#define D_h     x13
#define E_l     x14
#define E_h     x15
#define F_l     x16
#define F_h     x17
#define G_l     count
#define G_h     dst
#define H_l     src
#define H_h     srcend
#define tmp1    x14

/* This implementation handles overlaps and supports both memcpy and memmove
   from a single entry point.  It uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies.  The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per
   iteration.  The destination pointer is 16-byte aligned to minimize
   unaligned accesses.  The loop tail is handled by always copying 64 bytes
   from the end.
*/

SYM_FUNC_START(__pi_memcpy)
        add     srcend, src, count
        add     dstend, dstin, count
        cmp     count, 128
        b.hi    L(copy_long)
        cmp     count, 32
        b.hi    L(copy32_128)

        /* Small copies: 0..32 bytes.  */
        cmp     count, 16
        b.lo    L(copy16)
        ldp     A_l, A_h, [src]
        ldp     D_l, D_h, [srcend, -16]
        stp     A_l, A_h, [dstin]
        stp     D_l, D_h, [dstend, -16]
        ret

        /* Copy 8-15 bytes.  */
L(copy16):
        tbz     count, 3, L(copy8)
        ldr     A_l, [src]
        ldr     A_h, [srcend, -8]
        str     A_l, [dstin]
        str     A_h, [dstend, -8]
        ret

        .p2align 3
        /* Copy 4-7 bytes.  */
L(copy8):
        tbz     count, 2, L(copy4)
        ldr     A_lw, [src]
        ldr     B_lw, [srcend, -4]
        str     A_lw, [dstin]
        str     B_lw, [dstend, -4]
        ret

        /* Copy 0..3 bytes using a branchless sequence.  */
L(copy4):
        cbz     count, L(copy0)
        lsr     tmp1, count, 1
        ldrb    A_lw, [src]
        ldrb    C_lw, [srcend, -1]
        ldrb    B_lw, [src, tmp1]
        strb    A_lw, [dstin]
        strb    B_lw, [dstin, tmp1]
        strb    C_lw, [dstend, -1]
L(copy0):
        ret

        .p2align 4
        /* Medium copies: 33..128 bytes.  */
L(copy32_128):
        ldp     A_l, A_h, [src]
        ldp     B_l, B_h, [src, 16]
        ldp     C_l, C_h, [srcend, -32]
        ldp     D_l, D_h, [srcend, -16]
        cmp     count, 64
        b.hi    L(copy128)
        stp     A_l, A_h, [dstin]
        stp     B_l, B_h, [dstin, 16]
        stp     C_l, C_h, [dstend, -32]
        stp     D_l, D_h, [dstend, -16]
        ret

        .p2align 4
        /* Copy 65..128 bytes.  */
L(copy128):
        ldp     E_l, E_h, [src, 32]
        ldp     F_l, F_h, [src, 48]
        cmp     count, 96
        b.ls    L(copy96)
        ldp     G_l, G_h, [srcend, -64]
        ldp     H_l, H_h, [srcend, -48]
        stp     G_l, G_h, [dstend, -64]
        stp     H_l, H_h, [dstend, -48]
L(copy96):
        stp     A_l, A_h, [dstin]
        stp     B_l, B_h, [dstin, 16]
        stp     E_l, E_h, [dstin, 32]
        stp     F_l, F_h, [dstin, 48]
        stp     C_l, C_h, [dstend, -32]
        stp     D_l, D_h, [dstend, -16]
        ret
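None of the paths above loops: each size class is covered by a fixed pattern of loads anchored at the start of the buffer plus loads anchored at the end, and the two groups simply overlap when count is smaller than their combined width, so every byte is still written exactly once. Below is a minimal C sketch of the two idioms, with hypothetical helper names and plain memcpy() standing in for the unaligned ldp/stp accesses; it illustrates the technique and is not the kernel's code.

#include <stdint.h>
#include <string.h>

/* The "head + tail, possibly overlapping" idiom of L(copy16):
 * two fixed-size accesses cover any count in [8, 15]. */
static void copy_8_to_15(unsigned char *dst, const unsigned char *src,
                         size_t count)          /* 8 <= count <= 15 */
{
        uint64_t a, b;

        memcpy(&a, src, 8);                     /* first 8 bytes */
        memcpy(&b, src + count - 8, 8);         /* last 8 bytes  */
        memcpy(dst, &a, 8);
        memcpy(dst + count - 8, &b, 8);         /* overlaps the first store
                                                   whenever count < 16 */
}

/* The branchless 0..3 path of L(copy4): for count == 1 the same byte
 * is written three times; for count == 2 the middle access hits the
 * second byte; for count == 3 all three are distinct. */
static void copy_0_to_3(unsigned char *dst, const unsigned char *src,
                        size_t count)
{
        if (count == 0)
                return;
        dst[0]          = src[0];
        dst[count >> 1] = src[count >> 1];
        dst[count - 1]  = src[count - 1];
}

The 33..128-byte cases are the same trick at larger widths: four to eight 16-byte pairs, half anchored at src and half at srcend.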
        .p2align 4
        /* Copy more than 128 bytes.  */
L(copy_long):
        /* Use backwards copy if there is an overlap.  */
        sub     tmp1, dstin, src
        cbz     tmp1, L(copy0)
        cmp     tmp1, count
        b.lo    L(copy_long_backwards)

        /* Copy 16 bytes and then align dst to 16-byte alignment.  */

        ldp     D_l, D_h, [src]
        and     tmp1, dstin, 15
        bic     dst, dstin, 15
        sub     src, src, tmp1
        add     count, count, tmp1      /* Count is now 16 too large.  */
        ldp     A_l, A_h, [src, 16]
        stp     D_l, D_h, [dstin]
        ldp     B_l, B_h, [src, 32]
        ldp     C_l, C_h, [src, 48]
        ldp     D_l, D_h, [src, 64]!
        subs    count, count, 128 + 16  /* Test and readjust count.  */
        b.ls    L(copy64_from_end)

L(loop64):
        stp     A_l, A_h, [dst, 16]
        ldp     A_l, A_h, [src, 16]
        stp     B_l, B_h, [dst, 32]
        ldp     B_l, B_h, [src, 32]
        stp     C_l, C_h, [dst, 48]
        ldp     C_l, C_h, [src, 48]
        stp     D_l, D_h, [dst, 64]!
        ldp     D_l, D_h, [src, 64]!
        subs    count, count, 64
        b.hi    L(loop64)

        /* Write the last iteration and copy 64 bytes from the end.  */
L(copy64_from_end):
        ldp     E_l, E_h, [srcend, -64]
        stp     A_l, A_h, [dst, 16]
        ldp     A_l, A_h, [srcend, -48]
        stp     B_l, B_h, [dst, 32]
        ldp     B_l, B_h, [srcend, -32]
        stp     C_l, C_h, [dst, 48]
        ldp     C_l, C_h, [srcend, -16]
        stp     D_l, D_h, [dst, 64]
        stp     E_l, E_h, [dstend, -64]
        stp     A_l, A_h, [dstend, -48]
        stp     B_l, B_h, [dstend, -32]
        stp     C_l, C_h, [dstend, -16]
        ret

        .p2align 4

        /* Large backwards copy for overlapping copies.
           Copy 16 bytes and then align dst to 16-byte alignment.  */
L(copy_long_backwards):
        ldp     D_l, D_h, [srcend, -16]
        and     tmp1, dstend, 15
        sub     srcend, srcend, tmp1
        sub     count, count, tmp1
        ldp     A_l, A_h, [srcend, -16]
        stp     D_l, D_h, [dstend, -16]
        ldp     B_l, B_h, [srcend, -32]
        ldp     C_l, C_h, [srcend, -48]
        ldp     D_l, D_h, [srcend, -64]!
        sub     dstend, dstend, tmp1
        subs    count, count, 128
        b.ls    L(copy64_from_start)

L(loop64_backwards):
        stp     A_l, A_h, [dstend, -16]
        ldp     A_l, A_h, [srcend, -16]
        stp     B_l, B_h, [dstend, -32]
        ldp     B_l, B_h, [srcend, -32]
        stp     C_l, C_h, [dstend, -48]
        ldp     C_l, C_h, [srcend, -48]
        stp     D_l, D_h, [dstend, -64]!
        ldp     D_l, D_h, [srcend, -64]!
        subs    count, count, 64
        b.hi    L(loop64_backwards)

        /* Write the last iteration and copy 64 bytes from the start.  */
L(copy64_from_start):
        ldp     G_l, G_h, [src, 48]
        stp     A_l, A_h, [dstend, -16]
        ldp     A_l, A_h, [src, 32]
        stp     B_l, B_h, [dstend, -32]
        ldp     B_l, B_h, [src, 16]
        stp     C_l, C_h, [dstend, -48]
        ldp     C_l, C_h, [src]
        stp     D_l, D_h, [dstend, -64]
        stp     G_l, G_h, [dstin, 48]
        stp     A_l, A_h, [dstin, 32]
        stp     B_l, B_h, [dstin, 16]
        stp     C_l, C_h, [dstin]
        ret
SYM_FUNC_END(__pi_memcpy)

SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)

SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)

SYM_FUNC_ALIAS(__memmove, __pi_memmove)
EXPORT_SYMBOL(__memmove)
SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)
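Because memmove is aliased to the same entry point, L(copy_long) chooses the copy direction with a single unsigned comparison: dstin - src, interpreted as an unsigned number, is below count exactly when the destination starts inside the source region, in which case only a backwards copy is safe. A hedged C rendering of that decision follows; the names are illustrative, and byte-at-a-time loops stand in for the 64-byte software-pipelined loop.

#include <stddef.h>
#include <stdint.h>

/* Sketch of the direction choice in L(copy_long).  The difference is
 * computed as an unsigned value, so dst < src wraps around to a huge
 * number and safely selects the forward path. */
static void *move_bytes(void *dst, const void *src, size_t count)
{
        uintptr_t diff = (uintptr_t)dst - (uintptr_t)src;
        unsigned char *d = dst;
        const unsigned char *s = src;
        size_t i;

        if (diff == 0)                  /* dst == src: nothing to do */
                return dst;
        if (diff >= count) {            /* no harmful overlap: forwards */
                for (i = 0; i < count; i++)
                        d[i] = s[i];
        } else {                        /* dst overlaps src from above */
                for (i = count; i-- > 0; )
                        d[i] = s[i];
        }
        return dst;
}

The 16-byte destination alignment and the "always copy 64 bytes from the end" tail in L(copy64_from_end) are performance devices layered on top of this choice; they never affect which direction is taken. The SPARC counterpart of this file (arch/sparc/lib/memcpy.S) follows; it reaches the same memcpy/memmove split with separate entry points instead of a shared one.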
/* SPDX-License-Identifier: GPL-2.0 */
/* memcpy.S: Sparc optimized memcpy and memmove code
 * Hand optimized from GNU libc's memcpy and memmove
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */

#include <asm/export.h>
#define FUNC(x)                 \
        .globl  x;              \
        .type   x,@function;    \
        .align  4;              \
x:

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
        ldd     [%src + (offset) + 0x00], %t0; \
        ldd     [%src + (offset) + 0x08], %t2; \
        ldd     [%src + (offset) + 0x10], %t4; \
        ldd     [%src + (offset) + 0x18], %t6; \
        st      %t0, [%dst + (offset) + 0x00]; \
        st      %t1, [%dst + (offset) + 0x04]; \
        st      %t2, [%dst + (offset) + 0x08]; \
        st      %t3, [%dst + (offset) + 0x0c]; \
        st      %t4, [%dst + (offset) + 0x10]; \
        st      %t5, [%dst + (offset) + 0x14]; \
        st      %t6, [%dst + (offset) + 0x18]; \
        st      %t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
        ldd     [%src + (offset) + 0x00], %t0; \
        ldd     [%src + (offset) + 0x08], %t2; \
        ldd     [%src + (offset) + 0x10], %t4; \
        ldd     [%src + (offset) + 0x18], %t6; \
        std     %t0, [%dst + (offset) + 0x00]; \
        std     %t2, [%dst + (offset) + 0x08]; \
        std     %t4, [%dst + (offset) + 0x10]; \
        std     %t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
        ldd     [%src - (offset) - 0x10], %t0; \
        ldd     [%src - (offset) - 0x08], %t2; \
        st      %t0, [%dst - (offset) - 0x10]; \
        st      %t1, [%dst - (offset) - 0x0c]; \
        st      %t2, [%dst - (offset) - 0x08]; \
        st      %t3, [%dst - (offset) - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
        ldd     [%src - (offset) - 0x10], %t0; \
        ldd     [%src - (offset) - 0x08], %t2; \
        std     %t0, [%dst - (offset) - 0x10]; \
        std     %t2, [%dst - (offset) - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
        ldub    [%src - (offset) - 0x02], %t0; \
        ldub    [%src - (offset) - 0x01], %t1; \
        stb     %t0, [%dst - (offset) - 0x02]; \
        stb     %t1, [%dst - (offset) - 0x01];

        .text
        .align  4

FUNC(memmove)
EXPORT_SYMBOL(memmove)
        cmp     %o0, %o1
        mov     %o0, %g7
        bleu    9f
        sub     %o0, %o1, %o4

        add     %o1, %o2, %o3
        cmp     %o3, %o0
        bleu    0f
        andcc   %o4, 3, %o5

        add     %o1, %o2, %o1
        add     %o0, %o2, %o0
        sub     %o1, 1, %o1
        sub     %o0, 1, %o0

1:      /* reverse_bytes */

        ldub    [%o1], %o4
        subcc   %o2, 1, %o2
        stb     %o4, [%o0]
        sub     %o1, 1, %o1
        bne     1b
        sub     %o0, 1, %o0

        retl
        mov     %g7, %o0

        /* NOTE: This code is executed just for the cases
                 where %src (=%o1) & 3 != 0.
                 We need to align it to 4. So, for (%src & 3)
                 1 we need to do ldub,lduh
                 2 lduh
                 3 just ldub
                 so even if it looks weird, the branches
                 are correct here. -jj
         */
78:     /* dword_align */

        andcc   %o1, 1, %g0
        be      4f
        andcc   %o1, 2, %g0

        ldub    [%o1], %g2
        add     %o1, 1, %o1
        stb     %g2, [%o0]
        sub     %o2, 1, %o2
        bne     3f
        add     %o0, 1, %o0
4:
        lduh    [%o1], %g2
        add     %o1, 2, %o1
        sth     %g2, [%o0]
        sub     %o2, 2, %o2
        b       3f
        add     %o0, 2, %o0

FUNC(memcpy)    /* %o0=dst %o1=src %o2=len */
EXPORT_SYMBOL(memcpy)

        sub     %o0, %o1, %o4
        mov     %o0, %g7
9:
        andcc   %o4, 3, %o5
0:
        bne     86f
        cmp     %o2, 15

        bleu    90f
        andcc   %o1, 3, %g0

        bne     78b
3:
        andcc   %o1, 4, %g0

        be      2f
        mov     %o2, %g1

        ld      [%o1], %o4
        sub     %g1, 4, %g1
        st      %o4, [%o0]
        add     %o1, 4, %o1
        add     %o0, 4, %o0
2:
        andcc   %g1, 0xffffff80, %g0
        be      3f
        andcc   %o0, 4, %g0

        be      82f + 4
5:
        MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
        sub     %g1, 128, %g1
        add     %o1, 128, %o1
        cmp     %g1, 128
        bge     5b
        add     %o0, 128, %o0
3:
        andcc   %g1, 0x70, %g4
        be      80f
        andcc   %g1, 8, %g0

        sethi   %hi(80f), %o5
        srl     %g4, 1, %o4
        add     %g4, %o4, %o4
        add     %o1, %g4, %o1
        sub     %o5, %o4, %o5
        jmpl    %o5 + %lo(80f), %g0
        add     %o0, %g4, %o0

79:     /* memcpy_table */

        MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80:     /* memcpy_table_end */
        be      81f
        andcc   %g1, 4, %g0

        ldd     [%o1], %g2
        add     %o0, 8, %o0
        st      %g2, [%o0 - 0x08]
        add     %o1, 8, %o1
        st      %g3, [%o0 - 0x04]

81:     /* memcpy_last7 */

        be      1f
        andcc   %g1, 2, %g0

        ld      [%o1], %g2
        add     %o1, 4, %o1
        st      %g2, [%o0]
        add     %o0, 4, %o0
1:
        be      1f
        andcc   %g1, 1, %g0

        lduh    [%o1], %g2
        add     %o1, 2, %o1
        sth     %g2, [%o0]
        add     %o0, 2, %o0
1:
        be      1f
        nop

        ldub    [%o1], %g2
        stb     %g2, [%o0]
1:
        retl
        mov     %g7, %o0

82:     /* ldd_std */
        MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
        subcc   %g1, 128, %g1
        add     %o1, 128, %o1
        cmp     %g1, 128
        bge     82b
        add     %o0, 128, %o0

        andcc   %g1, 0x70, %g4
        be      84f
        andcc   %g1, 8, %g0

        sethi   %hi(84f), %o5
        add     %o1, %g4, %o1
        sub     %o5, %g4, %o5
        jmpl    %o5 + %lo(84f), %g0
        add     %o0, %g4, %o0

83:     /* amemcpy_table */

        MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84:     /* amemcpy_table_end */
        be      85f
        andcc   %g1, 4, %g0

        ldd     [%o1], %g2
        add     %o0, 8, %o0
        std     %g2, [%o0 - 0x08]
        add     %o1, 8, %o1
85:     /* amemcpy_last7 */
        be      1f
        andcc   %g1, 2, %g0

        ld      [%o1], %g2
        add     %o1, 4, %o1
        st      %g2, [%o0]
        add     %o0, 4, %o0
1:
        be      1f
        andcc   %g1, 1, %g0

        lduh    [%o1], %g2
        add     %o1, 2, %o1
        sth     %g2, [%o0]
        add     %o0, 2, %o0
1:
        be      1f
        nop

        ldub    [%o1], %g2
        stb     %g2, [%o0]
1:
        retl
        mov     %g7, %o0
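The sethi/jmpl sequences ahead of the 79: and 83: tables turn the leftover length %g1 & 0x70 into a code address, jumping into the middle of the unrolled MOVE_LASTCHUNK/MOVE_LASTALIGNCHUNK runs so that up to seven 16-byte chunks are copied with no loop overhead. The srl/add scaling before the first table exists because each MOVE_LASTCHUNK expands to six instructions (24 bytes of code per 16 bytes of data, a factor of 1.5), while MOVE_LASTALIGNCHUNK is four instructions and needs no scaling. The nearest portable analogue is a Duff's-device-style switch; the following is a sketch of that analogue using 4-byte units rather than 16-byte chunks, not a translation of the assembly.

#include <stddef.h>

/* Duff's-device-style entry into an unrolled tail, analogous to the
 * computed jump into the 79:/83: tables: 'words' is the number of
 * remaining 4-byte units, at most 7. */
static void copy_tail_words(unsigned int *dst, const unsigned int *src,
                            size_t words)
{
        switch (words) {                /* deliberate fall-through */
        case 7: dst[6] = src[6];        /* fallthrough */
        case 6: dst[5] = src[5];        /* fallthrough */
        case 5: dst[4] = src[4];        /* fallthrough */
        case 4: dst[3] = src[3];        /* fallthrough */
        case 3: dst[2] = src[2];        /* fallthrough */
        case 2: dst[1] = src[1];        /* fallthrough */
        case 1: dst[0] = src[0];        /* fallthrough */
        case 0: break;
        }
}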
86:     /* non_aligned */
        cmp     %o2, 6
        bleu    88f
        nop

        save    %sp, -96, %sp
        andcc   %i0, 3, %g0
        be      61f
        andcc   %i0, 1, %g0
        be      60f
        andcc   %i0, 2, %g0

        ldub    [%i1], %g5
        add     %i1, 1, %i1
        stb     %g5, [%i0]
        sub     %i2, 1, %i2
        bne     61f
        add     %i0, 1, %i0
60:
        ldub    [%i1], %g3
        add     %i1, 2, %i1
        stb     %g3, [%i0]
        sub     %i2, 2, %i2
        ldub    [%i1 - 1], %g3
        add     %i0, 2, %i0
        stb     %g3, [%i0 - 1]
61:
        and     %i1, 3, %g2
        and     %i2, 0xc, %g3
        and     %i1, -4, %i1
        cmp     %g3, 4
        sll     %g2, 3, %g4
        mov     32, %g2
        be      4f
        sub     %g2, %g4, %l0

        blu     3f
        cmp     %g3, 0x8

        be      2f
        srl     %i2, 2, %g3

        ld      [%i1], %i3
        add     %i0, -8, %i0
        ld      [%i1 + 4], %i4
        b       8f
        add     %g3, 1, %g3
2:
        ld      [%i1], %i4
        add     %i0, -12, %i0
        ld      [%i1 + 4], %i5
        add     %g3, 2, %g3
        b       9f
        add     %i1, -4, %i1
3:
        ld      [%i1], %g1
        add     %i0, -4, %i0
        ld      [%i1 + 4], %i3
        srl     %i2, 2, %g3
        b       7f
        add     %i1, 4, %i1
4:
        ld      [%i1], %i5
        cmp     %i2, 7
        ld      [%i1 + 4], %g1
        srl     %i2, 2, %g3
        bleu    10f
        add     %i1, 8, %i1

        ld      [%i1], %i3
        add     %g3, -1, %g3
5:
        sll     %i5, %g4, %g2
        srl     %g1, %l0, %g5
        or      %g2, %g5, %g2
        st      %g2, [%i0]
7:
        ld      [%i1 + 4], %i4
        sll     %g1, %g4, %g2
        srl     %i3, %l0, %g5
        or      %g2, %g5, %g2
        st      %g2, [%i0 + 4]
8:
        ld      [%i1 + 8], %i5
        sll     %i3, %g4, %g2
        srl     %i4, %l0, %g5
        or      %g2, %g5, %g2
        st      %g2, [%i0 + 8]
9:
        ld      [%i1 + 12], %g1
        sll     %i4, %g4, %g2
        srl     %i5, %l0, %g5
        addcc   %g3, -4, %g3
        or      %g2, %g5, %g2
        add     %i1, 16, %i1
        st      %g2, [%i0 + 12]
        add     %i0, 16, %i0
        bne,a   5b
        ld      [%i1], %i3
10:
        sll     %i5, %g4, %g2
        srl     %g1, %l0, %g5
        srl     %l0, 3, %g3
        or      %g2, %g5, %g2
        sub     %i1, %g3, %i1
        andcc   %i2, 2, %g0
        st      %g2, [%i0]
        be      1f
        andcc   %i2, 1, %g0

        ldub    [%i1], %g2
        add     %i1, 2, %i1
        stb     %g2, [%i0 + 4]
        add     %i0, 2, %i0
        ldub    [%i1 - 1], %g2
        stb     %g2, [%i0 + 3]
1:
        be      1f
        nop
        ldub    [%i1], %g2
        stb     %g2, [%i0 + 4]
1:
        ret
        restore %g7, %g0, %o0
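When the source and destination are misaligned relative to each other by something other than a multiple of 4, no head fixup can make both pointers word-aligned, so the 5:-10: loop reads only aligned words and reassembles every output word from two consecutive inputs with sll/srl/or; %g4 holds 8 * (src & 3) and %l0 its complement to 32. A C sketch of that shift-and-merge scheme, written for SPARC's big-endian byte order (a little-endian machine would swap the two shifts; the names are illustrative):

#include <stddef.h>
#include <stdint.h>

/* Copy 'words' 32-bit words to an aligned 'dst' from a source that
 * lies 'off' bytes (1..3) past the aligned pointer 'src'.  Mirrors
 * the sll/srl/or merging in the loop above; like the assembly, it
 * reads one aligned word beyond the last pair it merges. */
static void copy_shifted(uint32_t *dst, const uint32_t *src,
                         size_t words, unsigned int off)
{
        unsigned int lsh = 8 * off;     /* %g4 in the assembly */
        unsigned int rsh = 32 - lsh;    /* %l0 in the assembly */
        uint32_t prev = src[0];
        size_t i;

        for (i = 0; i < words; i++) {
                uint32_t next = src[i + 1];
                /* big-endian: the tail bytes of 'prev' become the
                 * leading bytes of the output word */
                dst[i] = (prev << lsh) | (next >> rsh);
                prev = next;
        }
}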
88:     /* short_end */

        and     %o2, 0xe, %o3
20:
        sethi   %hi(89f), %o5
        sll     %o3, 3, %o4
        add     %o0, %o3, %o0
        sub     %o5, %o4, %o5
        add     %o1, %o3, %o1
        jmpl    %o5 + %lo(89f), %g0
        andcc   %o2, 1, %g0

        MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89:     /* short_table_end */

        be      1f
        nop

        ldub    [%o1], %g2
        stb     %g2, [%o0]
1:
        retl
        mov     %g7, %o0

90:     /* short_aligned_end */
        bne     88b
        andcc   %o2, 8, %g0

        be      1f
        andcc   %o2, 4, %g0

        ld      [%o1 + 0x00], %g2
        ld      [%o1 + 0x04], %g3
        add     %o1, 8, %o1
        st      %g2, [%o0 + 0x00]
        st      %g3, [%o0 + 0x04]
        add     %o0, 8, %o0
1:
        b       81b
        mov     %o2, %g1
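Both implementations return the original destination pointer (kept in %g7 here and in x0 in the arm64 version) and must copy correctly in either overlap direction when entered as memmove. A small self-contained check of exactly those contracts, runnable against any C library for comparison (illustrative only):

#include <assert.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        char a[16] = "0123456789abcde";
        char b[16] = "0123456789abcde";

        /* dst overlaps src from above: must copy backwards */
        memmove(a + 2, a, 8);
        assert(memcmp(a + 2, "01234567", 8) == 0);

        /* dst overlaps src from below: a forward copy is safe */
        memmove(b, b + 2, 8);
        assert(memcmp(b, "23456789", 8) == 0);

        /* the destination pointer is returned unchanged */
        assert(memmove(a, a, 0) == a);

        puts("overlap semantics OK");
        return 0;
}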