Linux/arch/arm64/lib/memcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

1 /* SPDX-License-Identifier: GPL-2.0-only */ !! 1 /* memcpy.S: Sparc optimized memcpy and memmove code 2 /* !! 2 * Hand optimized from GNU libc's memcpy and memmove 3 * Copyright (c) 2012-2021, Arm Limited. !! 3 * Copyright (C) 1991,1996 Free Software Foundation 4 * !! 4 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) 5 * Adapted from the original at: !! 5 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 6 * https://github.com/ARM-software/optimized-r !! 6 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) >> 7 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 7 */ 8 */ 8 9 9 #include <linux/linkage.h> !! 10 #include <asm/export.h> 10 #include <asm/assembler.h> !! 11 #define FUNC(x) \ >> 12 .globl x; \ >> 13 .type x,@function; \ >> 14 .align 4; \ >> 15 x: >> 16 >> 17 /* Both these macros have to start with exactly the same insn */ >> 18 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ >> 19 ldd [%src + (offset) + 0x00], %t0; \ >> 20 ldd [%src + (offset) + 0x08], %t2; \ >> 21 ldd [%src + (offset) + 0x10], %t4; \ >> 22 ldd [%src + (offset) + 0x18], %t6; \ >> 23 st %t0, [%dst + (offset) + 0x00]; \ >> 24 st %t1, [%dst + (offset) + 0x04]; \ >> 25 st %t2, [%dst + (offset) + 0x08]; \ >> 26 st %t3, [%dst + (offset) + 0x0c]; \ >> 27 st %t4, [%dst + (offset) + 0x10]; \ >> 28 st %t5, [%dst + (offset) + 0x14]; \ >> 29 st %t6, [%dst + (offset) + 0x18]; \ >> 30 st %t7, [%dst + (offset) + 0x1c]; >> 31 >> 32 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ >> 33 ldd [%src + (offset) + 0x00], %t0; \ >> 34 ldd [%src + (offset) + 0x08], %t2; \ >> 35 ldd [%src + (offset) + 0x10], %t4; \ >> 36 ldd [%src + (offset) + 0x18], %t6; \ >> 37 std %t0, [%dst + (offset) + 0x00]; \ >> 38 std %t2, [%dst + (offset) + 0x08]; \ >> 39 std %t4, [%dst + (offset) + 0x10]; \ >> 40 std %t6, [%dst + (offset) + 0x18]; >> 41 >> 42 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ >> 43 ldd [%src - (offset) - 0x10], %t0; \ >> 44 ldd [%src - (offset) - 0x08], %t2; \ >> 45 st %t0, [%dst - (offset) - 0x10]; \ >> 46 st %t1, [%dst - (offset) - 0x0c]; \ >> 47 st %t2, [%dst - (offset) - 0x08]; \ >> 48 st %t3, [%dst - (offset) - 0x04]; >> 49 >> 50 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ >> 51 ldd [%src - (offset) - 0x10], %t0; \ >> 52 ldd [%src - (offset) - 0x08], %t2; \ >> 53 std %t0, [%dst - (offset) - 0x10]; \ >> 54 std %t2, [%dst - (offset) - 0x08]; >> 55 >> 56 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ >> 57 ldub [%src - (offset) - 0x02], %t0; \ >> 58 ldub [%src - (offset) - 0x01], %t1; \ >> 59 stb %t0, [%dst - (offset) - 0x02]; \ >> 60 stb %t1, [%dst - (offset) - 0x01]; 11 61 12 /* Assumptions: !! 62 .text 13 * !! 63 .align 4 14 * ARMv8-a, AArch64, unaligned accesses. << 15 * << 16 */ << 17 << 18 #define L(label) .L ## label << 19 << 20 #define dstin x0 << 21 #define src x1 << 22 #define count x2 << 23 #define dst x3 << 24 #define srcend x4 << 25 #define dstend x5 << 26 #define A_l x6 << 27 #define A_lw w6 << 28 #define A_h x7 << 29 #define B_l x8 << 30 #define B_lw w8 << 31 #define B_h x9 << 32 #define C_l x10 << 33 #define C_lw w10 << 34 #define C_h x11 << 35 #define D_l x12 << 36 #define D_h x13 << 37 #define E_l x14 << 38 #define E_h x15 << 39 #define F_l x16 << 40 #define F_h x17 << 41 #define G_l count << 42 #define G_h dst << 43 #define H_l src << 44 #define H_h srcend << 45 #define tmp1 x14 << 46 << 47 /* This implementation handles overlaps and su << 48 from a single entry point. It uses unalign << 49 sequences to keep the code small, simple an << 50 << 51 Copies are split into 3 main cases: small c << 52 copies of up to 128 bytes, and large copies << 53 check is negligible since it is only requir << 54 << 55 Large copies use a software pipelined loop << 56 The destination pointer is 16-byte aligned << 57 The loop tail is handled by always copying << 58 */ << 59 << 60 SYM_FUNC_START(__pi_memcpy) << 61 add srcend, src, count << 62 add dstend, dstin, count << 63 cmp count, 128 << 64 b.hi L(copy_long) << 65 cmp count, 32 << 66 b.hi L(copy32_128) << 67 << 68 /* Small copies: 0..32 bytes. */ << 69 cmp count, 16 << 70 b.lo L(copy16) << 71 ldp A_l, A_h, [src] << 72 ldp D_l, D_h, [srcend, -16] << 73 stp A_l, A_h, [dstin] << 74 stp D_l, D_h, [dstend, -16] << 75 ret << 76 << 77 /* Copy 8-15 bytes. */ << 78 L(copy16): << 79 tbz count, 3, L(copy8) << 80 ldr A_l, [src] << 81 ldr A_h, [srcend, -8] << 82 str A_l, [dstin] << 83 str A_h, [dstend, -8] << 84 ret << 85 << 86 .p2align 3 << 87 /* Copy 4-7 bytes. */ << 88 L(copy8): << 89 tbz count, 2, L(copy4) << 90 ldr A_lw, [src] << 91 ldr B_lw, [srcend, -4] << 92 str A_lw, [dstin] << 93 str B_lw, [dstend, -4] << 94 ret << 95 << 96 /* Copy 0..3 bytes using a branchless << 97 L(copy4): << 98 cbz count, L(copy0) << 99 lsr tmp1, count, 1 << 100 ldrb A_lw, [src] << 101 ldrb C_lw, [srcend, -1] << 102 ldrb B_lw, [src, tmp1] << 103 strb A_lw, [dstin] << 104 strb B_lw, [dstin, tmp1] << 105 strb C_lw, [dstend, -1] << 106 L(copy0): << 107 ret << 108 64 109 .p2align 4 !! 65 FUNC(memmove) 110 /* Medium copies: 33..128 bytes. */ !! 66 EXPORT_SYMBOL(memmove) 111 L(copy32_128): !! 67 cmp %o0, %o1 112 ldp A_l, A_h, [src] !! 68 mov %o0, %g7 113 ldp B_l, B_h, [src, 16] !! 69 bleu 9f 114 ldp C_l, C_h, [srcend, -32] !! 70 sub %o0, %o1, %o4 115 ldp D_l, D_h, [srcend, -16] !! 71 116 cmp count, 64 !! 72 add %o1, %o2, %o3 117 b.hi L(copy128) !! 73 cmp %o3, %o0 118 stp A_l, A_h, [dstin] !! 74 bleu 0f 119 stp B_l, B_h, [dstin, 16] !! 75 andcc %o4, 3, %o5 120 stp C_l, C_h, [dstend, -32] !! 76 121 stp D_l, D_h, [dstend, -16] !! 77 add %o1, %o2, %o1 122 ret !! 78 add %o0, %o2, %o0 123 !! 79 sub %o1, 1, %o1 124 .p2align 4 !! 80 sub %o0, 1, %o0 125 /* Copy 65..128 bytes. */ !! 81 126 L(copy128): !! 82 1: /* reverse_bytes */ 127 ldp E_l, E_h, [src, 32] !! 83 128 ldp F_l, F_h, [src, 48] !! 84 ldub [%o1], %o4 129 cmp count, 96 !! 85 subcc %o2, 1, %o2 130 b.ls L(copy96) !! 86 stb %o4, [%o0] 131 ldp G_l, G_h, [srcend, -64] !! 87 sub %o1, 1, %o1 132 ldp H_l, H_h, [srcend, -48] !! 88 bne 1b 133 stp G_l, G_h, [dstend, -64] !! 89 sub %o0, 1, %o0 134 stp H_l, H_h, [dstend, -48] !! 90 135 L(copy96): !! 91 retl 136 stp A_l, A_h, [dstin] !! 92 mov %g7, %o0 137 stp B_l, B_h, [dstin, 16] !! 93 138 stp E_l, E_h, [dstin, 32] !! 94 /* NOTE: This code is executed just for the cases, 139 stp F_l, F_h, [dstin, 48] !! 95 where %src (=%o1) & 3 is != 0. 140 stp C_l, C_h, [dstend, -32] !! 96 We need to align it to 4. So, for (%src & 3) 141 stp D_l, D_h, [dstend, -16] !! 97 1 we need to do ldub,lduh 142 ret !! 98 2 lduh >> 99 3 just ldub >> 100 so even if it looks weird, the branches >> 101 are correct here. -jj >> 102 */ >> 103 78: /* dword_align */ 143 104 144 .p2align 4 !! 105 andcc %o1, 1, %g0 145 /* Copy more than 128 bytes. */ !! 106 be 4f 146 L(copy_long): !! 107 andcc %o1, 2, %g0 147 /* Use backwards copy if there is an o !! 108 148 sub tmp1, dstin, src !! 109 ldub [%o1], %g2 149 cbz tmp1, L(copy0) !! 110 add %o1, 1, %o1 150 cmp tmp1, count !! 111 stb %g2, [%o0] 151 b.lo L(copy_long_backwards) !! 112 sub %o2, 1, %o2 152 !! 113 bne 3f 153 /* Copy 16 bytes and then align dst to !! 114 add %o0, 1, %o0 154 !! 115 4: 155 ldp D_l, D_h, [src] !! 116 lduh [%o1], %g2 156 and tmp1, dstin, 15 !! 117 add %o1, 2, %o1 157 bic dst, dstin, 15 !! 118 sth %g2, [%o0] 158 sub src, src, tmp1 !! 119 sub %o2, 2, %o2 159 add count, count, tmp1 /* Cou !! 120 b 3f 160 ldp A_l, A_h, [src, 16] !! 121 add %o0, 2, %o0 161 stp D_l, D_h, [dstin] << 162 ldp B_l, B_h, [src, 32] << 163 ldp C_l, C_h, [src, 48] << 164 ldp D_l, D_h, [src, 64]! << 165 subs count, count, 128 + 16 /* Tes << 166 b.ls L(copy64_from_end) << 167 << 168 L(loop64): << 169 stp A_l, A_h, [dst, 16] << 170 ldp A_l, A_h, [src, 16] << 171 stp B_l, B_h, [dst, 32] << 172 ldp B_l, B_h, [src, 32] << 173 stp C_l, C_h, [dst, 48] << 174 ldp C_l, C_h, [src, 48] << 175 stp D_l, D_h, [dst, 64]! << 176 ldp D_l, D_h, [src, 64]! << 177 subs count, count, 64 << 178 b.hi L(loop64) << 179 << 180 /* Write the last iteration and copy 6 << 181 L(copy64_from_end): << 182 ldp E_l, E_h, [srcend, -64] << 183 stp A_l, A_h, [dst, 16] << 184 ldp A_l, A_h, [srcend, -48] << 185 stp B_l, B_h, [dst, 32] << 186 ldp B_l, B_h, [srcend, -32] << 187 stp C_l, C_h, [dst, 48] << 188 ldp C_l, C_h, [srcend, -16] << 189 stp D_l, D_h, [dst, 64] << 190 stp E_l, E_h, [dstend, -64] << 191 stp A_l, A_h, [dstend, -48] << 192 stp B_l, B_h, [dstend, -32] << 193 stp C_l, C_h, [dstend, -16] << 194 ret << 195 122 196 .p2align 4 !! 123 FUNC(memcpy) /* %o0=dst %o1=src %o2=len */ >> 124 EXPORT_SYMBOL(memcpy) 197 125 198 /* Large backwards copy for overlappin !! 126 sub %o0, %o1, %o4 199 Copy 16 bytes and then align dst to !! 127 mov %o0, %g7 200 L(copy_long_backwards): !! 128 9: 201 ldp D_l, D_h, [srcend, -16] !! 129 andcc %o4, 3, %o5 202 and tmp1, dstend, 15 !! 130 0: 203 sub srcend, srcend, tmp1 !! 131 bne 86f 204 sub count, count, tmp1 !! 132 cmp %o2, 15 205 ldp A_l, A_h, [srcend, -16] !! 133 206 stp D_l, D_h, [dstend, -16] !! 134 bleu 90f 207 ldp B_l, B_h, [srcend, -32] !! 135 andcc %o1, 3, %g0 208 ldp C_l, C_h, [srcend, -48] !! 136 209 ldp D_l, D_h, [srcend, -64]! !! 137 bne 78b 210 sub dstend, dstend, tmp1 !! 138 3: 211 subs count, count, 128 !! 139 andcc %o1, 4, %g0 212 b.ls L(copy64_from_start) !! 140 213 !! 141 be 2f 214 L(loop64_backwards): !! 142 mov %o2, %g1 215 stp A_l, A_h, [dstend, -16] !! 143 216 ldp A_l, A_h, [srcend, -16] !! 144 ld [%o1], %o4 217 stp B_l, B_h, [dstend, -32] !! 145 sub %g1, 4, %g1 218 ldp B_l, B_h, [srcend, -32] !! 146 st %o4, [%o0] 219 stp C_l, C_h, [dstend, -48] !! 147 add %o1, 4, %o1 220 ldp C_l, C_h, [srcend, -48] !! 148 add %o0, 4, %o0 221 stp D_l, D_h, [dstend, -64]! !! 149 2: 222 ldp D_l, D_h, [srcend, -64]! !! 150 andcc %g1, 0xffffff80, %g0 223 subs count, count, 64 !! 151 be 3f 224 b.hi L(loop64_backwards) !! 152 andcc %o0, 4, %g0 225 !! 153 226 /* Write the last iteration and copy 6 !! 154 be 82f + 4 227 L(copy64_from_start): !! 155 5: 228 ldp G_l, G_h, [src, 48] !! 156 MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) 229 stp A_l, A_h, [dstend, -16] !! 157 MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) 230 ldp A_l, A_h, [src, 32] !! 158 MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) 231 stp B_l, B_h, [dstend, -32] !! 159 MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) 232 ldp B_l, B_h, [src, 16] !! 160 sub %g1, 128, %g1 233 stp C_l, C_h, [dstend, -48] !! 161 add %o1, 128, %o1 234 ldp C_l, C_h, [src] !! 162 cmp %g1, 128 235 stp D_l, D_h, [dstend, -64] !! 163 bge 5b 236 stp G_l, G_h, [dstin, 48] !! 164 add %o0, 128, %o0 237 stp A_l, A_h, [dstin, 32] !! 165 3: 238 stp B_l, B_h, [dstin, 16] !! 166 andcc %g1, 0x70, %g4 239 stp C_l, C_h, [dstin] !! 167 be 80f >> 168 andcc %g1, 8, %g0 >> 169 >> 170 sethi %hi(80f), %o5 >> 171 srl %g4, 1, %o4 >> 172 add %g4, %o4, %o4 >> 173 add %o1, %g4, %o1 >> 174 sub %o5, %o4, %o5 >> 175 jmpl %o5 + %lo(80f), %g0 >> 176 add %o0, %g4, %o0 >> 177 >> 178 79: /* memcpy_table */ >> 179 >> 180 MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 181 MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 182 MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 183 MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 184 MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 185 MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 186 MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 187 >> 188 80: /* memcpy_table_end */ >> 189 be 81f >> 190 andcc %g1, 4, %g0 >> 191 >> 192 ldd [%o1], %g2 >> 193 add %o0, 8, %o0 >> 194 st %g2, [%o0 - 0x08] >> 195 add %o1, 8, %o1 >> 196 st %g3, [%o0 - 0x04] >> 197 >> 198 81: /* memcpy_last7 */ >> 199 >> 200 be 1f >> 201 andcc %g1, 2, %g0 >> 202 >> 203 ld [%o1], %g2 >> 204 add %o1, 4, %o1 >> 205 st %g2, [%o0] >> 206 add %o0, 4, %o0 >> 207 1: >> 208 be 1f >> 209 andcc %g1, 1, %g0 >> 210 >> 211 lduh [%o1], %g2 >> 212 add %o1, 2, %o1 >> 213 sth %g2, [%o0] >> 214 add %o0, 2, %o0 >> 215 1: >> 216 be 1f >> 217 nop >> 218 >> 219 ldub [%o1], %g2 >> 220 stb %g2, [%o0] >> 221 1: >> 222 retl >> 223 mov %g7, %o0 >> 224 >> 225 82: /* ldd_std */ >> 226 MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) >> 227 MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) >> 228 MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) >> 229 MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) >> 230 subcc %g1, 128, %g1 >> 231 add %o1, 128, %o1 >> 232 cmp %g1, 128 >> 233 bge 82b >> 234 add %o0, 128, %o0 >> 235 >> 236 andcc %g1, 0x70, %g4 >> 237 be 84f >> 238 andcc %g1, 8, %g0 >> 239 >> 240 sethi %hi(84f), %o5 >> 241 add %o1, %g4, %o1 >> 242 sub %o5, %g4, %o5 >> 243 jmpl %o5 + %lo(84f), %g0 >> 244 add %o0, %g4, %o0 >> 245 >> 246 83: /* amemcpy_table */ >> 247 >> 248 MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 249 MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 250 MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 251 MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 252 MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 253 MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 254 MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 255 >> 256 84: /* amemcpy_table_end */ >> 257 be 85f >> 258 andcc %g1, 4, %g0 >> 259 >> 260 ldd [%o1], %g2 >> 261 add %o0, 8, %o0 >> 262 std %g2, [%o0 - 0x08] >> 263 add %o1, 8, %o1 >> 264 85: /* amemcpy_last7 */ >> 265 be 1f >> 266 andcc %g1, 2, %g0 >> 267 >> 268 ld [%o1], %g2 >> 269 add %o1, 4, %o1 >> 270 st %g2, [%o0] >> 271 add %o0, 4, %o0 >> 272 1: >> 273 be 1f >> 274 andcc %g1, 1, %g0 >> 275 >> 276 lduh [%o1], %g2 >> 277 add %o1, 2, %o1 >> 278 sth %g2, [%o0] >> 279 add %o0, 2, %o0 >> 280 1: >> 281 be 1f >> 282 nop >> 283 >> 284 ldub [%o1], %g2 >> 285 stb %g2, [%o0] >> 286 1: >> 287 retl >> 288 mov %g7, %o0 >> 289 >> 290 86: /* non_aligned */ >> 291 cmp %o2, 6 >> 292 bleu 88f >> 293 nop >> 294 >> 295 save %sp, -96, %sp >> 296 andcc %i0, 3, %g0 >> 297 be 61f >> 298 andcc %i0, 1, %g0 >> 299 be 60f >> 300 andcc %i0, 2, %g0 >> 301 >> 302 ldub [%i1], %g5 >> 303 add %i1, 1, %i1 >> 304 stb %g5, [%i0] >> 305 sub %i2, 1, %i2 >> 306 bne 61f >> 307 add %i0, 1, %i0 >> 308 60: >> 309 ldub [%i1], %g3 >> 310 add %i1, 2, %i1 >> 311 stb %g3, [%i0] >> 312 sub %i2, 2, %i2 >> 313 ldub [%i1 - 1], %g3 >> 314 add %i0, 2, %i0 >> 315 stb %g3, [%i0 - 1] >> 316 61: >> 317 and %i1, 3, %g2 >> 318 and %i2, 0xc, %g3 >> 319 and %i1, -4, %i1 >> 320 cmp %g3, 4 >> 321 sll %g2, 3, %g4 >> 322 mov 32, %g2 >> 323 be 4f >> 324 sub %g2, %g4, %l0 >> 325 >> 326 blu 3f >> 327 cmp %g3, 0x8 >> 328 >> 329 be 2f >> 330 srl %i2, 2, %g3 >> 331 >> 332 ld [%i1], %i3 >> 333 add %i0, -8, %i0 >> 334 ld [%i1 + 4], %i4 >> 335 b 8f >> 336 add %g3, 1, %g3 >> 337 2: >> 338 ld [%i1], %i4 >> 339 add %i0, -12, %i0 >> 340 ld [%i1 + 4], %i5 >> 341 add %g3, 2, %g3 >> 342 b 9f >> 343 add %i1, -4, %i1 >> 344 3: >> 345 ld [%i1], %g1 >> 346 add %i0, -4, %i0 >> 347 ld [%i1 + 4], %i3 >> 348 srl %i2, 2, %g3 >> 349 b 7f >> 350 add %i1, 4, %i1 >> 351 4: >> 352 ld [%i1], %i5 >> 353 cmp %i2, 7 >> 354 ld [%i1 + 4], %g1 >> 355 srl %i2, 2, %g3 >> 356 bleu 10f >> 357 add %i1, 8, %i1 >> 358 >> 359 ld [%i1], %i3 >> 360 add %g3, -1, %g3 >> 361 5: >> 362 sll %i5, %g4, %g2 >> 363 srl %g1, %l0, %g5 >> 364 or %g2, %g5, %g2 >> 365 st %g2, [%i0] >> 366 7: >> 367 ld [%i1 + 4], %i4 >> 368 sll %g1, %g4, %g2 >> 369 srl %i3, %l0, %g5 >> 370 or %g2, %g5, %g2 >> 371 st %g2, [%i0 + 4] >> 372 8: >> 373 ld [%i1 + 8], %i5 >> 374 sll %i3, %g4, %g2 >> 375 srl %i4, %l0, %g5 >> 376 or %g2, %g5, %g2 >> 377 st %g2, [%i0 + 8] >> 378 9: >> 379 ld [%i1 + 12], %g1 >> 380 sll %i4, %g4, %g2 >> 381 srl %i5, %l0, %g5 >> 382 addcc %g3, -4, %g3 >> 383 or %g2, %g5, %g2 >> 384 add %i1, 16, %i1 >> 385 st %g2, [%i0 + 12] >> 386 add %i0, 16, %i0 >> 387 bne,a 5b >> 388 ld [%i1], %i3 >> 389 10: >> 390 sll %i5, %g4, %g2 >> 391 srl %g1, %l0, %g5 >> 392 srl %l0, 3, %g3 >> 393 or %g2, %g5, %g2 >> 394 sub %i1, %g3, %i1 >> 395 andcc %i2, 2, %g0 >> 396 st %g2, [%i0] >> 397 be 1f >> 398 andcc %i2, 1, %g0 >> 399 >> 400 ldub [%i1], %g2 >> 401 add %i1, 2, %i1 >> 402 stb %g2, [%i0 + 4] >> 403 add %i0, 2, %i0 >> 404 ldub [%i1 - 1], %g2 >> 405 stb %g2, [%i0 + 3] >> 406 1: >> 407 be 1f >> 408 nop >> 409 ldub [%i1], %g2 >> 410 stb %g2, [%i0 + 4] >> 411 1: 240 ret 412 ret 241 SYM_FUNC_END(__pi_memcpy) !! 413 restore %g7, %g0, %o0 242 414 243 SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) !! 415 88: /* short_end */ 244 EXPORT_SYMBOL(__memcpy) << 245 SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) << 246 EXPORT_SYMBOL(memcpy) << 247 416 248 SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy) !! 417 and %o2, 0xe, %o3 249 !! 418 20: 250 SYM_FUNC_ALIAS(__memmove, __pi_memmove) !! 419 sethi %hi(89f), %o5 251 EXPORT_SYMBOL(__memmove) !! 420 sll %o3, 3, %o4 252 SYM_FUNC_ALIAS_WEAK(memmove, __memmove) !! 421 add %o0, %o3, %o0 253 EXPORT_SYMBOL(memmove) !! 422 sub %o5, %o4, %o5 >> 423 add %o1, %o3, %o1 >> 424 jmpl %o5 + %lo(89f), %g0 >> 425 andcc %o2, 1, %g0 >> 426 >> 427 MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) >> 428 MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) >> 429 MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) >> 430 MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) >> 431 MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) >> 432 MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) >> 433 MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) >> 434 >> 435 89: /* short_table_end */ >> 436 >> 437 be 1f >> 438 nop >> 439 >> 440 ldub [%o1], %g2 >> 441 stb %g2, [%o0] >> 442 1: >> 443 retl >> 444 mov %g7, %o0 >> 445 >> 446 90: /* short_aligned_end */ >> 447 bne 88b >> 448 andcc %o2, 8, %g0 >> 449 >> 450 be 1f >> 451 andcc %o2, 4, %g0 >> 452 >> 453 ld [%o1 + 0x00], %g2 >> 454 ld [%o1 + 0x04], %g3 >> 455 add %o1, 8, %o1 >> 456 st %g2, [%o0 + 0x00] >> 457 st %g3, [%o0 + 0x04] >> 458 add %o0, 8, %o0 >> 459 1: >> 460 b 81b >> 461 mov %o2, %g1

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/memcpy.S

Diff markup

Differences between /arch/arm64/lib/memcpy.S (Version linux-6.12-rc7) and /arch/sparc/lib/memcpy.S (Version linux-4.11.12)

TOMOYO Linux Cross Reference Linux/arch/arm64/lib/memcpy.S

Diff markup

Differences between /arch/arm64/lib/memcpy.S (Version linux-6.12-rc7) and /arch/sparc/lib/memcpy.S (Version linux-4.11.12)

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/memcpy.S