/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

#define L(label) .L ## label

#define dstin	x0
#define src	x1
#define count	x2
#define dst	x3
#define srcend	x4
#define dstend	x5
#define A_l	x6
#define A_lw	w6
#define A_h	x7
#define B_l	x8
#define B_lw	w8
#define B_h	x9
#define C_l	x10
#define C_lw	w10
#define C_h	x11
#define D_l	x12
#define D_h	x13
#define E_l	x14
#define E_h	x15
#define F_l	x16
#define F_h	x17
#define G_l	count
#define G_h	dst
#define H_l	src
#define H_h	srcend
#define tmp1	x14

/* This implementation handles overlaps and supports both memcpy and memmove
   from a single entry point.  It uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies.  The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per
   iteration.  The destination pointer is 16-byte aligned to minimize
   unaligned accesses.  The loop tail is handled by always copying 64 bytes
   from the end.
*/
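/* Illustrative sketch only, not part of the build: the size dispatch at the
   entry point below roughly corresponds to the following C, where
   copy_small(), copy_medium() and copy_large() are hypothetical helpers
   standing in for the labelled sections of this file.

	void *memcpy_sketch(void *dstin, const void *src, size_t count)
	{
		if (count <= 32)
			copy_small(dstin, src, count);		// 0..32 bytes
		else if (count <= 128)
			copy_medium(dstin, src, count);		// 33..128 bytes
		else
			copy_large(dstin, src, count);		// overlap check lives here
		return dstin;
	}
*/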
SYM_FUNC_START(__pi_memcpy)
	add	srcend, src, count
	add	dstend, dstin, count
	cmp	count, 128
	b.hi	L(copy_long)
	cmp	count, 32
	b.hi	L(copy32_128)

	/* Small copies: 0..32 bytes.  */
	cmp	count, 16
	b.lo	L(copy16)
	ldp	A_l, A_h, [src]
	ldp	D_l, D_h, [srcend, -16]
	stp	A_l, A_h, [dstin]
	stp	D_l, D_h, [dstend, -16]
	ret

	/* Copy 8-15 bytes.  */
L(copy16):
	tbz	count, 3, L(copy8)
	ldr	A_l, [src]
	ldr	A_h, [srcend, -8]
	str	A_l, [dstin]
	str	A_h, [dstend, -8]
	ret

	.p2align 3
	/* Copy 4-7 bytes.  */
L(copy8):
	tbz	count, 2, L(copy4)
	ldr	A_lw, [src]
	ldr	B_lw, [srcend, -4]
	str	A_lw, [dstin]
	str	B_lw, [dstend, -4]
	ret

	/* Copy 0..3 bytes using a branchless sequence.  */
L(copy4):
	cbz	count, L(copy0)
	lsr	tmp1, count, 1
	ldrb	A_lw, [src]
	ldrb	C_lw, [srcend, -1]
	ldrb	B_lw, [src, tmp1]
	strb	A_lw, [dstin]
	strb	B_lw, [dstin, tmp1]
	strb	C_lw, [dstend, -1]
L(copy0):
	ret
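/* A minimal C sketch of the small-copy technique above (illustrative only,
   not part of the build): every byte of a 1..3 byte range is covered by the
   three accesses at src[0], src[count / 2] and src[count - 1], and the
   16..32 byte case is covered by one 16-byte load/store pair anchored at
   each end, so short copies need no byte loop.  All loads are issued before
   any store, which also keeps these paths safe for overlapping buffers.

	static void copy_0_to_3(unsigned char *dst,
				const unsigned char *src, size_t count)
	{
		if (count == 0)
			return;
		unsigned char a = src[0];
		unsigned char b = src[count / 2];
		unsigned char c = src[count - 1];
		dst[0] = a;
		dst[count / 2] = b;
		dst[count - 1] = c;
	}
*/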
	.p2align 4
	/* Medium copies: 33..128 bytes.  */
L(copy32_128):
	ldp	A_l, A_h, [src]
	ldp	B_l, B_h, [src, 16]
	ldp	C_l, C_h, [srcend, -32]
	ldp	D_l, D_h, [srcend, -16]
	cmp	count, 64
	b.hi	L(copy128)
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy 65..128 bytes.  */
L(copy128):
	ldp	E_l, E_h, [src, 32]
	ldp	F_l, F_h, [src, 48]
	cmp	count, 96
	b.ls	L(copy96)
	ldp	G_l, G_h, [srcend, -64]
	ldp	H_l, H_h, [srcend, -48]
	stp	G_l, G_h, [dstend, -64]
	stp	H_l, H_h, [dstend, -48]
L(copy96):
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	E_l, E_h, [dstin, 32]
	stp	F_l, F_h, [dstin, 48]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy more than 128 bytes.  */
L(copy_long):
	/* Use backwards copy if there is an overlap.  */
	sub	tmp1, dstin, src
	cbz	tmp1, L(copy0)
	cmp	tmp1, count
	b.lo	L(copy_long_backwards)

	/* Copy 16 bytes and then align dst to 16-byte alignment.  */

	ldp	D_l, D_h, [src]
	and	tmp1, dstin, 15
	bic	dst, dstin, 15
	sub	src, src, tmp1
	add	count, count, tmp1	/* Count is now 16 too large.  */
	ldp	A_l, A_h, [src, 16]
	stp	D_l, D_h, [dstin]
	ldp	B_l, B_h, [src, 32]
	ldp	C_l, C_h, [src, 48]
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 128 + 16	/* Test and readjust count.  */
	b.ls	L(copy64_from_end)

L(loop64):
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [src, 16]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [src, 32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [src, 48]
	stp	D_l, D_h, [dst, 64]!
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 64
	b.hi	L(loop64)

	/* Write the last iteration and copy 64 bytes from the end.  */
L(copy64_from_end):
	ldp	E_l, E_h, [srcend, -64]
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [srcend, -48]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [srcend, -16]
	stp	D_l, D_h, [dst, 64]
	stp	E_l, E_h, [dstend, -64]
	stp	A_l, A_h, [dstend, -48]
	stp	B_l, B_h, [dstend, -32]
	stp	C_l, C_h, [dstend, -16]
	ret
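/* Sketch of the loop-tail strategy used above (illustrative only, not part
   of the build; copy64() is a hypothetical helper copying 64 bytes, and the
   initial 16-byte copy plus destination alignment step are omitted): rather
   than finishing with a variable-length remainder loop, the final 64 bytes
   are always copied from the end of the buffers, possibly re-writing bytes
   already covered by the last loop iteration.  Assumes count > 128.

	static void copy_large_sketch(unsigned char *dst,
				      const unsigned char *src, size_t count)
	{
		unsigned char *dstend = dst + count;
		const unsigned char *srcend = src + count;

		while (count > 64) {
			copy64(dst, src);	// software pipelined in the asm
			dst += 64;
			src += 64;
			count -= 64;
		}
		copy64(dstend - 64, srcend - 64);	// unconditional 64-byte tail
	}
*/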
	.p2align 4

	/* Large backwards copy for overlapping copies.
	   Copy 16 bytes and then align dst to 16-byte alignment.  */
L(copy_long_backwards):
	ldp	D_l, D_h, [srcend, -16]
	and	tmp1, dstend, 15
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, -16]
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!
	sub	dstend, dstend, tmp1
	subs	count, count, 128
	b.ls	L(copy64_from_start)

L(loop64_backwards):
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	L(loop64_backwards)

	/* Write the last iteration and copy 64 bytes from the start.  */
L(copy64_from_start):
	ldp	G_l, G_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	G_l, G_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
	ret
SYM_FUNC_END(__pi_memcpy)
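/* Sketch of how a single entry point serves both memcpy() and memmove()
   (illustrative only, not part of the build; copy_forwards() and
   copy_backwards() are hypothetical helpers): for copies of more than 128
   bytes the forward loop is unsafe only when the destination starts inside
   the source range, i.e. src < dst < src + count, which one unsigned
   comparison detects.  Copies of 128 bytes or less load everything before
   storing, so they need no check at all.

	void *memmove_sketch(void *dst, const void *src, size_t count)
	{
		if (dst == src)
			return dst;
		if ((uintptr_t)dst - (uintptr_t)src < (uintptr_t)count)
			copy_backwards(dst, src, count);  // dst overlaps source tail
		else
			copy_forwards(dst, src, count);
		return dst;
	}
*/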

SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)

SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)

SYM_FUNC_ALIAS(__memmove, __pi_memmove)
EXPORT_SYMBOL(__memmove)
SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)