/* SPDX-License-Identifier: GPL-2.0 */
/* memcpy.S: Sparc optimized memcpy and memmove code
 * Hand optimized from GNU libc's memcpy and memmove
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */

#include <linux/export.h>

#define FUNC(x) 		\
	.globl	x;		\
	.type	x,@function;	\
	.align	4;		\
x:

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	std	%t0, [%dst - (offset) - 0x10]; \
	std	%t2, [%dst - (offset) - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];
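
/* Illustrative note, not from the original source: a sketch of how the
 * chunk macros above expand, taking the MOVE_BIGCHUNK(o1, o0, 0x00,
 * o2, o3, o4, o5, g2, g3, g4, g5) invocation used below as the example:
 *
 *	ldd	[%o1 + 0x00], %o2	! fills the even/odd pair %o2/%o3
 *	ldd	[%o1 + 0x08], %o4	! %o4/%o5
 *	ldd	[%o1 + 0x10], %g2	! %g2/%g3
 *	ldd	[%o1 + 0x18], %g4	! %g4/%g5
 *	st	%o2, [%o0 + 0x00]	! then eight single-word stores,
 *	...				! %o3 at +0x04 through
 *	st	%g5, [%o0 + 0x1c]	! %g5 at +0x1c
 *
 * so each invocation moves one 32-byte block.  MOVE_LASTCHUNK and
 * MOVE_SHORTCHUNK move 16-byte and 2-byte pieces at negative offsets,
 * i.e. backwards from an already-advanced pointer.
 */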
	.text
	.align	4

FUNC(memmove)
EXPORT_SYMBOL(memmove)
	cmp	%o0, %o1
	mov	%o0, %g7
	bleu	9f
	 sub	%o0, %o1, %o4

	add	%o1, %o2, %o3
	cmp	%o3, %o0
	bleu	0f
	 andcc	%o4, 3, %o5

	add	%o1, %o2, %o1
	add	%o0, %o2, %o0
	sub	%o1, 1, %o1
	sub	%o0, 1, %o0

1:	/* reverse_bytes */

	ldub	[%o1], %o4
	subcc	%o2, 1, %o2
	stb	%o4, [%o0]
	sub	%o1, 1, %o1
	bne	1b
	 sub	%o0, 1, %o0

	retl
	 mov	%g7, %o0
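
/* Illustrative note, not from the original source: the two compares at
 * the top of memmove() above pick the copy direction.  If dst <= src, or
 * if the regions do not overlap (src + len <= dst), execution falls into
 * the forward memcpy path at 0:/9:; only a forward-destructive overlap
 * such as memmove(buf + 1, buf, len) takes the byte-at-a-time reverse
 * loop.  Both paths return the original dst saved in %g7.
 */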

/* NOTE: This code is executed just for the cases,
	where %src (=%o1) & 3 is != 0.
	We need to align it to 4. So, for (%src & 3)
	1 we need to do ldub,lduh
	2 lduh
	3 just ldub
	so even if it looks weird, the branches
	are correct here. -jj
 */
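
/* Illustrative expansion of the note above, not from the original source:
 *	(%o1 & 3) == 1:	ldub then lduh	(3 bytes, %o1 becomes 4-aligned)
 *	(%o1 & 3) == 2:	lduh only	(2 bytes)
 *	(%o1 & 3) == 3:	ldub only	(1 byte)
 * The "bne 3f" below can look inverted because its condition codes come
 * from the andcc sitting in the delay slot, which tested bit 1 of the
 * original %o1: when that bit is set ((%o1 & 3) == 3) the single ldub
 * already reaches 4-byte alignment, so the halfword copy at 4: is
 * correctly skipped.
 */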
78:	/* dword_align */

	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	ldub	[%o1], %g2
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0
4:
	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
EXPORT_SYMBOL(memcpy)

	sub	%o0, %o1, %o4
	mov	%o0, %g7
9:
	andcc	%o4, 3, %o5
0:
	bne	86f
	 cmp	%o2, 15

	bleu	90f
	 andcc	%o1, 3, %g0

	bne	78b
3:
	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	ld	[%o1], %o4
	sub	%g1, 4, %g1
	st	%o4, [%o0]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g0
	be	3f
	 andcc	%o0, 4, %g0

	be	82f + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	sub	%g1, 128, %g1
	add	%o1, 128, %o1
	cmp	%g1, 128
	bge	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g4
	be	80f
	 andcc	%g1, 8, %g0

	sethi	%hi(80f), %o5
	srl	%g4, 1, %o4
	add	%g4, %o4, %o4
	add	%o1, %g4, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(80f), %g0
	 add	%o0, %g4, %o0
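
/* Illustrative note, not from the original source: the sethi/jmpl pair
 * above is a computed entry into the MOVE_LASTCHUNK table that follows.
 * %g4 holds the remaining length masked with 0x70, i.e. a whole number
 * of 16-byte pieces, and each MOVE_LASTCHUNK expands to 6 instructions
 * (24 bytes of code) per 16 bytes of data, so the entry point lies
 * %g4 * 24/16 = %g4 + %g4/2 bytes back from 80f, which is exactly what
 * the srl/add pair computes before the jmpl.
 */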

79:	/* memcpy_table */

	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80:	/* memcpy_table_end */
	be	81f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	st	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
	st	%g3, [%o0 - 0x04]

81:	/* memcpy_last7 */

	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

82:	/* ldd_std */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g1, 128, %g1
	add	%o1, 128, %o1
	cmp	%g1, 128
	bge	82b
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g4
	be	84f
	 andcc	%g1, 8, %g0

	sethi	%hi(84f), %o5
	add	%o1, %g4, %o1
	sub	%o5, %g4, %o5
	jmpl	%o5 + %lo(84f), %g0
	 add	%o0, %g4, %o0
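
/* Illustrative note, not from the original source: same computed-entry
 * trick as above, but MOVE_LASTALIGNCHUNK expands to 4 instructions
 * (16 bytes of code) per 16 bytes of data, so the offset back from 84f
 * is simply %g4 and no srl/add scaling is needed before the jmpl.
 */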

83:	/* amemcpy_table */

	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84:	/* amemcpy_table_end */
	be	85f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	std	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
85:	/* amemcpy_last7 */
	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

86:	/* non_aligned */
	cmp	%o2, 6
	bleu	88f
	 nop

	save	%sp, -96, %sp
	andcc	%i0, 3, %g0
	be	61f
	 andcc	%i0, 1, %g0
	be	60f
	 andcc	%i0, 2, %g0

	ldub	[%i1], %g5
	add	%i1, 1, %i1
	stb	%g5, [%i0]
	sub	%i2, 1, %i2
	bne	61f
	 add	%i0, 1, %i0
60:
	ldub	[%i1], %g3
	add	%i1, 2, %i1
	stb	%g3, [%i0]
	sub	%i2, 2, %i2
	ldub	[%i1 - 1], %g3
	add	%i0, 2, %i0
	stb	%g3, [%i0 - 1]
61:
	and	%i1, 3, %g2
	and	%i2, 0xc, %g3
	and	%i1, -4, %i1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	 sub	%g2, %g4, %l0

	blu	3f
	 cmp	%g3, 0x8

	be	2f
	 srl	%i2, 2, %g3

	ld	[%i1], %i3
	add	%i0, -8, %i0
	ld	[%i1 + 4], %i4
	b	8f
	 add	%g3, 1, %g3
2:
	ld	[%i1], %i4
	add	%i0, -12, %i0
	ld	[%i1 + 4], %i5
	add	%g3, 2, %g3
	b	9f
	 add	%i1, -4, %i1
3:
	ld	[%i1], %g1
	add	%i0, -4, %i0
	ld	[%i1 + 4], %i3
	srl	%i2, 2, %g3
	b	7f
	 add	%i1, 4, %i1
4:
	ld	[%i1], %i5
	cmp	%i2, 7
	ld	[%i1 + 4], %g1
	srl	%i2, 2, %g3
	bleu	10f
	 add	%i1, 8, %i1

	ld	[%i1], %i3
	add	%g3, -1, %g3
5:
	sll	%i5, %g4, %g2
	srl	%g1, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0]
7:
	ld	[%i1 + 4], %i4
	sll	%g1, %g4, %g2
	srl	%i3, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0 + 4]
8:
	ld	[%i1 + 8], %i5
	sll	%i3, %g4, %g2
	srl	%i4, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0 + 8]
9:
	ld	[%i1 + 12], %g1
	sll	%i4, %g4, %g2
	srl	%i5, %l0, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%i1, 16, %i1
	st	%g2, [%i0 + 12]
	add	%i0, 16, %i0
	bne,a	5b
	 ld	[%i1], %i3
10:
	sll	%i5, %g4, %g2
	srl	%g1, %l0, %g5
	srl	%l0, 3, %g3
	or	%g2, %g5, %g2
	sub	%i1, %g3, %i1
	andcc	%i2, 2, %g0
	st	%g2, [%i0]
	be	1f
	 andcc	%i2, 1, %g0

	ldub	[%i1], %g2
	add	%i1, 2, %i1
	stb	%g2, [%i0 + 4]
	add	%i0, 2, %i0
	ldub	[%i1 - 1], %g2
	stb	%g2, [%i0 + 3]
1:
	be	1f
	 nop
	ldub	[%i1], %g2
	stb	%g2, [%i0 + 4]
1:
	ret
	 restore %g7, %g0, %o0
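
/* Illustrative note, not from the original source: the 5:/7:/8:/9: loop
 * above handles source and destination that are misaligned relative to
 * each other by loading aligned source words and splicing neighbours
 * together with sll/srl/or, using %g4 = (%i1 & 3) * 8 as the left shift
 * and %l0 = 32 - %g4 as the right shift.  It runs inside a register
 * window obtained with save, hence the %i/%l registers and the closing
 * ret/restore, which also moves the saved destination pointer in %g7
 * into %o0 as the return value.
 */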

88:	/* short_end */

	and	%o2, 0xe, %o3
20:
	sethi	%hi(89f), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(89f), %g0
	 andcc	%o2, 1, %g0

	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89:	/* short_table_end */

	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

90:	/* short_aligned_end */
	bne	88b
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	ld	[%o1 + 0x00], %g2
	ld	[%o1 + 0x04], %g3
	add	%o1, 8, %o1
	st	%g2, [%o0 + 0x00]
	st	%g3, [%o0 + 0x04]
	add	%o0, 8, %o0
1:
	b	81b
	 mov	%o2, %g1