/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

#define L(label) .L ## label

#define dstin	x0
#define src	x1
#define count	x2
#define dst	x3
#define srcend	x4
#define dstend	x5
#define A_l	x6
#define A_lw	w6
#define A_h	x7
#define B_l	x8
#define B_lw	w8
#define B_h	x9
#define C_l	x10
#define C_lw	w10
#define C_h	x11
#define D_l	x12
#define D_h	x13
#define E_l	x14
#define E_h	x15
#define F_l	x16
#define F_h	x17
#define G_l	count
#define G_h	dst
#define H_l	src
#define H_h	srcend
#define tmp1	x14

/* This implementation handles overlaps and supports both memcpy and memmove
   from a single entry point.  It uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies.  The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per
   iteration.  The destination pointer is 16-byte aligned to minimize
   unaligned accesses.  The loop tail is handled by always copying 64 bytes
   from the end.
*/

SYM_FUNC_START(__pi_memcpy)
	add	srcend, src, count
	add	dstend, dstin, count
	cmp	count, 128
	b.hi	L(copy_long)
	cmp	count, 32
	b.hi	L(copy32_128)

	/* Small copies: 0..32 bytes.  */
	cmp	count, 16
	b.lo	L(copy16)
	ldp	A_l, A_h, [src]
	ldp	D_l, D_h, [srcend, -16]
	stp	A_l, A_h, [dstin]
	stp	D_l, D_h, [dstend, -16]
	ret

	/* Copy 8-15 bytes.  */
L(copy16):
	tbz	count, 3, L(copy8)
	ldr	A_l, [src]
	ldr	A_h, [srcend, -8]
	str	A_l, [dstin]
	str	A_h, [dstend, -8]
	ret

	.p2align 3
	/* Copy 4-7 bytes.  */
L(copy8):
	tbz	count, 2, L(copy4)
	ldr	A_lw, [src]
	ldr	B_lw, [srcend, -4]
	str	A_lw, [dstin]
	str	B_lw, [dstend, -4]
	ret

	/* Copy 0..3 bytes using a branchless sequence.  */
L(copy4):
	cbz	count, L(copy0)
	lsr	tmp1, count, 1
	ldrb	A_lw, [src]
	ldrb	C_lw, [srcend, -1]
	ldrb	B_lw, [src, tmp1]
	strb	A_lw, [dstin]
	strb	B_lw, [dstin, tmp1]
	strb	C_lw, [dstend, -1]
L(copy0):
	ret

	.p2align 4
	/* Medium copies: 33..128 bytes.  */
L(copy32_128):
	ldp	A_l, A_h, [src]
	ldp	B_l, B_h, [src, 16]
	ldp	C_l, C_h, [srcend, -32]
	ldp	D_l, D_h, [srcend, -16]
	cmp	count, 64
	b.hi	L(copy128)
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy 65..128 bytes.  */
L(copy128):
	ldp	E_l, E_h, [src, 32]
	ldp	F_l, F_h, [src, 48]
	cmp	count, 96
	b.ls	L(copy96)
	ldp	G_l, G_h, [srcend, -64]
	ldp	H_l, H_h, [srcend, -48]
	stp	G_l, G_h, [dstend, -64]
	stp	H_l, H_h, [dstend, -48]
L(copy96):
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	E_l, E_h, [dstin, 32]
	stp	F_l, F_h, [dstin, 48]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy more than 128 bytes.  */
L(copy_long):
	/* Use backwards copy if there is an overlap.  */
	sub	tmp1, dstin, src
	cbz	tmp1, L(copy0)
	cmp	tmp1, count
	b.lo	L(copy_long_backwards)

	/* Copy 16 bytes and then align dst to 16-byte alignment.  */

	ldp	D_l, D_h, [src]
	and	tmp1, dstin, 15
	bic	dst, dstin, 15
	sub	src, src, tmp1
	add	count, count, tmp1	/* Count is now 16 too large.  */
	ldp	A_l, A_h, [src, 16]
	stp	D_l, D_h, [dstin]
	ldp	B_l, B_h, [src, 32]
	ldp	C_l, C_h, [src, 48]
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 128 + 16	/* Test and readjust count.  */
	b.ls	L(copy64_from_end)

L(loop64):
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [src, 16]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [src, 32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [src, 48]
	stp	D_l, D_h, [dst, 64]!
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 64
	b.hi	L(loop64)

	/* Write the last iteration and copy 64 bytes from the end.  */
L(copy64_from_end):
	ldp	E_l, E_h, [srcend, -64]
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [srcend, -48]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [srcend, -16]
	stp	D_l, D_h, [dst, 64]
	stp	E_l, E_h, [dstend, -64]
	stp	A_l, A_h, [dstend, -48]
	stp	B_l, B_h, [dstend, -32]
	stp	C_l, C_h, [dstend, -16]
	ret

	.p2align 4

	/* Large backwards copy for overlapping copies.
	   Copy 16 bytes and then align dst to 16-byte alignment.  */
L(copy_long_backwards):
	ldp	D_l, D_h, [srcend, -16]
	and	tmp1, dstend, 15
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, -16]
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!
	sub	dstend, dstend, tmp1
	subs	count, count, 128
	b.ls	L(copy64_from_start)

L(loop64_backwards):
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	L(loop64_backwards)

	/* Write the last iteration and copy 64 bytes from the start.  */
L(copy64_from_start):
	ldp	G_l, G_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	G_l, G_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
	ret
SYM_FUNC_END(__pi_memcpy)

SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)

SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)

SYM_FUNC_ALIAS(__memmove, __pi_memmove)
EXPORT_SYMBOL(__memmove)
SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)
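The dispatch described in the header comment (memcpy and memmove share one entry point, and the distance between dst and src decides whether a forward or a backward copy is safe) can be illustrated with a minimal C sketch. This is not the kernel code: the function and helper names below are made up for illustration, and the byte-at-a-time loops merely stand in for the pipelined 64-byte loops used by the assembly.

#include <stddef.h>
#include <stdint.h>

/* Stand-in for the forward pipelined loop (L(loop64)). */
static void copy_forward(unsigned char *d, const unsigned char *s, size_t n)
{
	for (size_t i = 0; i < n; i++)
		d[i] = s[i];
}

/* Stand-in for the backward loop (L(loop64_backwards)). */
static void copy_backward(unsigned char *d, const unsigned char *s, size_t n)
{
	while (n--)
		d[n] = s[n];
}

void *sketch_memmove(void *dst, const void *src, size_t count)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if (count == 0 || d == s)
		return dst;

	/*
	 * Same test as "sub tmp1, dstin, src; cmp tmp1, count; b.lo ...":
	 * if dst is ahead of src by fewer than count bytes, a forward copy
	 * would clobber source bytes it has not read yet, so copy backwards.
	 * If dst is behind src, the unsigned difference wraps to a huge
	 * value and the forward path is taken, which is safe.
	 */
	if ((uintptr_t)d - (uintptr_t)s < (uintptr_t)count)
		copy_backward(d, s, count);
	else
		copy_forward(d, s, count);

	return dst;
}

The small/medium/large size split in the assembly only changes how each copy is performed (branchless small cases, a 64-byte software-pipelined loop for large ones); it does not affect this overlap decision, so it is omitted from the sketch.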