/* SPDX-License-Identifier: GPL-2.0 */
/*
 * "memcpy" implementation of SuperH
 *
 * Copyright (C) 1999  Niibe Yutaka
 * Copyright (c) 2002  STMicroelectronics Ltd
 *   Modified from memcpy.S and micro-optimised for SH4
 *   Stuart Menefy (stuart.menefy@st.com)
 *
 */
#include <linux/linkage.h>

/*
 * void *memcpy(void *dst, const void *src, size_t n);
 *
 * It is assumed that there is no overlap between src and dst.
 * If there is an overlap, then the results are undefined.
 *
 * Register usage, as established by the code below:
 *   In:   r4 = dst, r5 = src, r6 = n
 *   Out:  r0 = dst (set explicitly on the zero-length path; on the other
 *         paths r0 is the write pointer, which walks down towards r4)
 *   Work: r0 = dst + n (write pointer, every store is a pre-decrement
 *         through r0 or goes through r1 in the cache-line loops)
 *         r5 = src - dst, so that @(r0,r5) addresses the source byte that
 *         corresponds to the destination address held in r0
 *   r8-r11 (and r12 in .Lcase2b) are pushed on r15 before the cache-line
 *   loops and popped again afterwards.
 *
 * The copy therefore runs from the END of both buffers towards the start.
 *
 * Layout conventions used in this file:
 *   - An instruction indented by one extra space sits in the delay slot of
 *     the branch (bt/s, bf/s, bra, rts) on the line above it.
 *   - The trailing "NN XX" annotations are the instruction number / issue
 *     group notes (EX, MT, LS, BR) carried over from the original author.
 *   - The local .Lcase* routines are placed before ENTRY(memcpy); they are
 *     only reached by branches from memcpy.
 */

	!
	!	GHIJ KLMN OPQR -->  ...G HIJK LMNO PQR.
	!

	! Size is 16 or greater, and may have trailing bytes

	.balign	32
.Lcase1:
	! Read a long word and write a long word at once
	! At the start of each iteration, r7 contains last long load
	add	#-1,r5		!  79 EX
	mov	r4,r2		!   5 MT (0 cycles latency)

	mov.l	@(r0,r5),r7	!  21 LS (2 cycles latency)
	add	#-4,r5		!  50 EX

	add	#7,r2		!  79 EX	! r2 = dst + 7: long word loop limit
	!
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	! 6 cycles, 4 bytes per iteration
3:	mov.l	@(r0,r5),r1	!  21 LS (latency=2)	! NMLK
	mov	r7, r3		!   5 MT (latency=0)	! RQPO

	cmp/hi	r2,r0		!  57 MT
	shll16	r3		! 103 EX

	mov	r1,r6		!   5 MT (latency=0)
	shll8	r3		! 102 EX		! Oxxx

	shlr8	r6		! 106 EX		! xNML
	mov	r1, r7		!   5 MT (latency=0)

	or	r6,r3		!  82 EX		! ONML
	bt/s	3b		! 109 BR

	 mov.l	r3,@-r0		!  30 LS
#else
3:	mov.l	@(r0,r5),r1	!  21 LS (latency=2)	! KLMN
	mov	r7,r3		!   5 MT (latency=0)	! OPQR

	cmp/hi	r2,r0		!  57 MT
	shlr16	r3		! 107 EX

	shlr8	r3		! 106 EX		! xxxO
	mov	r1,r6		!   5 MT (latency=0)

	shll8	r6		! 102 EX		! LMNx
	mov	r1,r7		!   5 MT (latency=0)

	or	r6,r3		!  82 EX		! LMNO
	bt/s	3b		! 109 BR

	 mov.l	r3,@-r0		!  30 LS
#endif
	! Finally, copy a byte at once, if necessary

	add	#4,r5		!  50 EX	! net -1: @(r0,r5) is the byte for r0-1
	cmp/eq	r4,r0		!  54 MT	! already back at dst?

	add	#-6,r2		!  50 EX	! r2 = dst + 1: byte loop limit
	bt	9f		! 109 BR

8:	cmp/hi	r2,r0		!  57 MT
	mov.b	@(r0,r5),r1	!  20 LS (latency=2)

	bt/s	8b		! 109 BR

	 mov.b	r1,@-r0		!  29 LS

9:	rts
	 nop


	!
	!	GHIJ KLMN OPQR -->  .GHI JKLM NOPQ R...
	!

	! Size is 16 or greater, and may have trailing bytes

	.balign	32
.Lcase3:
	! Read a long word and write a long word at once
	! At the start of each iteration, r7 contains last long load
	add	#-3,r5		!  79 EX
	mov	r4,r2		!   5 MT (0 cycles latency)

	mov.l	@(r0,r5),r7	!  21 LS (2 cycles latency)
	add	#-4,r5		!  50 EX

	add	#7,r2		!  79 EX	! r2 = dst + 7: long word loop limit
	!
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	! 6 cycles, 4 bytes per iteration
3:	mov.l	@(r0,r5),r1	!  21 LS (latency=2)	! NMLK
	mov	r7, r3		!   5 MT (latency=0)	! RQPO

	cmp/hi	r2,r0		!  57 MT
	shll8	r3		! 102 EX		! QPOx

	mov	r1,r6		!   5 MT (latency=0)
	shlr16	r6		! 107 EX

	shlr8	r6		! 106 EX		! xxxN
	mov	r1, r7		!   5 MT (latency=0)

	or	r6,r3		!  82 EX		! QPON
	bt/s	3b		! 109 BR

	 mov.l	r3,@-r0		!  30 LS
#else
3:	mov	r7,r3		! OPQR
	shlr8	r3		! xOPQ
	mov.l	@(r0,r5),r7	! KLMN
	mov	r7,r6
	shll16	r6
	shll8	r6		! Nxxx
	or	r6,r3		! NOPQ
	cmp/hi	r2,r0
	bt/s	3b
	 mov.l	r3,@-r0
#endif

	! Finally, copy a byte at once, if necessary

	add	#6,r5		!  50 EX	! net -1: @(r0,r5) is the byte for r0-1
	cmp/eq	r4,r0		!  54 MT	! already back at dst?

	add	#-6,r2		!  50 EX	! r2 = dst + 1: byte loop limit
	bt	9f		! 109 BR

8:	cmp/hi	r2,r0		!  57 MT
	mov.b	@(r0,r5),r1	!  20 LS (latency=2)

	bt/s	8b		! 109 BR

	 mov.b	r1,@-r0		!  29 LS

9:	rts
	 nop

ENTRY(memcpy)

	! Calculate the invariants which will be used in the remainder
	! of the code:
	!
	!      r4   -->  [ ...  ] DST             [ ...  ] SRC
	!	         [ ...  ]                 [ ...  ]
	!	           :                        :
	!      r0   -->  [ ...  ]       r0+r5 --> [ ...  ]
	!
	!

	! Short circuit the common case of src, dst and len being 32 bit aligned
	! and test for zero length move

	mov	r6, r0		!   5 MT (0 cycle latency)
	or	r4, r0		!  82 EX

	or	r5, r0		!  82 EX	! r0 = n | dst | src
	tst	r6, r6		!  86 MT

	bt/s	99f		! 111 BR		(zero len)
	 tst	#3, r0		!  87 MT	! T = all three are long word aligned

	mov	r4, r0		!   5 MT (0 cycle latency)
	add	r6, r0		!  49 EX	! r0 = dst + n

	mov	#16, r1		!   6 EX
	bt/s	.Lcase00	! 111 BR		(aligned)

	 sub	r4, r5		!  75 EX	! r5 = src - dst

	! Arguments are not nicely long word aligned or zero len.
	! Check for small copies, and if so do a simple byte at a time copy.
	!
	! Deciding on an exact value of 'small' is not easy, as the point at which
	! using the optimised routines become worthwhile varies (these are the
	! cycle counts for different sizes using byte-at-a-time vs. optimised):
	!	size	byte-at-time	long
	!	16	42		39-40
	!	24	58		43-44
	!	36	82		49-50
	! However the penalty for getting it 'wrong' is much higher for long word
	! aligned data (and this is more common), so use a value of 16.

	cmp/gt	r6,r1		!  56 MT	! T = (16 > n)

	add	#-1,r5		!  50 EX
	bf/s	6f		! 108 BR		(not small)

	 mov	r5, r3		!   5 MT (latency=0)
	shlr	r6		! 104 EX	! r6 = n / 2, T = (n was odd)

	mov.b	@(r0,r5),r1	!  20 LS (latency=2)
	bf/s	4f		! 111 BR	! even count: enter the loop half way

	 add	#-1,r3		!  50 EX
	tst	r6, r6		!  86 MT	! odd count: was it a single byte?

	bt/s	98f		! 110 BR
	 mov.b	r1,@-r0		!  29 LS

	! 4 cycles, 2 bytes per iteration
3:	mov.b	@(r0,r5),r1	!  20 LS (latency=2)

4:	mov.b	@(r0,r3),r2	!  20 LS (latency=2)
	dt	r6		!  67 EX

	mov.b	r1,@-r0		!  29 LS
	bf/s	3b		! 111 BR

	 mov.b	r2,@-r0		!  29 LS
98:
	rts
	 nop

99:	rts
	 mov	r4, r0		! zero length: just return dst

	! Size is not small, so its worthwhile looking for optimisations.
	! First align destination to a long word boundary.
	!
	! r5 = normal value -1

6:	tst	#3, r0		!  87 MT
	mov	#3, r3		!   6 EX

	bt/s	2f		! 111 BR
	 and	r0,r3		!  78 EX	! r3 = (dst + n) & 3 = bytes to peel off

	! 3 cycles, 1 byte per iteration
1:	dt	r3		!  67 EX
	mov.b	@(r0,r5),r1	!  19 LS (latency=2)

	add	#-1, r6		!  79 EX
	bf/s	1b		! 109 BR

	 mov.b	r1,@-r0		!  28 LS

2:	add	#1, r5		!  79 EX	! back to the normal r5 = src - dst

	! Now select the appropriate bulk transfer code based on relative
	! alignment of src and dst.
	!
	! The low two bits of r5 (src - dst) pick the routine:
	!	00 -> .Lcase0 / .Lcase0b	10 -> .Lcase2 / .Lcase2b
	!	01 -> .Lcase1			11 -> .Lcase3
	! For the even offsets the "b" variant is taken when r6 >= 64.

	mov	r0, r3		!   5 MT (latency=0)	! park the write pointer

	mov	r5, r0		!   5 MT (latency=0)	! tst #imm only works on r0
	tst	#1, r0		!  87 MT

	bf/s	1f		! 111 BR
	 mov	#64, r7		!   6 EX

	! bit 0 clear

	cmp/ge	r7, r6		!  55 MT

	bt/s	2f		! 111 BR
	 tst	#2, r0		!  87 MT	! T for the branches at "small"/"big"

	! small
	bt/s	.Lcase0
	 mov	r3, r0

	bra	.Lcase2
	 nop

	! big
2:	bt/s	.Lcase0b
	 mov	r3, r0

	bra	.Lcase2b
	 nop

	! bit 0 set
1:	tst	#2, r0		!  87 MT

	bt/s	.Lcase1
	 mov	r3, r0

	bra	.Lcase3
	 nop


	!
	!	GHIJ KLMN OPQR -->  GHIJ KLMN OPQR
	!

	! src, dst and size are all long word aligned
	! size is non-zero

	.balign	32
.Lcase00:
	mov	#64, r1		!   6 EX
	mov	r5, r3		!   5 MT (latency=0)

	cmp/gt	r6, r1		!  56 MT	! T = (64 > n)
	add	#-4, r5		!  50 EX

	bf	.Lcase00b	! 108 BR		(big loop)
	shlr2	r6		! 105 EX	! r6 = n / 4 (long words)

	shlr	r6		! 104 EX	! r6 = pairs, T = (odd long word count)
	mov.l	@(r0, r5), r1	!  21 LS (latency=2)

	bf/s	4f		! 111 BR	! even count: enter the loop half way
	 add	#-8, r3		!  50 EX

	tst	r6, r6		!  86 MT	! odd count: was it a single long word?
	bt/s	5f		! 110 BR

	 mov.l	r1,@-r0		!  30 LS

	! 4 cycles, 2 long words per iteration
3:	mov.l	@(r0, r5), r1	!  21 LS (latency=2)

4:	mov.l	@(r0, r3), r2	!  21 LS (latency=2)
	dt	r6		!  67 EX

	mov.l	r1, @-r0	!  30 LS
	bf/s	3b		! 109 BR

	 mov.l	r2, @-r0	!  30 LS

5:	rts
	 nop


	! Size is 16 or greater and less than 64, but may have trailing bytes

	.balign	32
.Lcase0:
	add	#-4, r5		!  50 EX
	mov	r4, r7		!   5 MT (latency=0)

	mov.l	@(r0, r5), r1	!  21 LS (latency=2)
	mov	#4, r2		!   6 EX

	add	#11, r7		!  50 EX	! r7 = dst + 11: long word pair limit
	tst	r2, r6		!  86 MT	! T = (bit 2 of the length is clear)

	mov	r5, r3		!   5 MT (latency=0)
	bt/s	4f		! 111 BR

	 add	#-4, r3		!  50 EX
	mov.l	r1,@-r0		!  30 LS

	! 4 cycles, 2 long words per iteration
3:	mov.l	@(r0, r5), r1	!  21 LS (latency=2)

4:	mov.l	@(r0, r3), r2	!  21 LS (latency=2)
	cmp/hi	r7, r0

	mov.l	r1, @-r0	!  30 LS
	bt/s	3b		! 109 BR

	 mov.l	r2, @-r0	!  30 LS

	! Copy the final 0-3 bytes

	add	#3,r5		!  50 EX	! net -1: @(r0,r5) is the byte for r0-1

	cmp/eq	r0, r4		!  54 MT
	add	#-10, r7	!  50 EX	! r7 = dst + 1: byte loop limit

	bt	9f		! 110 BR

	! 3 cycles, 1 byte per iteration
1:	mov.b	@(r0,r5),r1	!  19 LS
	cmp/hi	r7,r0		!  57 MT

	bt/s	1b		! 111 BR
	 mov.b	r1,@-r0		!  28 LS

9:	rts
	 nop

	! Size is at least 64 bytes, so will be going round the big loop at
	! least once.
	!
	!   r2 = rounded up r4
	!   r3 = rounded down r0

	.balign	32
.Lcase0b:
	add	#-4, r5		!  50 EX

.Lcase00b:
	mov	r0, r3		!   5 MT (latency=0)
	mov	#(~0x1f), r1	!   6 EX	! mask for 32-byte boundaries

	and	r1, r3		!  78 EX
	mov	r4, r2		!   5 MT (latency=0)

	cmp/eq	r3, r0		!  54 MT	! write pointer already on a boundary?
	add	#0x1f, r2	!  50 EX

	bt/s	1f		! 110 BR
	 and	r1, r2		!  78 EX

	! copy initial words until cache line aligned

	mov.l	@(r0, r5), r1	!  21 LS (latency=2)
	tst	#4, r0		!  87 MT	! T = even number of long words to go

	mov	r5, r6		!   5 MT (latency=0)
	add	#-4, r6		!  50 EX

	bt/s	4f		! 111 BR
	 add	#8, r3		!  50 EX

	tst	#0x18, r0	!  87 MT	! T = only this one long word to go

	bt/s	1f		! 109 BR
	 mov.l	r1,@-r0		!  30 LS

	! 4 cycles, 2 long words per iteration
3:	mov.l	@(r0, r5), r1	!  21 LS (latency=2)

4:	mov.l	@(r0, r6), r7	!  21 LS (latency=2)
	cmp/eq	r3, r0		!  54 MT

	mov.l	r1, @-r0	!  30 LS
	bf/s	3b		! 109 BR

	 mov.l	r7, @-r0	!  30 LS

	! Copy the cache line aligned blocks
	!
	! In use: r0, r2, r4, r5
	! Scratch: r1, r3, r6, r7
	!
	! We could do this with the four scratch registers, but if src
	! and dest hit the same cache line, this will thrash, so make
	! use of additional registers.
	!
	! We also need r0 as a temporary (for movca), so 'undo' the invariant:
	!   r5:	 src (was r0+r5)
	!   r1:	 dest (was r0)
	! this can be reversed at the end, so we don't need to save any extra
	! state.
	!
1:	mov.l	r8, @-r15	!  30 LS
	add	r0, r5		!  49 EX

	mov.l	r9, @-r15	!  30 LS
	mov	r0, r1		!   5 MT (latency=0)

	mov.l	r10, @-r15	!  30 LS
	add	#-0x1c, r5	!  50 EX

	mov.l	r11, @-r15	!  30 LS

	! 16 cycles, 32 bytes per iteration
	! All eight long words are loaded before the first store; the first
	! store of each block uses movca.l (the SH-4 "move with cache block
	! allocation" store, whose source operand must be r0).
2:	mov.l	@(0x00,r5),r0	!  18 LS (latency=2)
	add	#-0x20, r1	!  50 EX
	mov.l	@(0x04,r5),r3	!  18 LS (latency=2)
	mov.l	@(0x08,r5),r6	!  18 LS (latency=2)
	mov.l	@(0x0c,r5),r7	!  18 LS (latency=2)
	mov.l	@(0x10,r5),r8	!  18 LS (latency=2)
	mov.l	@(0x14,r5),r9	!  18 LS (latency=2)
	mov.l	@(0x18,r5),r10	!  18 LS (latency=2)
	mov.l	@(0x1c,r5),r11	!  18 LS (latency=2)
	movca.l	r0,@r1		!  40 LS
	mov.l	r3,@(0x04,r1)	!  33 LS
	mov.l	r6,@(0x08,r1)	!  33 LS
	mov.l	r7,@(0x0c,r1)	!  33 LS

	mov.l	r8,@(0x10,r1)	!  33 LS
	add	#-0x20, r5	!  50 EX

	mov.l	r9,@(0x14,r1)	!  33 LS
	cmp/eq	r2,r1		!  54 MT	! reached the rounded-up dst?

	mov.l	r10,@(0x18,r1)	!  33 LS
	bf/s	2b		! 109 BR

	 mov.l	r11,@(0x1c,r1)	!  33 LS

	mov	r1, r0		!   5 MT (latency=0)

	mov.l	@r15+, r11	!  15 LS
	sub	r1, r5		!  75 EX

	mov.l	@r15+, r10	!  15 LS
	cmp/eq	r4, r0		!  54 MT	! nothing left below the first block?

	bf/s	1f		! 109 BR
	 mov.l	 @r15+, r9	!  15 LS

	! The pop of r8 below is shared: it is the delay slot of this rts and
	! also the target of the bf/s above.
	rts
1:	 mov.l	@r15+, r8	!  15 LS
	sub	r4, r1		!  75 EX

	! number of trailing bytes is non-zero
	!
	! invariants restored (r5 already decremented by 4)
	! also r1=num bytes remaining

	mov	#4, r2		!   6 EX
	mov	r4, r7		!   5 MT (latency=0)

	add	#0x1c, r5	!  50 EX
	cmp/hs	r2, r1		!  58 MT	! at least one long word left?

	bf/s	5f		! 108 BR
	 add	 #11, r7	!  50 EX	! r7 = dst + 11: long word pair limit

	mov.l	@(r0, r5), r6	!  21 LS (latency=2)
	tst	r2, r1		!  86 MT	! T = (bit 2 of the remainder is clear)

	mov	r5, r3		!   5 MT (latency=0)
	bt/s	4f		! 111 BR

	 add	#-4, r3		!  50 EX
	! NOTE(review): r1 and r2 are unchanged since the cmp/hs above, which
	! already established r1 >= 4, so this branch looks always taken; any
	! long words still outstanding would then be moved by the byte loop
	! at 5:.  Result is the same either way - confirm the intent.
	cmp/hs	r2, r1		!  58 MT

	bt/s	5f		! 111 BR
	 mov.l	r6,@-r0		!  30 LS

	! 4 cycles, 2 long words per iteration
3:	mov.l	@(r0, r5), r6	!  21 LS (latency=2)

4:	mov.l	@(r0, r3), r2	!  21 LS (latency=2)
	cmp/hi	r7, r0

	mov.l	r6, @-r0	!  30 LS
	bt/s	3b		! 109 BR

	 mov.l	r2, @-r0	!  30 LS

	! Copy the final 0-3 bytes

5:	cmp/eq	r0, r4		!  54 MT
	add	#-10, r7	!  50 EX	! r7 = dst + 1: byte loop limit

	bt	9f		! 110 BR
	add	#3,r5		!  50 EX	! net -1: @(r0,r5) is the byte for r0-1

	! 3 cycles, 1 byte per iteration
1:	mov.b	@(r0,r5),r1	!  19 LS
	cmp/hi	r7,r0		!  57 MT

	bt/s	1b		! 111 BR
	 mov.b	r1,@-r0		!  28 LS

9:	rts
	 nop

	!
	!	GHIJ KLMN OPQR -->  ..GH IJKL MNOP QR..
	!

	.balign	32
.Lcase2:
	! Size is 16 or greater and less than 64, but may have trailing bytes

2:	mov	r5, r6		!   5 MT (latency=0)
	add	#-2,r5		!  50 EX

	mov	r4,r2		!   5 MT (latency=0)
	add	#-4,r6		!  50 EX

	add	#7,r2		!  50 EX	! r2 = dst + 7: loop limit
3:	mov.w	@(r0,r5),r1	!  20 LS (latency=2)

	mov.w	@(r0,r6),r3	!  20 LS (latency=2)
	cmp/hi	r2,r0		!  57 MT

	mov.w	r1,@-r0		!  29 LS
	bt/s	3b		! 111 BR

	 mov.w	r3,@-r0		!  29 LS

	bra	10f		! shared short word / byte tail in .Lcase2b
	 nop


	.balign	32
.Lcase2b:
	! Size is at least 64 bytes, so will be going round the big loop at
	! least once.
	!
	!   r2 = rounded up r4
	!   r3 = rounded down r0

	mov	r0, r3		!   5 MT (latency=0)
	mov	#(~0x1f), r1	!   6 EX	! mask for 32-byte boundaries

	and	r1, r3		!  78 EX
	mov	r4, r2		!   5 MT (latency=0)

	cmp/eq	r3, r0		!  54 MT	! write pointer already on a boundary?
	add	#0x1f, r2	!  50 EX

	add	#-2, r5		!  50 EX
	bt/s	1f		! 110 BR
	 and	r1, r2		!  78 EX

	! Copy a short word one at a time until we are cache line aligned
	!   Normal values: r0, r2, r3, r4
	!   Unused: r1, r6, r7
	!   Mod: r5 (=r5-2)
	!
	add	#2, r3		!  50 EX

2:	mov.w	@(r0,r5),r1	!  20 LS (latency=2)
	cmp/eq	r3,r0		!  54 MT

	bf/s	2b		! 111 BR

	 mov.w	r1,@-r0		!  29 LS

	! Copy the cache line aligned blocks
	!
	! In use: r0, r2, r4, r5 (=r5-2)
	! Scratch: r1, r3, r6, r7
	!
	! We could do this with the four scratch registers, but if src
	! and dest hit the same cache line, this will thrash, so make
	! use of additional registers.
	!
	! We also need r0 as a temporary (for movca), so 'undo' the invariant:
	!   r5:	 src (was r0+r5)
	!   r1:	 dest (was r0)
	! this can be reversed at the end, so we don't need to save any extra
	! state.
	!
1:	mov.l	r8, @-r15	!  30 LS
	add	r0, r5		!  49 EX

	mov.l	r9, @-r15	!  30 LS
	mov	r0, r1		!   5 MT (latency=0)

	mov.l	r10, @-r15	!  30 LS
	add	#-0x1e, r5	!  50 EX

	mov.l	r11, @-r15	!  30 LS

	mov.l	r12, @-r15	!  30 LS

	! 17 cycles, 32 bytes per iteration
	!
	! Each block is read as short word + 7 long words + short word and the
	! neighbouring registers are merged with xtrct (Rn = middle 32 bits of
	! the 64-bit pair Rm:Rn) to form the eight destination long words.
#ifdef CONFIG_CPU_LITTLE_ENDIAN
2:	mov.w	@r5+, r0	!  14 LS (latency=2)
	add	#-0x20, r1	!  50 EX

	mov.l	@r5+, r3	!  15 LS (latency=2)

	mov.l	@r5+, r6	!  15 LS (latency=2)
	shll16	r0		! 103 EX

	mov.l	@r5+, r7	!  15 LS (latency=2)
	xtrct	r3, r0		!  48 EX

	mov.l	@r5+, r8	!  15 LS (latency=2)
	xtrct	r6, r3		!  48 EX

	mov.l	@r5+, r9	!  15 LS (latency=2)
	xtrct	r7, r6		!  48 EX

	mov.l	@r5+, r10	!  15 LS (latency=2)
	xtrct	r8, r7		!  48 EX

	mov.l	@r5+, r11	!  15 LS (latency=2)
	xtrct	r9, r8		!  48 EX

	mov.w	@r5+, r12	!  15 LS (latency=2)
	xtrct	r10, r9		!  48 EX

	movca.l	r0,@r1		!  40 LS
	xtrct	r11, r10	!  48 EX

	mov.l	r3, @(0x04,r1)	!  33 LS
	xtrct	r12, r11	!  48 EX

	mov.l	r6, @(0x08,r1)	!  33 LS

	mov.l	r7, @(0x0c,r1)	!  33 LS

	mov.l	r8, @(0x10,r1)	!  33 LS
	add	#-0x40, r5	!  50 EX	! loads advanced r5 by 32: net -32

	mov.l	r9, @(0x14,r1)	!  33 LS
	cmp/eq	r2,r1		!  54 MT	! reached the rounded-up dst?

	mov.l	r10, @(0x18,r1)	!  33 LS
	bf/s	2b		! 109 BR

	 mov.l	r11, @(0x1c,r1)	!  33 LS
#else
2:	mov.w	@(0x1e,r5), r0	!  17 LS (latency=2)
	add	#-2, r5		!  50 EX

	mov.l	@(0x1c,r5), r3	!  18 LS (latency=2)
	add	#-4, r1		!  50 EX

	mov.l	@(0x18,r5), r6	!  18 LS (latency=2)
	shll16	r0		! 103 EX

	mov.l	@(0x14,r5), r7	!  18 LS (latency=2)
	xtrct	r3, r0		!  48 EX

	mov.l	@(0x10,r5), r8	!  18 LS (latency=2)
	xtrct	r6, r3		!  48 EX

	mov.l	@(0x0c,r5), r9	!  18 LS (latency=2)
	xtrct	r7, r6		!  48 EX

	mov.l	@(0x08,r5), r10	!  18 LS (latency=2)
	xtrct	r8, r7		!  48 EX

	mov.l	@(0x04,r5), r11	!  18 LS (latency=2)
	xtrct	r9, r8		!  48 EX

	mov.l	@(0x00,r5), r12	!  18 LS (latency=2)
	xtrct	r10, r9		!  48 EX

	movca.l	r0,@r1		!  40 LS
	add	#-0x1c, r1	!  50 EX

	mov.l	r3, @(0x18,r1)	!  33 LS
	xtrct	r11, r10	!  48 EX

	mov.l	r6, @(0x14,r1)	!  33 LS
	xtrct	r12, r11	!  48 EX

	mov.l	r7, @(0x10,r1)	!  33 LS

	mov.l	r8, @(0x0c,r1)	!  33 LS
	add	#-0x1e, r5	!  50 EX	! with the -2 above: net -32

	mov.l	r9, @(0x08,r1)	!  33 LS
	cmp/eq	r2,r1		!  54 MT	! reached the rounded-up dst?

	mov.l	r10, @(0x04,r1)	!  33 LS
	bf/s	2b		! 109 BR

	 mov.l	r11, @(0x00,r1)	!  33 LS
#endif

	mov.l	@r15+, r12
	mov	r1, r0		!   5 MT (latency=0)

	mov.l	@r15+, r11	!  15 LS
	sub	r1, r5		!  75 EX

	mov.l	@r15+, r10	!  15 LS
	cmp/eq	r4, r0		!  54 MT	! nothing left below the first block?

	bf/s	1f		! 109 BR
	 mov.l	 @r15+, r9	!  15 LS

	! The pop of r8 below is shared: it is the delay slot of this rts and
	! also the target of the bf/s above.
	rts
1:	 mov.l	@r15+, r8	!  15 LS

	add	#0x1e, r5	!  50 EX

	! Finish off a short word at a time
	! r5 must be invariant - 2
10:	mov	r4,r2		!   5 MT (latency=0)
	add	#1,r2		!  50 EX

	cmp/hi	r2, r0		!  57 MT	! at least two bytes left?
	bf/s	1f		! 109 BR

	 add	#2, r2		!  50 EX	! r2 = dst + 3: short word loop limit

3:	mov.w	@(r0,r5),r1	!  20 LS
	cmp/hi	r2,r0		!  57 MT

	bt/s	3b		! 109 BR

	 mov.w	r1,@-r0		!  29 LS
1:

	!
	! Finally, copy the last byte if necessary
	cmp/eq	r4,r0		!  54 MT
	bt/s	9b		! done: return through the rts/nop at 9: above
	 add	#1,r5
	mov.b	@(r0,r5),r1
	rts
	 mov.b	r1,@-r0
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.