1 /* SPDX-License-Identifier: GPL-2.0-only */ !! 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* !! 2 /* memcpy.S: Sparc optimized memcpy and memmove code 3 * linux/arch/arm/lib/memcpy.S !! 3 * Hand optimized from GNU libc's memcpy and memmove 4 * !! 4 * Copyright (C) 1991,1996 Free Software Foundation 5 * Author: Nicolas Pitre !! 5 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) 6 * Created: Sep 28, 2005 !! 6 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 7 * Copyright: MontaVista Software, Inc. !! 7 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) >> 8 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 8 */ 9 */ 9 10 10 #include <linux/linkage.h> !! 11 #include <asm/export.h> 11 #include <asm/assembler.h> !! 12 #define FUNC(x) \ 12 #include <asm/unwind.h> !! 13 .globl x; \ 13 !! 14 .type x,@function; \ 14 #define LDR1W_SHIFT 0 !! 15 .align 4; \ 15 #define STR1W_SHIFT 0 !! 16 x: 16 !! 17 17 .macro ldr1w ptr reg abort !! 18 /* Both these macros have to start with exactly the same insn */ 18 W(ldr) \reg, [\ptr], #4 !! 19 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ 19 .endm !! 20 ldd [%src + (offset) + 0x00], %t0; \ 20 !! 21 ldd [%src + (offset) + 0x08], %t2; \ 21 .macro ldr4w ptr reg1 reg2 reg3 reg4 a !! 22 ldd [%src + (offset) + 0x10], %t4; \ 22 ldmia \ptr!, {\reg1, \reg2, \reg3, \re !! 23 ldd [%src + (offset) + 0x18], %t6; \ 23 .endm !! 24 st %t0, [%dst + (offset) + 0x00]; \ 24 !! 25 st %t1, [%dst + (offset) + 0x04]; \ 25 .macro ldr8w ptr reg1 reg2 reg3 reg4 r !! 26 st %t2, [%dst + (offset) + 0x08]; \ 26 ldmia \ptr!, {\reg1, \reg2, \reg3, \re !! 27 st %t3, [%dst + (offset) + 0x0c]; \ 27 .endm !! 28 st %t4, [%dst + (offset) + 0x10]; \ 28 !! 29 st %t5, [%dst + (offset) + 0x14]; \ 29 .macro ldr1b ptr reg cond=al abort !! 30 st %t6, [%dst + (offset) + 0x18]; \ 30 ldrb\cond \reg, [\ptr], #1 !! 31 st %t7, [%dst + (offset) + 0x1c]; 31 .endm !! 32 32 !! 33 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ 33 .macro str1w ptr reg abort !! 34 ldd [%src + (offset) + 0x00], %t0; \ 34 W(str) \reg, [\ptr], #4 !! 35 ldd [%src + (offset) + 0x08], %t2; \ 35 .endm !! 36 ldd [%src + (offset) + 0x10], %t4; \ 36 !! 37 ldd [%src + (offset) + 0x18], %t6; \ 37 .macro str8w ptr reg1 reg2 reg3 reg4 r !! 38 std %t0, [%dst + (offset) + 0x00]; \ 38 stmia \ptr!, {\reg1, \reg2, \reg3, \re !! 39 std %t2, [%dst + (offset) + 0x08]; \ 39 .endm !! 40 std %t4, [%dst + (offset) + 0x10]; \ 40 !! 41 std %t6, [%dst + (offset) + 0x18]; 41 .macro str1b ptr reg cond=al abort !! 42 42 strb\cond \reg, [\ptr], #1 !! 43 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ 43 .endm !! 44 ldd [%src - (offset) - 0x10], %t0; \ 44 !! 45 ldd [%src - (offset) - 0x08], %t2; \ 45 .macro enter regs:vararg !! 46 st %t0, [%dst - (offset) - 0x10]; \ 46 UNWIND( .save {r0, \regs} ) !! 47 st %t1, [%dst - (offset) - 0x0c]; \ 47 stmdb sp!, {r0, \regs} !! 48 st %t2, [%dst - (offset) - 0x08]; \ 48 .endm !! 49 st %t3, [%dst - (offset) - 0x04]; 49 !! 50 50 .macro exit regs:vararg !! 51 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ 51 ldmfd sp!, {r0, \regs} !! 52 ldd [%src - (offset) - 0x10], %t0; \ 52 .endm !! 53 ldd [%src - (offset) - 0x08], %t2; \ >> 54 std %t0, [%dst - (offset) - 0x10]; \ >> 55 std %t2, [%dst - (offset) - 0x08]; >> 56 >> 57 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ >> 58 ldub [%src - (offset) - 0x02], %t0; \ >> 59 ldub [%src - (offset) - 0x01], %t1; \ >> 60 stb %t0, [%dst - (offset) - 0x02]; \ >> 61 stb %t1, [%dst - (offset) - 0x01]; 53 62 54 .text 63 .text >> 64 .align 4 55 65 56 /* Prototype: void *memcpy(void *dest, const v !! 66 FUNC(memmove) 57 !! 67 EXPORT_SYMBOL(memmove) 58 ENTRY(__memcpy) !! 68 cmp %o0, %o1 59 ENTRY(mmiocpy) !! 69 mov %o0, %g7 60 WEAK(memcpy) !! 70 bleu 9f 61 !! 71 sub %o0, %o1, %o4 62 #include "copy_template.S" !! 72 >> 73 add %o1, %o2, %o3 >> 74 cmp %o3, %o0 >> 75 bleu 0f >> 76 andcc %o4, 3, %o5 >> 77 >> 78 add %o1, %o2, %o1 >> 79 add %o0, %o2, %o0 >> 80 sub %o1, 1, %o1 >> 81 sub %o0, 1, %o0 >> 82 >> 83 1: /* reverse_bytes */ >> 84 >> 85 ldub [%o1], %o4 >> 86 subcc %o2, 1, %o2 >> 87 stb %o4, [%o0] >> 88 sub %o1, 1, %o1 >> 89 bne 1b >> 90 sub %o0, 1, %o0 >> 91 >> 92 retl >> 93 mov %g7, %o0 >> 94 >> 95 /* NOTE: This code is executed just for the cases, >> 96 where %src (=%o1) & 3 is != 0. >> 97 We need to align it to 4. So, for (%src & 3) >> 98 1 we need to do ldub,lduh >> 99 2 lduh >> 100 3 just ldub >> 101 so even if it looks weird, the branches >> 102 are correct here. -jj >> 103 */ >> 104 78: /* dword_align */ 63 105 64 ENDPROC(memcpy) !! 106 andcc %o1, 1, %g0 65 ENDPROC(mmiocpy) !! 107 be 4f 66 ENDPROC(__memcpy) !! 108 andcc %o1, 2, %g0 >> 109 >> 110 ldub [%o1], %g2 >> 111 add %o1, 1, %o1 >> 112 stb %g2, [%o0] >> 113 sub %o2, 1, %o2 >> 114 bne 3f >> 115 add %o0, 1, %o0 >> 116 4: >> 117 lduh [%o1], %g2 >> 118 add %o1, 2, %o1 >> 119 sth %g2, [%o0] >> 120 sub %o2, 2, %o2 >> 121 b 3f >> 122 add %o0, 2, %o0 >> 123 >> 124 FUNC(memcpy) /* %o0=dst %o1=src %o2=len */ >> 125 EXPORT_SYMBOL(memcpy) >> 126 >> 127 sub %o0, %o1, %o4 >> 128 mov %o0, %g7 >> 129 9: >> 130 andcc %o4, 3, %o5 >> 131 0: >> 132 bne 86f >> 133 cmp %o2, 15 >> 134 >> 135 bleu 90f >> 136 andcc %o1, 3, %g0 >> 137 >> 138 bne 78b >> 139 3: >> 140 andcc %o1, 4, %g0 >> 141 >> 142 be 2f >> 143 mov %o2, %g1 >> 144 >> 145 ld [%o1], %o4 >> 146 sub %g1, 4, %g1 >> 147 st %o4, [%o0] >> 148 add %o1, 4, %o1 >> 149 add %o0, 4, %o0 >> 150 2: >> 151 andcc %g1, 0xffffff80, %g0 >> 152 be 3f >> 153 andcc %o0, 4, %g0 >> 154 >> 155 be 82f + 4 >> 156 5: >> 157 MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) >> 158 MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) >> 159 MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) >> 160 MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) >> 161 sub %g1, 128, %g1 >> 162 add %o1, 128, %o1 >> 163 cmp %g1, 128 >> 164 bge 5b >> 165 add %o0, 128, %o0 >> 166 3: >> 167 andcc %g1, 0x70, %g4 >> 168 be 80f >> 169 andcc %g1, 8, %g0 >> 170 >> 171 sethi %hi(80f), %o5 >> 172 srl %g4, 1, %o4 >> 173 add %g4, %o4, %o4 >> 174 add %o1, %g4, %o1 >> 175 sub %o5, %o4, %o5 >> 176 jmpl %o5 + %lo(80f), %g0 >> 177 add %o0, %g4, %o0 >> 178 >> 179 79: /* memcpy_table */ >> 180 >> 181 MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 182 MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 183 MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 184 MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 185 MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 186 MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 187 MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 188 >> 189 80: /* memcpy_table_end */ >> 190 be 81f >> 191 andcc %g1, 4, %g0 >> 192 >> 193 ldd [%o1], %g2 >> 194 add %o0, 8, %o0 >> 195 st %g2, [%o0 - 0x08] >> 196 add %o1, 8, %o1 >> 197 st %g3, [%o0 - 0x04] >> 198 >> 199 81: /* memcpy_last7 */ >> 200 >> 201 be 1f >> 202 andcc %g1, 2, %g0 >> 203 >> 204 ld [%o1], %g2 >> 205 add %o1, 4, %o1 >> 206 st %g2, [%o0] >> 207 add %o0, 4, %o0 >> 208 1: >> 209 be 1f >> 210 andcc %g1, 1, %g0 >> 211 >> 212 lduh [%o1], %g2 >> 213 add %o1, 2, %o1 >> 214 sth %g2, [%o0] >> 215 add %o0, 2, %o0 >> 216 1: >> 217 be 1f >> 218 nop >> 219 >> 220 ldub [%o1], %g2 >> 221 stb %g2, [%o0] >> 222 1: >> 223 retl >> 224 mov %g7, %o0 >> 225 >> 226 82: /* ldd_std */ >> 227 MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) >> 228 MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) >> 229 MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) >> 230 MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) >> 231 subcc %g1, 128, %g1 >> 232 add %o1, 128, %o1 >> 233 cmp %g1, 128 >> 234 bge 82b >> 235 add %o0, 128, %o0 >> 236 >> 237 andcc %g1, 0x70, %g4 >> 238 be 84f >> 239 andcc %g1, 8, %g0 >> 240 >> 241 sethi %hi(84f), %o5 >> 242 add %o1, %g4, %o1 >> 243 sub %o5, %g4, %o5 >> 244 jmpl %o5 + %lo(84f), %g0 >> 245 add %o0, %g4, %o0 >> 246 >> 247 83: /* amemcpy_table */ >> 248 >> 249 MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 250 MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 251 MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 252 MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 253 MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 254 MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 255 MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 256 >> 257 84: /* amemcpy_table_end */ >> 258 be 85f >> 259 andcc %g1, 4, %g0 >> 260 >> 261 ldd [%o1], %g2 >> 262 add %o0, 8, %o0 >> 263 std %g2, [%o0 - 0x08] >> 264 add %o1, 8, %o1 >> 265 85: /* amemcpy_last7 */ >> 266 be 1f >> 267 andcc %g1, 2, %g0 >> 268 >> 269 ld [%o1], %g2 >> 270 add %o1, 4, %o1 >> 271 st %g2, [%o0] >> 272 add %o0, 4, %o0 >> 273 1: >> 274 be 1f >> 275 andcc %g1, 1, %g0 >> 276 >> 277 lduh [%o1], %g2 >> 278 add %o1, 2, %o1 >> 279 sth %g2, [%o0] >> 280 add %o0, 2, %o0 >> 281 1: >> 282 be 1f >> 283 nop >> 284 >> 285 ldub [%o1], %g2 >> 286 stb %g2, [%o0] >> 287 1: >> 288 retl >> 289 mov %g7, %o0 >> 290 >> 291 86: /* non_aligned */ >> 292 cmp %o2, 6 >> 293 bleu 88f >> 294 nop >> 295 >> 296 save %sp, -96, %sp >> 297 andcc %i0, 3, %g0 >> 298 be 61f >> 299 andcc %i0, 1, %g0 >> 300 be 60f >> 301 andcc %i0, 2, %g0 >> 302 >> 303 ldub [%i1], %g5 >> 304 add %i1, 1, %i1 >> 305 stb %g5, [%i0] >> 306 sub %i2, 1, %i2 >> 307 bne 61f >> 308 add %i0, 1, %i0 >> 309 60: >> 310 ldub [%i1], %g3 >> 311 add %i1, 2, %i1 >> 312 stb %g3, [%i0] >> 313 sub %i2, 2, %i2 >> 314 ldub [%i1 - 1], %g3 >> 315 add %i0, 2, %i0 >> 316 stb %g3, [%i0 - 1] >> 317 61: >> 318 and %i1, 3, %g2 >> 319 and %i2, 0xc, %g3 >> 320 and %i1, -4, %i1 >> 321 cmp %g3, 4 >> 322 sll %g2, 3, %g4 >> 323 mov 32, %g2 >> 324 be 4f >> 325 sub %g2, %g4, %l0 >> 326 >> 327 blu 3f >> 328 cmp %g3, 0x8 >> 329 >> 330 be 2f >> 331 srl %i2, 2, %g3 >> 332 >> 333 ld [%i1], %i3 >> 334 add %i0, -8, %i0 >> 335 ld [%i1 + 4], %i4 >> 336 b 8f >> 337 add %g3, 1, %g3 >> 338 2: >> 339 ld [%i1], %i4 >> 340 add %i0, -12, %i0 >> 341 ld [%i1 + 4], %i5 >> 342 add %g3, 2, %g3 >> 343 b 9f >> 344 add %i1, -4, %i1 >> 345 3: >> 346 ld [%i1], %g1 >> 347 add %i0, -4, %i0 >> 348 ld [%i1 + 4], %i3 >> 349 srl %i2, 2, %g3 >> 350 b 7f >> 351 add %i1, 4, %i1 >> 352 4: >> 353 ld [%i1], %i5 >> 354 cmp %i2, 7 >> 355 ld [%i1 + 4], %g1 >> 356 srl %i2, 2, %g3 >> 357 bleu 10f >> 358 add %i1, 8, %i1 >> 359 >> 360 ld [%i1], %i3 >> 361 add %g3, -1, %g3 >> 362 5: >> 363 sll %i5, %g4, %g2 >> 364 srl %g1, %l0, %g5 >> 365 or %g2, %g5, %g2 >> 366 st %g2, [%i0] >> 367 7: >> 368 ld [%i1 + 4], %i4 >> 369 sll %g1, %g4, %g2 >> 370 srl %i3, %l0, %g5 >> 371 or %g2, %g5, %g2 >> 372 st %g2, [%i0 + 4] >> 373 8: >> 374 ld [%i1 + 8], %i5 >> 375 sll %i3, %g4, %g2 >> 376 srl %i4, %l0, %g5 >> 377 or %g2, %g5, %g2 >> 378 st %g2, [%i0 + 8] >> 379 9: >> 380 ld [%i1 + 12], %g1 >> 381 sll %i4, %g4, %g2 >> 382 srl %i5, %l0, %g5 >> 383 addcc %g3, -4, %g3 >> 384 or %g2, %g5, %g2 >> 385 add %i1, 16, %i1 >> 386 st %g2, [%i0 + 12] >> 387 add %i0, 16, %i0 >> 388 bne,a 5b >> 389 ld [%i1], %i3 >> 390 10: >> 391 sll %i5, %g4, %g2 >> 392 srl %g1, %l0, %g5 >> 393 srl %l0, 3, %g3 >> 394 or %g2, %g5, %g2 >> 395 sub %i1, %g3, %i1 >> 396 andcc %i2, 2, %g0 >> 397 st %g2, [%i0] >> 398 be 1f >> 399 andcc %i2, 1, %g0 >> 400 >> 401 ldub [%i1], %g2 >> 402 add %i1, 2, %i1 >> 403 stb %g2, [%i0 + 4] >> 404 add %i0, 2, %i0 >> 405 ldub [%i1 - 1], %g2 >> 406 stb %g2, [%i0 + 3] >> 407 1: >> 408 be 1f >> 409 nop >> 410 ldub [%i1], %g2 >> 411 stb %g2, [%i0 + 4] >> 412 1: >> 413 ret >> 414 restore %g7, %g0, %o0 >> 415 >> 416 88: /* short_end */ >> 417 >> 418 and %o2, 0xe, %o3 >> 419 20: >> 420 sethi %hi(89f), %o5 >> 421 sll %o3, 3, %o4 >> 422 add %o0, %o3, %o0 >> 423 sub %o5, %o4, %o5 >> 424 add %o1, %o3, %o1 >> 425 jmpl %o5 + %lo(89f), %g0 >> 426 andcc %o2, 1, %g0 >> 427 >> 428 MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) >> 429 MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) >> 430 MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) >> 431 MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) >> 432 MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) >> 433 MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) >> 434 MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) >> 435 >> 436 89: /* short_table_end */ >> 437 >> 438 be 1f >> 439 nop >> 440 >> 441 ldub [%o1], %g2 >> 442 stb %g2, [%o0] >> 443 1: >> 444 retl >> 445 mov %g7, %o0 >> 446 >> 447 90: /* short_aligned_end */ >> 448 bne 88b >> 449 andcc %o2, 8, %g0 >> 450 >> 451 be 1f >> 452 andcc %o2, 4, %g0 >> 453 >> 454 ld [%o1 + 0x00], %g2 >> 455 ld [%o1 + 0x04], %g3 >> 456 add %o1, 8, %o1 >> 457 st %g2, [%o0 + 0x00] >> 458 st %g3, [%o0 + 0x04] >> 459 add %o0, 8, %o0 >> 460 1: >> 461 b 81b >> 462 mov %o2, %g1
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.