1 /* SPDX-License-Identifier: GPL-2.0-only */ !! 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* !! 2 /* memcpy.S: Sparc optimized memcpy and memmove code 3 * linux/arch/arm/lib/memcpy.S !! 3 * Hand optimized from GNU libc's memcpy and memmove 4 * !! 4 * Copyright (C) 1991,1996 Free Software Foundation 5 * Author: Nicolas Pitre !! 5 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) 6 * Created: Sep 28, 2005 !! 6 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 7 * Copyright: MontaVista Software, Inc. !! 7 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) >> 8 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 8 */ 9 */ 9 10 10 #include <linux/linkage.h> !! 11 #include <linux/export.h> 11 #include <asm/assembler.h> << 12 #include <asm/unwind.h> << 13 << 14 #define LDR1W_SHIFT 0 << 15 #define STR1W_SHIFT 0 << 16 << 17 .macro ldr1w ptr reg abort << 18 W(ldr) \reg, [\ptr], #4 << 19 .endm << 20 << 21 .macro ldr4w ptr reg1 reg2 reg3 reg4 a << 22 ldmia \ptr!, {\reg1, \reg2, \reg3, \re << 23 .endm << 24 << 25 .macro ldr8w ptr reg1 reg2 reg3 reg4 r << 26 ldmia \ptr!, {\reg1, \reg2, \reg3, \re << 27 .endm << 28 << 29 .macro ldr1b ptr reg cond=al abort << 30 ldrb\cond \reg, [\ptr], #1 << 31 .endm << 32 << 33 .macro str1w ptr reg abort << 34 W(str) \reg, [\ptr], #4 << 35 .endm << 36 << 37 .macro str8w ptr reg1 reg2 reg3 reg4 r << 38 stmia \ptr!, {\reg1, \reg2, \reg3, \re << 39 .endm << 40 << 41 .macro str1b ptr reg cond=al abort << 42 strb\cond \reg, [\ptr], #1 << 43 .endm << 44 << 45 .macro enter regs:vararg << 46 UNWIND( .save {r0, \regs} ) << 47 stmdb sp!, {r0, \regs} << 48 .endm << 49 << 50 .macro exit regs:vararg << 51 ldmfd sp!, {r0, \regs} << 52 .endm << 53 12 54 .text !! 13 #define FUNC(x) \ 55 !! 14 .globl x; \ 56 /* Prototype: void *memcpy(void *dest, const v !! 15 .type x,@function; \ >> 16 .align 4; \ >> 17 x: >> 18 >> 19 /* Both these macros have to start with exactly the same insn */ >> 20 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ >> 21 ldd [%src + (offset) + 0x00], %t0; \ >> 22 ldd [%src + (offset) + 0x08], %t2; \ >> 23 ldd [%src + (offset) + 0x10], %t4; \ >> 24 ldd [%src + (offset) + 0x18], %t6; \ >> 25 st %t0, [%dst + (offset) + 0x00]; \ >> 26 st %t1, [%dst + (offset) + 0x04]; \ >> 27 st %t2, [%dst + (offset) + 0x08]; \ >> 28 st %t3, [%dst + (offset) + 0x0c]; \ >> 29 st %t4, [%dst + (offset) + 0x10]; \ >> 30 st %t5, [%dst + (offset) + 0x14]; \ >> 31 st %t6, [%dst + (offset) + 0x18]; \ >> 32 st %t7, [%dst + (offset) + 0x1c]; >> 33 >> 34 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ >> 35 ldd [%src + (offset) + 0x00], %t0; \ >> 36 ldd [%src + (offset) + 0x08], %t2; \ >> 37 ldd [%src + (offset) + 0x10], %t4; \ >> 38 ldd [%src + (offset) + 0x18], %t6; \ >> 39 std %t0, [%dst + (offset) + 0x00]; \ >> 40 std %t2, [%dst + (offset) + 0x08]; \ >> 41 std %t4, [%dst + (offset) + 0x10]; \ >> 42 std %t6, [%dst + (offset) + 0x18]; >> 43 >> 44 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ >> 45 ldd [%src - (offset) - 0x10], %t0; \ >> 46 ldd [%src - (offset) - 0x08], %t2; \ >> 47 st %t0, [%dst - (offset) - 0x10]; \ >> 48 st %t1, [%dst - (offset) - 0x0c]; \ >> 49 st %t2, [%dst - (offset) - 0x08]; \ >> 50 st %t3, [%dst - (offset) - 0x04]; >> 51 >> 52 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ >> 53 ldd [%src - (offset) - 0x10], %t0; \ >> 54 ldd [%src - (offset) - 0x08], %t2; \ >> 55 std %t0, [%dst - (offset) - 0x10]; \ >> 56 std %t2, [%dst - (offset) - 0x08]; >> 57 >> 58 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ >> 59 ldub [%src - (offset) - 0x02], %t0; \ >> 60 ldub [%src - (offset) - 0x01], %t1; \ >> 61 stb %t0, [%dst - (offset) - 0x02]; \ >> 62 stb %t1, [%dst - (offset) - 0x01]; 57 63 58 ENTRY(__memcpy) !! 64 .text 59 ENTRY(mmiocpy) !! 65 .align 4 60 WEAK(memcpy) << 61 66 62 #include "copy_template.S" !! 67 FUNC(memmove) >> 68 EXPORT_SYMBOL(memmove) >> 69 cmp %o0, %o1 >> 70 mov %o0, %g7 >> 71 bleu 9f >> 72 sub %o0, %o1, %o4 >> 73 >> 74 add %o1, %o2, %o3 >> 75 cmp %o3, %o0 >> 76 bleu 0f >> 77 andcc %o4, 3, %o5 >> 78 >> 79 add %o1, %o2, %o1 >> 80 add %o0, %o2, %o0 >> 81 sub %o1, 1, %o1 >> 82 sub %o0, 1, %o0 >> 83 >> 84 1: /* reverse_bytes */ >> 85 >> 86 ldub [%o1], %o4 >> 87 subcc %o2, 1, %o2 >> 88 stb %o4, [%o0] >> 89 sub %o1, 1, %o1 >> 90 bne 1b >> 91 sub %o0, 1, %o0 >> 92 >> 93 retl >> 94 mov %g7, %o0 >> 95 >> 96 /* NOTE: This code is executed just for the cases, >> 97 where %src (=%o1) & 3 is != 0. >> 98 We need to align it to 4. So, for (%src & 3) >> 99 1 we need to do ldub,lduh >> 100 2 lduh >> 101 3 just ldub >> 102 so even if it looks weird, the branches >> 103 are correct here. -jj >> 104 */ >> 105 78: /* dword_align */ 63 106 64 ENDPROC(memcpy) !! 107 andcc %o1, 1, %g0 65 ENDPROC(mmiocpy) !! 108 be 4f 66 ENDPROC(__memcpy) !! 109 andcc %o1, 2, %g0 >> 110 >> 111 ldub [%o1], %g2 >> 112 add %o1, 1, %o1 >> 113 stb %g2, [%o0] >> 114 sub %o2, 1, %o2 >> 115 bne 3f >> 116 add %o0, 1, %o0 >> 117 4: >> 118 lduh [%o1], %g2 >> 119 add %o1, 2, %o1 >> 120 sth %g2, [%o0] >> 121 sub %o2, 2, %o2 >> 122 b 3f >> 123 add %o0, 2, %o0 >> 124 >> 125 FUNC(memcpy) /* %o0=dst %o1=src %o2=len */ >> 126 EXPORT_SYMBOL(memcpy) >> 127 >> 128 sub %o0, %o1, %o4 >> 129 mov %o0, %g7 >> 130 9: >> 131 andcc %o4, 3, %o5 >> 132 0: >> 133 bne 86f >> 134 cmp %o2, 15 >> 135 >> 136 bleu 90f >> 137 andcc %o1, 3, %g0 >> 138 >> 139 bne 78b >> 140 3: >> 141 andcc %o1, 4, %g0 >> 142 >> 143 be 2f >> 144 mov %o2, %g1 >> 145 >> 146 ld [%o1], %o4 >> 147 sub %g1, 4, %g1 >> 148 st %o4, [%o0] >> 149 add %o1, 4, %o1 >> 150 add %o0, 4, %o0 >> 151 2: >> 152 andcc %g1, 0xffffff80, %g0 >> 153 be 3f >> 154 andcc %o0, 4, %g0 >> 155 >> 156 be 82f + 4 >> 157 5: >> 158 MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) >> 159 MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) >> 160 MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) >> 161 MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) >> 162 sub %g1, 128, %g1 >> 163 add %o1, 128, %o1 >> 164 cmp %g1, 128 >> 165 bge 5b >> 166 add %o0, 128, %o0 >> 167 3: >> 168 andcc %g1, 0x70, %g4 >> 169 be 80f >> 170 andcc %g1, 8, %g0 >> 171 >> 172 sethi %hi(80f), %o5 >> 173 srl %g4, 1, %o4 >> 174 add %g4, %o4, %o4 >> 175 add %o1, %g4, %o1 >> 176 sub %o5, %o4, %o5 >> 177 jmpl %o5 + %lo(80f), %g0 >> 178 add %o0, %g4, %o0 >> 179 >> 180 79: /* memcpy_table */ >> 181 >> 182 MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 183 MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 184 MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 185 MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 186 MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 187 MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 188 MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 189 >> 190 80: /* memcpy_table_end */ >> 191 be 81f >> 192 andcc %g1, 4, %g0 >> 193 >> 194 ldd [%o1], %g2 >> 195 add %o0, 8, %o0 >> 196 st %g2, [%o0 - 0x08] >> 197 add %o1, 8, %o1 >> 198 st %g3, [%o0 - 0x04] >> 199 >> 200 81: /* memcpy_last7 */ >> 201 >> 202 be 1f >> 203 andcc %g1, 2, %g0 >> 204 >> 205 ld [%o1], %g2 >> 206 add %o1, 4, %o1 >> 207 st %g2, [%o0] >> 208 add %o0, 4, %o0 >> 209 1: >> 210 be 1f >> 211 andcc %g1, 1, %g0 >> 212 >> 213 lduh [%o1], %g2 >> 214 add %o1, 2, %o1 >> 215 sth %g2, [%o0] >> 216 add %o0, 2, %o0 >> 217 1: >> 218 be 1f >> 219 nop >> 220 >> 221 ldub [%o1], %g2 >> 222 stb %g2, [%o0] >> 223 1: >> 224 retl >> 225 mov %g7, %o0 >> 226 >> 227 82: /* ldd_std */ >> 228 MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) >> 229 MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) >> 230 MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) >> 231 MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) >> 232 subcc %g1, 128, %g1 >> 233 add %o1, 128, %o1 >> 234 cmp %g1, 128 >> 235 bge 82b >> 236 add %o0, 128, %o0 >> 237 >> 238 andcc %g1, 0x70, %g4 >> 239 be 84f >> 240 andcc %g1, 8, %g0 >> 241 >> 242 sethi %hi(84f), %o5 >> 243 add %o1, %g4, %o1 >> 244 sub %o5, %g4, %o5 >> 245 jmpl %o5 + %lo(84f), %g0 >> 246 add %o0, %g4, %o0 >> 247 >> 248 83: /* amemcpy_table */ >> 249 >> 250 MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 251 MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 252 MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 253 MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 254 MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 255 MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 256 MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 257 >> 258 84: /* amemcpy_table_end */ >> 259 be 85f >> 260 andcc %g1, 4, %g0 >> 261 >> 262 ldd [%o1], %g2 >> 263 add %o0, 8, %o0 >> 264 std %g2, [%o0 - 0x08] >> 265 add %o1, 8, %o1 >> 266 85: /* amemcpy_last7 */ >> 267 be 1f >> 268 andcc %g1, 2, %g0 >> 269 >> 270 ld [%o1], %g2 >> 271 add %o1, 4, %o1 >> 272 st %g2, [%o0] >> 273 add %o0, 4, %o0 >> 274 1: >> 275 be 1f >> 276 andcc %g1, 1, %g0 >> 277 >> 278 lduh [%o1], %g2 >> 279 add %o1, 2, %o1 >> 280 sth %g2, [%o0] >> 281 add %o0, 2, %o0 >> 282 1: >> 283 be 1f >> 284 nop >> 285 >> 286 ldub [%o1], %g2 >> 287 stb %g2, [%o0] >> 288 1: >> 289 retl >> 290 mov %g7, %o0 >> 291 >> 292 86: /* non_aligned */ >> 293 cmp %o2, 6 >> 294 bleu 88f >> 295 nop >> 296 >> 297 save %sp, -96, %sp >> 298 andcc %i0, 3, %g0 >> 299 be 61f >> 300 andcc %i0, 1, %g0 >> 301 be 60f >> 302 andcc %i0, 2, %g0 >> 303 >> 304 ldub [%i1], %g5 >> 305 add %i1, 1, %i1 >> 306 stb %g5, [%i0] >> 307 sub %i2, 1, %i2 >> 308 bne 61f >> 309 add %i0, 1, %i0 >> 310 60: >> 311 ldub [%i1], %g3 >> 312 add %i1, 2, %i1 >> 313 stb %g3, [%i0] >> 314 sub %i2, 2, %i2 >> 315 ldub [%i1 - 1], %g3 >> 316 add %i0, 2, %i0 >> 317 stb %g3, [%i0 - 1] >> 318 61: >> 319 and %i1, 3, %g2 >> 320 and %i2, 0xc, %g3 >> 321 and %i1, -4, %i1 >> 322 cmp %g3, 4 >> 323 sll %g2, 3, %g4 >> 324 mov 32, %g2 >> 325 be 4f >> 326 sub %g2, %g4, %l0 >> 327 >> 328 blu 3f >> 329 cmp %g3, 0x8 >> 330 >> 331 be 2f >> 332 srl %i2, 2, %g3 >> 333 >> 334 ld [%i1], %i3 >> 335 add %i0, -8, %i0 >> 336 ld [%i1 + 4], %i4 >> 337 b 8f >> 338 add %g3, 1, %g3 >> 339 2: >> 340 ld [%i1], %i4 >> 341 add %i0, -12, %i0 >> 342 ld [%i1 + 4], %i5 >> 343 add %g3, 2, %g3 >> 344 b 9f >> 345 add %i1, -4, %i1 >> 346 3: >> 347 ld [%i1], %g1 >> 348 add %i0, -4, %i0 >> 349 ld [%i1 + 4], %i3 >> 350 srl %i2, 2, %g3 >> 351 b 7f >> 352 add %i1, 4, %i1 >> 353 4: >> 354 ld [%i1], %i5 >> 355 cmp %i2, 7 >> 356 ld [%i1 + 4], %g1 >> 357 srl %i2, 2, %g3 >> 358 bleu 10f >> 359 add %i1, 8, %i1 >> 360 >> 361 ld [%i1], %i3 >> 362 add %g3, -1, %g3 >> 363 5: >> 364 sll %i5, %g4, %g2 >> 365 srl %g1, %l0, %g5 >> 366 or %g2, %g5, %g2 >> 367 st %g2, [%i0] >> 368 7: >> 369 ld [%i1 + 4], %i4 >> 370 sll %g1, %g4, %g2 >> 371 srl %i3, %l0, %g5 >> 372 or %g2, %g5, %g2 >> 373 st %g2, [%i0 + 4] >> 374 8: >> 375 ld [%i1 + 8], %i5 >> 376 sll %i3, %g4, %g2 >> 377 srl %i4, %l0, %g5 >> 378 or %g2, %g5, %g2 >> 379 st %g2, [%i0 + 8] >> 380 9: >> 381 ld [%i1 + 12], %g1 >> 382 sll %i4, %g4, %g2 >> 383 srl %i5, %l0, %g5 >> 384 addcc %g3, -4, %g3 >> 385 or %g2, %g5, %g2 >> 386 add %i1, 16, %i1 >> 387 st %g2, [%i0 + 12] >> 388 add %i0, 16, %i0 >> 389 bne,a 5b >> 390 ld [%i1], %i3 >> 391 10: >> 392 sll %i5, %g4, %g2 >> 393 srl %g1, %l0, %g5 >> 394 srl %l0, 3, %g3 >> 395 or %g2, %g5, %g2 >> 396 sub %i1, %g3, %i1 >> 397 andcc %i2, 2, %g0 >> 398 st %g2, [%i0] >> 399 be 1f >> 400 andcc %i2, 1, %g0 >> 401 >> 402 ldub [%i1], %g2 >> 403 add %i1, 2, %i1 >> 404 stb %g2, [%i0 + 4] >> 405 add %i0, 2, %i0 >> 406 ldub [%i1 - 1], %g2 >> 407 stb %g2, [%i0 + 3] >> 408 1: >> 409 be 1f >> 410 nop >> 411 ldub [%i1], %g2 >> 412 stb %g2, [%i0 + 4] >> 413 1: >> 414 ret >> 415 restore %g7, %g0, %o0 >> 416 >> 417 88: /* short_end */ >> 418 >> 419 and %o2, 0xe, %o3 >> 420 20: >> 421 sethi %hi(89f), %o5 >> 422 sll %o3, 3, %o4 >> 423 add %o0, %o3, %o0 >> 424 sub %o5, %o4, %o5 >> 425 add %o1, %o3, %o1 >> 426 jmpl %o5 + %lo(89f), %g0 >> 427 andcc %o2, 1, %g0 >> 428 >> 429 MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) >> 430 MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) >> 431 MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) >> 432 MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) >> 433 MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) >> 434 MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) >> 435 MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) >> 436 >> 437 89: /* short_table_end */ >> 438 >> 439 be 1f >> 440 nop >> 441 >> 442 ldub [%o1], %g2 >> 443 stb %g2, [%o0] >> 444 1: >> 445 retl >> 446 mov %g7, %o0 >> 447 >> 448 90: /* short_aligned_end */ >> 449 bne 88b >> 450 andcc %o2, 8, %g0 >> 451 >> 452 be 1f >> 453 andcc %o2, 4, %g0 >> 454 >> 455 ld [%o1 + 0x00], %g2 >> 456 ld [%o1 + 0x04], %g3 >> 457 add %o1, 8, %o1 >> 458 st %g2, [%o0 + 0x00] >> 459 st %g3, [%o0 + 0x04] >> 460 add %o0, 8, %o0 >> 461 1: >> 462 b 81b >> 463 mov %o2, %g1
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.