1 /* SPDX-License-Identifier: GPL-2.0-only */ !! 1 /* memcpy.S: Sparc optimized memcpy and memmove code 2 /* !! 2 * Hand optimized from GNU libc's memcpy and memmove 3 * linux/arch/arm/lib/memcpy.S !! 3 * Copyright (C) 1991,1996 Free Software Foundation 4 * !! 4 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) 5 * Author: Nicolas Pitre !! 5 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 6 * Created: Sep 28, 2005 !! 6 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) 7 * Copyright: MontaVista Software, Inc. !! 7 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 8 */ 8 */ 9 9 10 #include <linux/linkage.h> !! 10 #include <asm/export.h> 11 #include <asm/assembler.h> !! 11 #define FUNC(x) \ 12 #include <asm/unwind.h> !! 12 .globl x; \ 13 !! 13 .type x,@function; \ 14 #define LDR1W_SHIFT 0 !! 14 .align 4; \ 15 #define STR1W_SHIFT 0 !! 15 x: 16 !! 16 17 .macro ldr1w ptr reg abort !! 17 /* Both these macros have to start with exactly the same insn */ 18 W(ldr) \reg, [\ptr], #4 !! 18 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ 19 .endm !! 19 ldd [%src + (offset) + 0x00], %t0; \ 20 !! 20 ldd [%src + (offset) + 0x08], %t2; \ 21 .macro ldr4w ptr reg1 reg2 reg3 reg4 a !! 21 ldd [%src + (offset) + 0x10], %t4; \ 22 ldmia \ptr!, {\reg1, \reg2, \reg3, \re !! 22 ldd [%src + (offset) + 0x18], %t6; \ 23 .endm !! 23 st %t0, [%dst + (offset) + 0x00]; \ 24 !! 24 st %t1, [%dst + (offset) + 0x04]; \ 25 .macro ldr8w ptr reg1 reg2 reg3 reg4 r !! 25 st %t2, [%dst + (offset) + 0x08]; \ 26 ldmia \ptr!, {\reg1, \reg2, \reg3, \re !! 26 st %t3, [%dst + (offset) + 0x0c]; \ 27 .endm !! 27 st %t4, [%dst + (offset) + 0x10]; \ 28 !! 28 st %t5, [%dst + (offset) + 0x14]; \ 29 .macro ldr1b ptr reg cond=al abort !! 29 st %t6, [%dst + (offset) + 0x18]; \ 30 ldrb\cond \reg, [\ptr], #1 !! 30 st %t7, [%dst + (offset) + 0x1c]; 31 .endm !! 31 32 !! 32 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ 33 .macro str1w ptr reg abort !! 33 ldd [%src + (offset) + 0x00], %t0; \ 34 W(str) \reg, [\ptr], #4 !! 34 ldd [%src + (offset) + 0x08], %t2; \ 35 .endm !! 35 ldd [%src + (offset) + 0x10], %t4; \ 36 !! 36 ldd [%src + (offset) + 0x18], %t6; \ 37 .macro str8w ptr reg1 reg2 reg3 reg4 r !! 37 std %t0, [%dst + (offset) + 0x00]; \ 38 stmia \ptr!, {\reg1, \reg2, \reg3, \re !! 38 std %t2, [%dst + (offset) + 0x08]; \ 39 .endm !! 39 std %t4, [%dst + (offset) + 0x10]; \ 40 !! 40 std %t6, [%dst + (offset) + 0x18]; 41 .macro str1b ptr reg cond=al abort !! 41 42 strb\cond \reg, [\ptr], #1 !! 42 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ 43 .endm !! 43 ldd [%src - (offset) - 0x10], %t0; \ 44 !! 44 ldd [%src - (offset) - 0x08], %t2; \ 45 .macro enter regs:vararg !! 45 st %t0, [%dst - (offset) - 0x10]; \ 46 UNWIND( .save {r0, \regs} ) !! 46 st %t1, [%dst - (offset) - 0x0c]; \ 47 stmdb sp!, {r0, \regs} !! 47 st %t2, [%dst - (offset) - 0x08]; \ 48 .endm !! 48 st %t3, [%dst - (offset) - 0x04]; 49 !! 49 50 .macro exit regs:vararg !! 50 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ 51 ldmfd sp!, {r0, \regs} !! 51 ldd [%src - (offset) - 0x10], %t0; \ 52 .endm !! 52 ldd [%src - (offset) - 0x08], %t2; \ >> 53 std %t0, [%dst - (offset) - 0x10]; \ >> 54 std %t2, [%dst - (offset) - 0x08]; >> 55 >> 56 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ >> 57 ldub [%src - (offset) - 0x02], %t0; \ >> 58 ldub [%src - (offset) - 0x01], %t1; \ >> 59 stb %t0, [%dst - (offset) - 0x02]; \ >> 60 stb %t1, [%dst - (offset) - 0x01]; 53 61 54 .text 62 .text >> 63 .align 4 55 64 56 /* Prototype: void *memcpy(void *dest, const v !! 65 FUNC(memmove) 57 !! 66 EXPORT_SYMBOL(memmove) 58 ENTRY(__memcpy) !! 67 cmp %o0, %o1 59 ENTRY(mmiocpy) !! 68 mov %o0, %g7 60 WEAK(memcpy) !! 69 bleu 9f 61 !! 70 sub %o0, %o1, %o4 62 #include "copy_template.S" !! 71 >> 72 add %o1, %o2, %o3 >> 73 cmp %o3, %o0 >> 74 bleu 0f >> 75 andcc %o4, 3, %o5 >> 76 >> 77 add %o1, %o2, %o1 >> 78 add %o0, %o2, %o0 >> 79 sub %o1, 1, %o1 >> 80 sub %o0, 1, %o0 >> 81 >> 82 1: /* reverse_bytes */ >> 83 >> 84 ldub [%o1], %o4 >> 85 subcc %o2, 1, %o2 >> 86 stb %o4, [%o0] >> 87 sub %o1, 1, %o1 >> 88 bne 1b >> 89 sub %o0, 1, %o0 >> 90 >> 91 retl >> 92 mov %g7, %o0 >> 93 >> 94 /* NOTE: This code is executed just for the cases, >> 95 where %src (=%o1) & 3 is != 0. >> 96 We need to align it to 4. So, for (%src & 3) >> 97 1 we need to do ldub,lduh >> 98 2 lduh >> 99 3 just ldub >> 100 so even if it looks weird, the branches >> 101 are correct here. -jj >> 102 */ >> 103 78: /* dword_align */ 63 104 64 ENDPROC(memcpy) !! 105 andcc %o1, 1, %g0 65 ENDPROC(mmiocpy) !! 106 be 4f 66 ENDPROC(__memcpy) !! 107 andcc %o1, 2, %g0 >> 108 >> 109 ldub [%o1], %g2 >> 110 add %o1, 1, %o1 >> 111 stb %g2, [%o0] >> 112 sub %o2, 1, %o2 >> 113 bne 3f >> 114 add %o0, 1, %o0 >> 115 4: >> 116 lduh [%o1], %g2 >> 117 add %o1, 2, %o1 >> 118 sth %g2, [%o0] >> 119 sub %o2, 2, %o2 >> 120 b 3f >> 121 add %o0, 2, %o0 >> 122 >> 123 FUNC(memcpy) /* %o0=dst %o1=src %o2=len */ >> 124 EXPORT_SYMBOL(memcpy) >> 125 >> 126 sub %o0, %o1, %o4 >> 127 mov %o0, %g7 >> 128 9: >> 129 andcc %o4, 3, %o5 >> 130 0: >> 131 bne 86f >> 132 cmp %o2, 15 >> 133 >> 134 bleu 90f >> 135 andcc %o1, 3, %g0 >> 136 >> 137 bne 78b >> 138 3: >> 139 andcc %o1, 4, %g0 >> 140 >> 141 be 2f >> 142 mov %o2, %g1 >> 143 >> 144 ld [%o1], %o4 >> 145 sub %g1, 4, %g1 >> 146 st %o4, [%o0] >> 147 add %o1, 4, %o1 >> 148 add %o0, 4, %o0 >> 149 2: >> 150 andcc %g1, 0xffffff80, %g0 >> 151 be 3f >> 152 andcc %o0, 4, %g0 >> 153 >> 154 be 82f + 4 >> 155 5: >> 156 MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) >> 157 MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) >> 158 MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) >> 159 MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) >> 160 sub %g1, 128, %g1 >> 161 add %o1, 128, %o1 >> 162 cmp %g1, 128 >> 163 bge 5b >> 164 add %o0, 128, %o0 >> 165 3: >> 166 andcc %g1, 0x70, %g4 >> 167 be 80f >> 168 andcc %g1, 8, %g0 >> 169 >> 170 sethi %hi(80f), %o5 >> 171 srl %g4, 1, %o4 >> 172 add %g4, %o4, %o4 >> 173 add %o1, %g4, %o1 >> 174 sub %o5, %o4, %o5 >> 175 jmpl %o5 + %lo(80f), %g0 >> 176 add %o0, %g4, %o0 >> 177 >> 178 79: /* memcpy_table */ >> 179 >> 180 MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 181 MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 182 MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 183 MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 184 MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 185 MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 186 MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 187 >> 188 80: /* memcpy_table_end */ >> 189 be 81f >> 190 andcc %g1, 4, %g0 >> 191 >> 192 ldd [%o1], %g2 >> 193 add %o0, 8, %o0 >> 194 st %g2, [%o0 - 0x08] >> 195 add %o1, 8, %o1 >> 196 st %g3, [%o0 - 0x04] >> 197 >> 198 81: /* memcpy_last7 */ >> 199 >> 200 be 1f >> 201 andcc %g1, 2, %g0 >> 202 >> 203 ld [%o1], %g2 >> 204 add %o1, 4, %o1 >> 205 st %g2, [%o0] >> 206 add %o0, 4, %o0 >> 207 1: >> 208 be 1f >> 209 andcc %g1, 1, %g0 >> 210 >> 211 lduh [%o1], %g2 >> 212 add %o1, 2, %o1 >> 213 sth %g2, [%o0] >> 214 add %o0, 2, %o0 >> 215 1: >> 216 be 1f >> 217 nop >> 218 >> 219 ldub [%o1], %g2 >> 220 stb %g2, [%o0] >> 221 1: >> 222 retl >> 223 mov %g7, %o0 >> 224 >> 225 82: /* ldd_std */ >> 226 MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) >> 227 MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) >> 228 MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) >> 229 MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) >> 230 subcc %g1, 128, %g1 >> 231 add %o1, 128, %o1 >> 232 cmp %g1, 128 >> 233 bge 82b >> 234 add %o0, 128, %o0 >> 235 >> 236 andcc %g1, 0x70, %g4 >> 237 be 84f >> 238 andcc %g1, 8, %g0 >> 239 >> 240 sethi %hi(84f), %o5 >> 241 add %o1, %g4, %o1 >> 242 sub %o5, %g4, %o5 >> 243 jmpl %o5 + %lo(84f), %g0 >> 244 add %o0, %g4, %o0 >> 245 >> 246 83: /* amemcpy_table */ >> 247 >> 248 MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 249 MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 250 MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 251 MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 252 MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 253 MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 254 MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 255 >> 256 84: /* amemcpy_table_end */ >> 257 be 85f >> 258 andcc %g1, 4, %g0 >> 259 >> 260 ldd [%o1], %g2 >> 261 add %o0, 8, %o0 >> 262 std %g2, [%o0 - 0x08] >> 263 add %o1, 8, %o1 >> 264 85: /* amemcpy_last7 */ >> 265 be 1f >> 266 andcc %g1, 2, %g0 >> 267 >> 268 ld [%o1], %g2 >> 269 add %o1, 4, %o1 >> 270 st %g2, [%o0] >> 271 add %o0, 4, %o0 >> 272 1: >> 273 be 1f >> 274 andcc %g1, 1, %g0 >> 275 >> 276 lduh [%o1], %g2 >> 277 add %o1, 2, %o1 >> 278 sth %g2, [%o0] >> 279 add %o0, 2, %o0 >> 280 1: >> 281 be 1f >> 282 nop >> 283 >> 284 ldub [%o1], %g2 >> 285 stb %g2, [%o0] >> 286 1: >> 287 retl >> 288 mov %g7, %o0 >> 289 >> 290 86: /* non_aligned */ >> 291 cmp %o2, 6 >> 292 bleu 88f >> 293 nop >> 294 >> 295 save %sp, -96, %sp >> 296 andcc %i0, 3, %g0 >> 297 be 61f >> 298 andcc %i0, 1, %g0 >> 299 be 60f >> 300 andcc %i0, 2, %g0 >> 301 >> 302 ldub [%i1], %g5 >> 303 add %i1, 1, %i1 >> 304 stb %g5, [%i0] >> 305 sub %i2, 1, %i2 >> 306 bne 61f >> 307 add %i0, 1, %i0 >> 308 60: >> 309 ldub [%i1], %g3 >> 310 add %i1, 2, %i1 >> 311 stb %g3, [%i0] >> 312 sub %i2, 2, %i2 >> 313 ldub [%i1 - 1], %g3 >> 314 add %i0, 2, %i0 >> 315 stb %g3, [%i0 - 1] >> 316 61: >> 317 and %i1, 3, %g2 >> 318 and %i2, 0xc, %g3 >> 319 and %i1, -4, %i1 >> 320 cmp %g3, 4 >> 321 sll %g2, 3, %g4 >> 322 mov 32, %g2 >> 323 be 4f >> 324 sub %g2, %g4, %l0 >> 325 >> 326 blu 3f >> 327 cmp %g3, 0x8 >> 328 >> 329 be 2f >> 330 srl %i2, 2, %g3 >> 331 >> 332 ld [%i1], %i3 >> 333 add %i0, -8, %i0 >> 334 ld [%i1 + 4], %i4 >> 335 b 8f >> 336 add %g3, 1, %g3 >> 337 2: >> 338 ld [%i1], %i4 >> 339 add %i0, -12, %i0 >> 340 ld [%i1 + 4], %i5 >> 341 add %g3, 2, %g3 >> 342 b 9f >> 343 add %i1, -4, %i1 >> 344 3: >> 345 ld [%i1], %g1 >> 346 add %i0, -4, %i0 >> 347 ld [%i1 + 4], %i3 >> 348 srl %i2, 2, %g3 >> 349 b 7f >> 350 add %i1, 4, %i1 >> 351 4: >> 352 ld [%i1], %i5 >> 353 cmp %i2, 7 >> 354 ld [%i1 + 4], %g1 >> 355 srl %i2, 2, %g3 >> 356 bleu 10f >> 357 add %i1, 8, %i1 >> 358 >> 359 ld [%i1], %i3 >> 360 add %g3, -1, %g3 >> 361 5: >> 362 sll %i5, %g4, %g2 >> 363 srl %g1, %l0, %g5 >> 364 or %g2, %g5, %g2 >> 365 st %g2, [%i0] >> 366 7: >> 367 ld [%i1 + 4], %i4 >> 368 sll %g1, %g4, %g2 >> 369 srl %i3, %l0, %g5 >> 370 or %g2, %g5, %g2 >> 371 st %g2, [%i0 + 4] >> 372 8: >> 373 ld [%i1 + 8], %i5 >> 374 sll %i3, %g4, %g2 >> 375 srl %i4, %l0, %g5 >> 376 or %g2, %g5, %g2 >> 377 st %g2, [%i0 + 8] >> 378 9: >> 379 ld [%i1 + 12], %g1 >> 380 sll %i4, %g4, %g2 >> 381 srl %i5, %l0, %g5 >> 382 addcc %g3, -4, %g3 >> 383 or %g2, %g5, %g2 >> 384 add %i1, 16, %i1 >> 385 st %g2, [%i0 + 12] >> 386 add %i0, 16, %i0 >> 387 bne,a 5b >> 388 ld [%i1], %i3 >> 389 10: >> 390 sll %i5, %g4, %g2 >> 391 srl %g1, %l0, %g5 >> 392 srl %l0, 3, %g3 >> 393 or %g2, %g5, %g2 >> 394 sub %i1, %g3, %i1 >> 395 andcc %i2, 2, %g0 >> 396 st %g2, [%i0] >> 397 be 1f >> 398 andcc %i2, 1, %g0 >> 399 >> 400 ldub [%i1], %g2 >> 401 add %i1, 2, %i1 >> 402 stb %g2, [%i0 + 4] >> 403 add %i0, 2, %i0 >> 404 ldub [%i1 - 1], %g2 >> 405 stb %g2, [%i0 + 3] >> 406 1: >> 407 be 1f >> 408 nop >> 409 ldub [%i1], %g2 >> 410 stb %g2, [%i0 + 4] >> 411 1: >> 412 ret >> 413 restore %g7, %g0, %o0 >> 414 >> 415 88: /* short_end */ >> 416 >> 417 and %o2, 0xe, %o3 >> 418 20: >> 419 sethi %hi(89f), %o5 >> 420 sll %o3, 3, %o4 >> 421 add %o0, %o3, %o0 >> 422 sub %o5, %o4, %o5 >> 423 add %o1, %o3, %o1 >> 424 jmpl %o5 + %lo(89f), %g0 >> 425 andcc %o2, 1, %g0 >> 426 >> 427 MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) >> 428 MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) >> 429 MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) >> 430 MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) >> 431 MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) >> 432 MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) >> 433 MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) >> 434 >> 435 89: /* short_table_end */ >> 436 >> 437 be 1f >> 438 nop >> 439 >> 440 ldub [%o1], %g2 >> 441 stb %g2, [%o0] >> 442 1: >> 443 retl >> 444 mov %g7, %o0 >> 445 >> 446 90: /* short_aligned_end */ >> 447 bne 88b >> 448 andcc %o2, 8, %g0 >> 449 >> 450 be 1f >> 451 andcc %o2, 4, %g0 >> 452 >> 453 ld [%o1 + 0x00], %g2 >> 454 ld [%o1 + 0x04], %g3 >> 455 add %o1, 8, %o1 >> 456 st %g2, [%o0 + 0x00] >> 457 st %g3, [%o0 + 0x04] >> 458 add %o0, 8, %o0 >> 459 1: >> 460 b 81b >> 461 mov %o2, %g1
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.