/* arch/riscv/lib/memcpy.S */

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Regents of the University of California
 */

#include <linux/linkage.h>
#include <asm/asm.h>

/* void *memcpy(void *, const void *, size_t) */
SYM_FUNC_START(__memcpy)
	move t6, a0  /* Preserve return value */

	/* Defer to byte-oriented copy for small sizes */
	sltiu a3, a2, 128
	bnez a3, 4f
	/* Use word-oriented copy only if low-order bits match */
	andi a3, t6, SZREG-1
	andi a4, a1, SZREG-1
	bne a3, a4, 4f

	beqz a3, 2f  /* Skip if already aligned */
	/*
	 * Round to nearest double word-aligned address
	 * greater than or equal to start address
	 */
	andi a3, a1, ~(SZREG-1)
	addi a3, a3, SZREG
	/* Handle initial misalignment */
	sub a4, a3, a1
1:
	lb a5, 0(a1)
	addi a1, a1, 1
	sb a5, 0(t6)
	addi t6, t6, 1
	bltu a1, a3, 1b
	sub a2, a2, a4  /* Update count */
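/*
 * Worked example of the rounding above (illustration only, not part of
 * the original file): with SZREG = 8 and a1 = 0x1003 (src and dst share
 * the same misalignment at this point),
 *
 *	a3 = (a1 & ~(SZREG-1)) + SZREG = 0x1000 + 8 = 0x1008
 *	a4 = a3 - a1 = 5
 *
 * so the byte loop at 1: copies exactly 5 bytes to reach the next
 * SZREG boundary before the register-sized loops take over.
 */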

2:
	andi a4, a2, ~((16*SZREG)-1)
	beqz a4, 4f
	add a3, a1, a4
3:
	REG_L a4, 0(a1)
	REG_L a5, SZREG(a1)
	REG_L a6, 2*SZREG(a1)
	REG_L a7, 3*SZREG(a1)
	REG_L t0, 4*SZREG(a1)
	REG_L t1, 5*SZREG(a1)
	REG_L t2, 6*SZREG(a1)
	REG_L t3, 7*SZREG(a1)
	REG_L t4, 8*SZREG(a1)
	REG_L t5, 9*SZREG(a1)
	REG_S a4, 0(t6)
	REG_S a5, SZREG(t6)
	REG_S a6, 2*SZREG(t6)
	REG_S a7, 3*SZREG(t6)
	REG_S t0, 4*SZREG(t6)
	REG_S t1, 5*SZREG(t6)
	REG_S t2, 6*SZREG(t6)
	REG_S t3, 7*SZREG(t6)
	REG_S t4, 8*SZREG(t6)
	REG_S t5, 9*SZREG(t6)
	REG_L a4, 10*SZREG(a1)
	REG_L a5, 11*SZREG(a1)
	REG_L a6, 12*SZREG(a1)
	REG_L a7, 13*SZREG(a1)
	REG_L t0, 14*SZREG(a1)
	REG_L t1, 15*SZREG(a1)
	addi a1, a1, 16*SZREG
	REG_S a4, 10*SZREG(t6)
	REG_S a5, 11*SZREG(t6)
	REG_S a6, 12*SZREG(t6)
	REG_S a7, 13*SZREG(t6)
	REG_S t0, 14*SZREG(t6)
	REG_S t1, 15*SZREG(t6)
	addi t6, t6, 16*SZREG
	bltu a1, a3, 3b
	andi a2, a2, (16*SZREG)-1  /* Update count */

4:
	/* Handle trailing misalignment */
	beqz a2, 6f
	add a3, a1, a2

	/* Use word-oriented copy if co-aligned to word boundary */
	or a5, a1, t6
	or a5, a5, a3
	andi a5, a5, 3
	bnez a5, 5f
7:
	lw a4, 0(a1)
	addi a1, a1, 4
	sw a4, 0(t6)
	addi t6, t6, 4
	bltu a1, a3, 7b

	ret

5:
	lb a4, 0(a1)
	addi a1, a1, 1
	sb a4, 0(t6)
	addi t6, t6, 1
	bltu a1, a3, 5b
6:
	ret
SYM_FUNC_END(__memcpy)
SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
SYM_FUNC_ALIAS(__pi_memcpy, __memcpy)
SYM_FUNC_ALIAS(__pi___memcpy, __memcpy)

/* arch/sparc/lib/memcpy.S */

/* SPDX-License-Identifier: GPL-2.0 */
/* memcpy.S: Sparc optimized memcpy and memmove code
 * Hand optimized from GNU libc's memcpy and memmove
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */

#include <linux/export.h>

#define FUNC(x)			\
	.globl	x;		\
	.type	x,@function;	\
	.align	4;		\
x:

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	std	%t0, [%dst - (offset) - 0x10]; \
	std	%t2, [%dst - (offset) - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];

	.text
	.align	4

FUNC(memmove)
EXPORT_SYMBOL(memmove)
	cmp	%o0, %o1
	mov	%o0, %g7
	bleu	9f
	 sub	%o0, %o1, %o4

	add	%o1, %o2, %o3
	cmp	%o3, %o0
	bleu	0f
	 andcc	%o4, 3, %o5

	add	%o1, %o2, %o1
	add	%o0, %o2, %o0
	sub	%o1, 1, %o1
	sub	%o0, 1, %o0

1:	/* reverse_bytes */

	ldub	[%o1], %o4
	subcc	%o2, 1, %o2
	stb	%o4, [%o0]
	sub	%o1, 1, %o1
	bne	1b
	 sub	%o0, 1, %o0

	retl
	 mov	%g7, %o0
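/*
 * A minimal C sketch of the memmove dispatch above (the function name
 * is hypothetical, not part of this file): a forward copy is delegated
 * to the memcpy body via 9:/0:, and only a genuinely overlapping
 * dst > src case takes the reverse_bytes loop.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *
 *	static void *memmove_sketch(void *dst, const void *src, size_t n)
 *	{
 *		char *d = dst;
 *		const char *s = src;
 *
 *		if (d <= s || s + n <= d)	// forward copy is safe
 *			return memcpy(d, s, n);
 *		d += n;				// overlap: copy backwards
 *		s += n;
 *		while (n--)
 *			*--d = *--s;
 *		return dst;
 *	}
 */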

/* NOTE: This code is executed just for the cases,
         where %src (=%o1) & 3 is != 0.
	 We need to align it to 4. So, for (%src & 3)
	 1 we need to do ldub,lduh
	 2 lduh
	 3 just ldub
         so even if it looks weird, the branches
         are correct here. -jj
 */
78:	/* dword_align */

	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	ldub	[%o1], %g2
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0
4:
	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
EXPORT_SYMBOL(memcpy)

	sub	%o0, %o1, %o4
	mov	%o0, %g7
9:
	andcc	%o4, 3, %o5
0:
	bne	86f
	 cmp	%o2, 15

	bleu	90f
	 andcc	%o1, 3, %g0

	bne	78b
3:
	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	ld	[%o1], %o4
	sub	%g1, 4, %g1
	st	%o4, [%o0]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g0
	be	3f
	 andcc	%o0, 4, %g0

	be	82f + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	sub	%g1, 128, %g1
	add	%o1, 128, %o1
	cmp	%g1, 128
	bge	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g4
	be	80f
	 andcc	%g1, 8, %g0

	sethi	%hi(80f), %o5
	srl	%g4, 1, %o4
	add	%g4, %o4, %o4
	add	%o1, %g4, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(80f), %g0
	 add	%o0, %g4, %o0
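/*
 * Dispatch arithmetic for the computed jump above (explanatory note,
 * not in the original): %g4 = %g1 & 0x70 is the remaining byte count
 * in whole 16-byte chunks, and each MOVE_LASTCHUNK below expands to
 * six 4-byte instructions (24 bytes of code per 16 bytes copied), so
 *
 *	offset = %g4 + (%g4 >> 1)	// 1.5 bytes of code per byte
 *
 * and the jmpl lands offset bytes before 80f.  For %g4 = 0x70 that is
 * 0x70 + 0x38 = 0xa8 = 7 * 24, i.e. all seven chunk copies run; for
 * %g4 = 0x10 only the final MOVE_LASTCHUNK executes.  %o1/%o0 are
 * advanced by %g4 first because the chunks use negative offsets.
 */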

79:	/* memcpy_table */

	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80:	/* memcpy_table_end */
	be	81f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	st	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
	st	%g3, [%o0 - 0x04]

81:	/* memcpy_last7 */

	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

82:	/* ldd_std */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g1, 128, %g1
	add	%o1, 128, %o1
	cmp	%g1, 128
	bge	82b
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g4
	be	84f
	 andcc	%g1, 8, %g0

	sethi	%hi(84f), %o5
	add	%o1, %g4, %o1
	sub	%o5, %g4, %o5
	jmpl	%o5 + %lo(84f), %g0
	 add	%o0, %g4, %o0

83:	/* amemcpy_table */

	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84:	/* amemcpy_table_end */
	be	85f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	std	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
85:	/* amemcpy_last7 */
	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

86:	/* non_aligned */
	cmp	%o2, 6
	bleu	88f
	 nop

	save	%sp, -96, %sp
	andcc	%i0, 3, %g0
	be	61f
	 andcc	%i0, 1, %g0
	be	60f
	 andcc	%i0, 2, %g0

	ldub	[%i1], %g5
	add	%i1, 1, %i1
	stb	%g5, [%i0]
	sub	%i2, 1, %i2
	bne	61f
	 add	%i0, 1, %i0
60:
	ldub	[%i1], %g3
	add	%i1, 2, %i1
	stb	%g3, [%i0]
	sub	%i2, 2, %i2
	ldub	[%i1 - 1], %g3
	add	%i0, 2, %i0
	stb	%g3, [%i0 - 1]
61:
	and	%i1, 3, %g2
	and	%i2, 0xc, %g3
	and	%i1, -4, %i1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	 sub	%g2, %g4, %l0

	blu	3f
	 cmp	%g3, 0x8

	be	2f
	 srl	%i2, 2, %g3

	ld	[%i1], %i3
	add	%i0, -8, %i0
	ld	[%i1 + 4], %i4
	b	8f
	 add	%g3, 1, %g3
2:
	ld	[%i1], %i4
	add	%i0, -12, %i0
	ld	[%i1 + 4], %i5
	add	%g3, 2, %g3
	b	9f
	 add	%i1, -4, %i1
3:
	ld	[%i1], %g1
	add	%i0, -4, %i0
	ld	[%i1 + 4], %i3
	srl	%i2, 2, %g3
	b	7f
	 add	%i1, 4, %i1
4:
	ld	[%i1], %i5
	cmp	%i2, 7
	ld	[%i1 + 4], %g1
	srl	%i2, 2, %g3
	bleu	10f
	 add	%i1, 8, %i1

	ld	[%i1], %i3
	add	%g3, -1, %g3
5:
	sll	%i5, %g4, %g2
	srl	%g1, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0]
7:
	ld	[%i1 + 4], %i4
	sll	%g1, %g4, %g2
	srl	%i3, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0 + 4]
8:
	ld	[%i1 + 8], %i5
	sll	%i3, %g4, %g2
	srl	%i4, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0 + 8]
9:
	ld	[%i1 + 12], %g1
	sll	%i4, %g4, %g2
	srl	%i5, %l0, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%i1, 16, %i1
	st	%g2, [%i0 + 12]
	add	%i0, 16, %i0
	bne,a	5b
	 ld	[%i1], %i3
10:
	sll	%i5, %g4, %g2
	srl	%g1, %l0, %g5
	srl	%l0, 3, %g3
	or	%g2, %g5, %g2
	sub	%i1, %g3, %i1
	andcc	%i2, 2, %g0
	st	%g2, [%i0]
	be	1f
	 andcc	%i2, 1, %g0

	ldub	[%i1], %g2
	add	%i1, 2, %i1
	stb	%g2, [%i0 + 4]
	add	%i0, 2, %i0
	ldub	[%i1 - 1], %g2
	stb	%g2, [%i0 + 3]
1:
	be	1f
	 nop
	ldub	[%i1], %g2
	stb	%g2, [%i0 + 4]
1:
	ret
	 restore %g7, %g0, %o0

88:	/* short_end */

	and	%o2, 0xe, %o3
20:
	sethi	%hi(89f), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(89f), %g0
	 andcc	%o2, 1, %g0

	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89:	/* short_table_end */

	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

90:	/* short_aligned_end */
	bne	88b
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	ld	[%o1 + 0x00], %g2
	ld	[%o1 + 0x04], %g3
	add	%o1, 8, %o1
	st	%g2, [%o0 + 0x00]
	st	%g3, [%o0 + 0x04]
	add	%o0, 8, %o0
1:
	b	81b
	 mov	%o2, %g1
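/*
 * The 86: (non_aligned) path above reads aligned source words and
 * merges each adjacent pair with a shift pair.  A rough C equivalent
 * of one merge step (sketch only, names are illustrative; sparc32 is
 * big-endian):
 *
 *	typedef unsigned int u32;
 *
 *	static u32 merge(u32 prev_word, u32 next_word, unsigned lshift)
 *	{
 *		// lshift = (src & 3) * 8, kept in %g4; 32 - lshift in %l0
 *		return (prev_word << lshift) | (next_word >> (32 - lshift));
 *	}
 *
 * Each st in the 5:/7:/8:/9: loop stores one such merge of two
 * consecutive source words, so every iteration emits four destination
 * words while issuing only one new ld per store.
 */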