/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007  Maciej W. Rozycki
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory.  It's also a seriously bad idea on non-DMA-coherent
 * systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * include/asm-mips/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
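
/*
 * Illustrative sketch only (not part of the build): in C terms, the
 * __copy_user contract and the load-fault clean-up rule described above
 * behave roughly as below.  copy_until_fault() and fault_was_on_load() are
 * hypothetical helpers standing in for the assembly copy loops and the
 * exception plumbing; they are not real kernel interfaces.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *
 *	size_t copy_until_fault(void *dst, const void *src, size_t len);
 *	int fault_was_on_load(void);
 *
 *	size_t __copy_user_model(void *dst, const void *src, size_t len)
 *	{
 *		size_t copied = copy_until_fault(dst, src, len);
 *
 *		if (copied < len && fault_was_on_load())
 *			// don't leak stale kernel data to user mode
 *			memset((char *)dst + copied, 0, len - copied);
 *		return len - copied;	// 0 on success, else bytes not copied
 *	}
 */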

#define EXC(inst_reg,addr,handler)		\
9:	inst_reg, addr;				\
	.section __ex_table,"a";		\
	PTR	9b, handler;			\
	.previous

/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD	ld
#define LOADL	ldl
#define LOADR	ldr
#define STOREL	sdl
#define STORER	sdr
#define STORE	sd
#define ADD	daddu
#define SUB	dsubu
#define SRL	dsrl
#define SRA	dsra
#define SLL	dsll
#define SLLV	dsllv
#define SRLV	dsrlv
#define NBYTES	8
#define LOG_NBYTES 3

/*
 * As we are sharing a code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#else

#define LOAD	lw
#define LOADL	lwl
#define LOADR	lwr
#define STOREL	swl
#define STORER	swr
#define STORE	sw
#define ADD	addu
#define SUB	subu
#define SRL	srl
#define SLL	sll
#define SRA	sra
#define SLLV	sllv
#define SRLV	srlv
#define NBYTES	4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST	LOADR
#define LDREST	LOADL
#define STFIRST	STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST	LOADL
#define LDREST	LOADR
#define STFIRST	STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)

	.text
	.set	noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif
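
/*
 * Illustrative sketch only (not part of the build): LDFIRST/LDREST pair
 * lwl/lwr (or ldl/ldr) so that one unaligned NBYTES-sized word is read with
 * two partial accesses.  A portable C model of the net effect for the
 * 32-bit case, assuming little-endian byte order:
 *
 *	#include <stdint.h>
 *
 *	static uint32_t load_unaligned_u32(const unsigned char *p)
 *	{
 *		// the two partial accesses merged into one register,
 *		// spelled out byte by byte instead of lwl/lwr
 *		return (uint32_t)p[0]       | (uint32_t)p[1] << 8 |
 *		       (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
 *	}
 */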

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
	move	v0, dst				/* return value */
.L__memcpy:
FEXPORT(__copy_user)
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
#define rem t8

	R10KCBARRIER(0(ra))
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	PREF(	0, 0(src) )
	PREF(	1, 0(dst) )
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	PREF(	0, 1*32(src) )
	PREF(	1, 1*32(dst) )
	bnez	t2, .Lcopy_bytes_checklen
	and	t0, src, ADDRMASK
	PREF(	0, 2*32(src) )
	PREF(	1, 2*32(dst) )
	bnez	t1, .Ldst_unaligned
	nop
	bnez	t0, .Lsrc_unaligned_dst_aligned
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned:
	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
	and	rem, len, (8*NBYTES-1)		# rem = len % (8*NBYTES)
	PREF(	0, 3*32(src) )
	PREF(	1, 3*32(dst) )
	.align	4
1:
	R10KCBARRIER(0(ra))
EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
	SUB	len, len, 8*NBYTES
EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy)
EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p8u)
EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p7u)
EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy)
EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy)
	ADD	src, src, 8*NBYTES
	ADD	dst, dst, 8*NBYTES
EXC(	STORE	t2, UNIT(-6)(dst),	.Ls_exc_p6u)
EXC(	STORE	t3, UNIT(-5)(dst),	.Ls_exc_p5u)
EXC(	STORE	t4, UNIT(-4)(dst),	.Ls_exc_p4u)
EXC(	STORE	t7, UNIT(-3)(dst),	.Ls_exc_p3u)
EXC(	STORE	t0, UNIT(-2)(dst),	.Ls_exc_p2u)
EXC(	STORE	t1, UNIT(-1)(dst),	.Ls_exc_p1u)
	PREF(	0, 8*32(src) )
	PREF(	1, 8*32(dst) )
	bne	len, rem, 1b
	nop

	/*
	 * len == rem == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned:
	beqz	len, .Ldone
	sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units
	and	rem, len, (NBYTES-1)		# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	R10KCBARRIER(0(ra))
EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone
	.set	noreorder
.Lless_than_4units:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes
	nop
1:
	R10KCBARRIER(0(ra))
EXC(	LOAD	t0, 0(src),		.Ll_exc)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because we can't assume read access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone
	ADD	t1, dst, len		# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3		# rem = number of bits to keep
EXC(	LOAD	t0, 0(src),		.Ll_exc)
	SUB	bits, bits, rem		# bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
EXC(	STREST	t0, -1(t1),		.Ls_exc)
	jr	ra
	move	len, zero
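
/*
 * Illustrative sketch only (not part of the build): the .Lboth_aligned loop
 * above moves eight NBYTES-sized words per iteration, issuing the loads well
 * ahead of the matching stores.  In C terms, roughly:
 *
 *	#include <stddef.h>
 *
 *	static void copy_aligned_8x(unsigned long *dst,
 *				    const unsigned long *src, size_t len)
 *	{
 *		// len is in bytes; both pointers are word aligned
 *		while (len >= 8 * sizeof(unsigned long)) {
 *			unsigned long a = src[0], b = src[1], c = src[2], d = src[3];
 *			unsigned long e = src[4], f = src[5], g = src[6], h = src[7];
 *
 *			dst[0] = a; dst[1] = b; dst[2] = c; dst[3] = d;
 *			dst[4] = e; dst[5] = f; dst[6] = g; dst[7] = h;
 *			src += 8; dst += 8;
 *			len -= 8 * sizeof(unsigned long);
 *		}
 *	}
 */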

.Ldst_unaligned:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
	ADD	t2, zero, NBYTES
EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
	SUB	t2, t2, t1		# t2 = number of bytes copied
	xor	match, t0, t1
	R10KCBARRIER(0(ra))
EXC(	STFIRST	t3, FIRST(0)(dst),	.Ls_exc)
	beq	len, t2, .Ldone
	SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned
	ADD	src, src, t2

.Lsrc_unaligned_dst_aligned:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	PREF(	0, 3*32(src) )
	beqz	t0, .Lcleanup_src_unaligned
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
	PREF(	1, 3*32(dst) )
1:
	/*
	 * Avoid consecutive LD*'s to the same register since some mips
	 * implementations can't issue them in the same cycle.
	 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
	 * are to the same unit (unless src is aligned, but it's not).
	 */
	R10KCBARRIER(0(ra))
EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned:
	beqz	len, .Ldone
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes
	nop
1:
	R10KCBARRIER(0(ra))
EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen:
	beqz	len, .Ldone
	nop
.Lcopy_bytes:
	/* 0 < len < NBYTES */
	R10KCBARRIER(0(ra))
#define COPY_BYTE(N)			\
EXC(	lb	t0, N(src), .Ll_exc);	\
	SUB	len, len, 1;		\
	beqz	len, .Ldone;		\
EXC(	sb	t0, N(dst), .Ls_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
EXC(	lb	t0, NBYTES-2(src), .Ll_exc)
	SUB	len, len, 1
	jr	ra
EXC(	sb	t0, NBYTES-2(dst), .Ls_exc_p1)
.Ldone:
	jr	ra
	nop
	END(memcpy)
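
/*
 * Illustrative sketch only (not part of the build): the dispatch performed
 * by the memcpy body above, in C.  copy_bytes(), copy_aligned_words() and
 * copy_unaligned_src() are hypothetical helpers standing in for
 * .Lcopy_bytes, .Lboth_aligned and .Lsrc_unaligned_dst_aligned.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void copy_bytes(unsigned char *d, const unsigned char *s, size_t n);
 *	void copy_aligned_words(unsigned char *d, const unsigned char *s, size_t n);
 *	void copy_unaligned_src(unsigned char *d, const unsigned char *s, size_t n);
 *
 *	void *memcpy_model(void *dst, const void *src, size_t len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		if (len < NBYTES) {
 *			copy_bytes(d, s, len);		// .Lcopy_bytes_checklen
 *			return dst;
 *		}
 *		if ((uintptr_t)d & ADDRMASK) {		// .Ldst_unaligned
 *			size_t head = NBYTES - ((uintptr_t)d & ADDRMASK);
 *
 *			copy_bytes(d, s, head);		// one partial word in asm
 *			d += head; s += head; len -= head;
 *		}
 *		if ((uintptr_t)s & ADDRMASK)
 *			copy_unaligned_src(d, s, len);	// LDFIRST/LDREST per word
 *		else
 *			copy_aligned_words(d, s, len);	// .Lboth_aligned
 *		return dst;
 *	}
 */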

.Ll_exc_copy:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOAD	t0, TI_TASK($28)
	nop
	LOAD	t0, THREAD_BUADDR(t0)
1:
EXC(	lb	t1, 0(src),	.Ll_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc:
	LOAD	t0, TI_TASK($28)
	nop
	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	nop
	SUB	len, AT, t0		# len = number of uncopied bytes
	/*
	 * Here's where we rely on src and dst being incremented in tandem,
	 * See (3) above.
	 * dst += (fault addr - src) to put dst at first byte to clear
	 */
	ADD	dst, t0			# compute start address of bytes to clear
	SUB	dst, src
	/*
	 * Clear len bytes starting at dst.  Can't call __bzero because it
	 * might modify len.  An inefficient loop for these rare times...
	 */
	.set	reorder				/* DADDI_WAR */
	SUB	src, len, 1
	beqz	len, .Ldone
	.set	noreorder
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	bnez	src, 1b
	SUB	src, src, 1
#else
	.set	push
	.set	noat
	li	v1, 1
	bnez	src, 1b
	SUB	src, src, v1
	.set	pop
#endif
	jr	ra
	nop


#define SEXC(n)						\
	.set	reorder;	/* DADDI_WAR */		\
.Ls_exc_p ## n ## u:					\
	ADD	len, len, n*NBYTES;			\
	jr	ra;					\
	.set	noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

.Ls_exc_p1:
	.set	reorder				/* DADDI_WAR */
	ADD	len, len, 1
	jr	ra
	.set	noreorder
.Ls_exc:
	jr	ra
	nop
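
/*
 * Illustrative sketch only (not part of the build): the .Ll_exc fixup above,
 * restated in C.  "end_of_src" plays the role of AT (the source end set up
 * by uaccess.h) and "bad_addr" the role of THREAD_BUADDR, the first address
 * whose load faulted; both names are for illustration only.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *
 *	size_t fixup_load_fault(unsigned char *dst, const unsigned char *src,
 *				const unsigned char *bad_addr,
 *				const unsigned char *end_of_src)
 *	{
 *		size_t uncopied = end_of_src - bad_addr;
 *
 *		// src and dst are advanced in tandem (rule (3) above), so
 *		// the first byte to clear is dst + (bad_addr - src)
 *		memset(dst + (bad_addr - src), 0, uncopied);
 *		return uncopied;	// becomes the new len
 *	}
 */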

	.align	5
LEAF(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
	beqz	t0, .L__memcpy
	move	v0, a0				/* return value */
	beqz	a2, .Lr_out
	END(memmove)

	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0
	beqz	t0, .Lr_end_bytes_up		# src >= dst
	nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

.Lr_end_bytes:
	R10KCBARRIER(0(ra))
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	SUB	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes
	.set	noreorder

.Lr_out:
	jr	ra
	move	a2, zero

.Lr_end_bytes_up:
	R10KCBARRIER(0(ra))
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	ADD	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes_up
	.set	noreorder

	jr	ra
	move	a2, zero
	END(__rmemcpy)
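
/*
 * Illustrative sketch only (not part of the build): the overlap test in
 * memmove and the direction choice in __rmemcpy above, restated in C.
 * memmove_model() is a hypothetical stand-in, not the kernel interface.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *
 *	void *memmove_model(void *dst, const void *src, size_t len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		// no overlap: dst + len <= src or dst >= src + len
 *		if (!(s < d + len && d < s + len))
 *			return memcpy(dst, src, len);
 *		if (s >= d) {			// forward copy is safe
 *			while (len--)
 *				*d++ = *s++;
 *		} else {			// copy backwards from the end
 *			while (len--)
 *				d[len] = s[len];
 *		}
 *		return dst;
 *	}
 */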