/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * arch/mips/include/asm/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */

#define EXC(inst_reg,addr,handler)		\
9:	inst_reg, addr;				\
	.section __ex_table,"a";		\
	PTR_WD	9b, handler;			\
	.previous
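/*
 * For illustration, an invocation such as
 *
 *	EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
 *
 * roughly expands to
 *
 * 9:	LOAD	t0, UNIT(0)(src)
 *	.section __ex_table,"a"
 *	PTR_WD	9b, l_exc
 *	.previous
 *
 * i.e. the potentially faulting access gets a local label and an
 * __ex_table entry pairing that address with its fixup handler, so a
 * fault on the load or store resumes at the named handler rather than
 * being treated as a fatal kernel fault.
 */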
/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */

#define LOAD	ld
#define LOADL	ldl
#define LOADR	ldr
#define STOREL	sdl
#define STORER	sdr
#define STORE	sd
#define ADD	daddu
#define SUB	dsubu
#define SRL	dsrl
#define SRA	dsra
#define SLL	dsll
#define SLLV	dsllv
#define SRLV	dsrlv
#define NBYTES	8
#define LOG_NBYTES 3

/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)
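/*
 * A note on the unaligned-access idiom used below (a sketch, assuming
 * the standard MIPS ldl/ldr and sdl/sdr semantics): when src is not
 * NBYTES-aligned, each unit is assembled from two partial loads,
 *
 *	LDFIRST	t0, FIRST(0)(src)	# bytes at the unaligned start
 *	LDREST	t0, REST(0)(src)	# remaining bytes of the same unit
 *
 * where FIRST(0) is the unit's first byte and REST(0) its last.  On a
 * big-endian kernel LDFIRST/LDREST map to ldl/ldr, on a little-endian
 * kernel to ldr/ldl, so the pair fills the whole register regardless of
 * the source alignment.
 */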
	.text
	.set	noreorder
	.set	noat

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
EXPORT_SYMBOL(memcpy)
	move	v0, dst				/* return value */
__memcpy:
FEXPORT(__raw_copy_from_user)
EXPORT_SYMBOL(__raw_copy_from_user)
FEXPORT(__raw_copy_to_user)
EXPORT_SYMBOL(__raw_copy_to_user)
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	#
	# Octeon doesn't care if the destination is unaligned.  The hardware
	# can fix it faster than we can special case the assembly.
	#
	pref	0, 0(src)
	sltu	t0, len, NBYTES		# Check if < 1 word
	bnez	t0, copy_bytes_checklen
	and	t0, src, ADDRMASK	# Check if src unaligned
	bnez	t0, src_unaligned
	sltu	t0, len, 4*NBYTES	# Check if < 4 words
	bnez	t0, less_than_4units
	sltu	t0, len, 8*NBYTES	# Check if < 8 words
	bnez	t0, less_than_8units
	sltu	t0, len, 16*NBYTES	# Check if < 16 words
	bnez	t0, cleanup_both_aligned
	sltu	t0, len, 128+1		# Check if len < 129
	bnez	t0, 1f			# Skip prefetch if len is too short
	sltu	t0, len, 256+1		# Check if len < 257
	bnez	t0, 1f			# Skip prefetch if len is too short
	pref	0, 128(src)		# We must not prefetch invalid addresses
	#
	# This is where we loop if there are more than 128 bytes left
2:	pref	0, 256(src)		# We must not prefetch invalid addresses
	#
	# This is where we loop if we can't prefetch anymore
1:
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 16*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
	ADD	src, src, 16*NBYTES
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)
	ADD	dst, dst, 16*NBYTES
EXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy_rewind16)
EXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy_rewind16)
EXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
	sltu	t0, len, 256+1		# See if we can prefetch more
	beqz	t0, 2b
	sltu	t0, len, 128		# See if we can loop more times
	beqz	t0, 1b
	nop
	#
	# Jump here if there are less than 16*NBYTES left.
	#
cleanup_both_aligned:
	beqz	len, done
	sltu	t0, len, 8*NBYTES
	bnez	t0, less_than_8units
	nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 8*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
	ADD	src, src, 8*NBYTES
	beqz	len, done
	ADD	dst, dst, 8*NBYTES
	#
	# Jump here if there are less than 8*NBYTES left.
	#
less_than_8units:
	sltu	t0, len, 4*NBYTES
	bnez	t0, less_than_4units
	nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	ADD	src, src, 4*NBYTES
	beqz	len, done
	ADD	dst, dst, 4*NBYTES
	#
	# Jump here if there are less than 4*NBYTES left.  This means
	# we may need to copy up to 3 NBYTES words.
	#
less_than_4units:
	sltu	t0, len, 1*NBYTES
	bnez	t0, copy_bytes_checklen
	nop
	#
	# 1) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, 8
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	ADD	dst, dst, NBYTES
	#
	# 2) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, 8
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	ADD	dst, dst, NBYTES
	#
	# 3) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	ADD	src, src, NBYTES
	ADD	dst, dst, NBYTES
	b	copy_bytes_checklen
EXC(	STORE	t0, -8(dst),		s_exc_p1u)

src_unaligned:
#define rem t8
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, cleanup_src_unaligned
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
	ADD	src, src, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	bne	len, rem, 1b
	ADD	dst, dst, 4*NBYTES

cleanup_src_unaligned:
	beqz	len, done
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, copy_bytes
	nop
1:
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bne	len, rem, 1b
	ADD	dst, dst, NBYTES

copy_bytes_checklen:
	beqz	len, done
	nop
copy_bytes:
	/* 0 < len < NBYTES */
#define COPY_BYTE(N)			\
EXC(	lb	t0, N(src), l_exc);	\
	SUB	len, len, 1;		\
	beqz	len, done;		\
EXC(	sb	t0, N(dst), s_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
EXC(	lb	t0, NBYTES-2(src), l_exc)
	SUB	len, len, 1
	jr	ra
EXC(	sb	t0, NBYTES-2(dst), s_exc_p1)
done:
	jr	ra
	nop
	END(memcpy)
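/*
 * A short map of the fixup handlers below: l_exc_copy_rewind16 undoes
 * the 16*NBYTES pointer advance done in the middle of the main loop and
 * falls into l_exc_copy; l_exc_copy byte-copies from the current src up
 * to the faulting address recorded in THREAD_BUADDR, so the destination
 * is filled up to the point of the fault; l_exc then reports the number
 * of uncopied bytes in len, using AT, which uaccess.h points just past
 * the end of the source buffer.
 */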
l_exc_copy_rewind16:
	/* Rewind src and dst by 16*NBYTES for l_exc_copy */
	SUB	src, src, 16*NBYTES
	SUB	dst, dst, 16*NBYTES
l_exc_copy:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)
1:
EXC(	lb	t1, 0(src),	l_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	bne	src, t0, 1b
	ADD	dst, dst, 1
l_exc:
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	SUB	len, AT, t0		# len number of uncopied bytes
	jr	ra
	nop


#define SEXC(n)				\
s_exc_p ## n ## u:			\
	jr	ra;			\
	ADD	len, len, n*NBYTES
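/*
 * For illustration, SEXC(n) emits the store-fault handler for the case
 * where n units had not yet been stored; e.g. SEXC(4) roughly expands to
 *
 * s_exc_p4u:
 *	jr	ra
 *	ADD	len, len, 4*NBYTES
 *
 * which restores the 4*NBYTES that the main path had already subtracted
 * from len but never managed to store.
 */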
393 * 394 * Assumes src < THREAD_BUADDR($28) 395 */ 396 LOAD t0, TI_TASK($28) 397 LOAD t0, THREAD_BUADDR(t0) 398 1: 399 EXC( lb t1, 0(src), l_exc) 400 ADD src, src, 1 401 sb t1, 0(dst) # can't fault -- we're copy_from_user 402 bne src, t0, 1b 403 ADD dst, dst, 1 404 l_exc: 405 LOAD t0, TI_TASK($28) 406 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address 407 SUB len, AT, t0 # len number of uncopied bytes 408 jr ra 409 nop 410 411 412 #define SEXC(n) \ 413 s_exc_p ## n ## u: \ 414 jr ra; \ 415 ADD len, len, n*NBYTES 416 417 SEXC(16) 418 SEXC(15) 419 SEXC(14) 420 SEXC(13) 421 SEXC(12) 422 SEXC(11) 423 SEXC(10) 424 SEXC(9) 425 SEXC(8) 426 SEXC(7) 427 SEXC(6) 428 SEXC(5) 429 SEXC(4) 430 SEXC(3) 431 SEXC(2) 432 SEXC(1) 433 434 s_exc_p1: 435 jr ra 436 ADD len, len, 1 437 s_exc: 438 jr ra 439 nop 440 441 .align 5 442 LEAF(memmove) 443 EXPORT_SYMBOL(memmove) 444 ADD t0, a0, a2 445 ADD t1, a1, a2 446 sltu t0, a1, t0 # dst + len <= src -> memcpy 447 sltu t1, a0, t1 # dst >= src + len -> memcpy 448 and t0, t1 449 beqz t0, __memcpy 450 move v0, a0 /* return value */ 451 beqz a2, r_out 452 END(memmove) 453 454 /* fall through to __rmemcpy */ 455 LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ 456 sltu t0, a1, a0 457 beqz t0, r_end_bytes_up # src >= dst 458 nop 459 ADD a0, a2 # dst = dst + len 460 ADD a1, a2 # src = src + len 461 462 r_end_bytes: 463 lb t0, -1(a1) 464 SUB a2, a2, 0x1 465 sb t0, -1(a0) 466 SUB a1, a1, 0x1 467 bnez a2, r_end_bytes 468 SUB a0, a0, 0x1 469 470 r_out: 471 jr ra 472 move a2, zero 473 474 r_end_bytes_up: 475 lb t0, (a1) 476 SUB a2, a2, 0x1 477 sb t0, (a0) 478 ADD a1, a1, 0x1 479 bnez a2, r_end_bytes_up 480 ADD a0, a0, 0x1 481 482 jr ra 483 move a2, zero 484 END(__rmemcpy)