1 /* !! 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 * This file is subject to the terms and condi !! 2 /* memcpy.S: Sparc optimized memcpy and memmove code 3 * License. See the file "COPYING" in the mai !! 3 * Hand optimized from GNU libc's memcpy and memmove 4 * for more details. !! 4 * Copyright (C) 1991,1996 Free Software Foundation 5 * !! 5 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) 6 * Unified implementation of memcpy, memmove a !! 6 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 7 * !! 7 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) 8 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf !! 8 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 9 * Copyright (C) 1999, 2000, 01, 2002 Silicon << 10 * Copyright (C) 2002 Broadcom, Inc. << 11 * memcpy/copy_user author: Mark Vandevoorde << 12 * Copyright (C) 2007 Maciej W. Rozycki << 13 * Copyright (C) 2014 Imagination Technologies << 14 * << 15 * Mnemonic names for arguments to memcpy/__co << 16 */ 9 */ 17 10 18 /* << 19 * Hack to resolve longstanding prefetch issue << 20 * << 21 * Prefetching may be fatal on some systems if << 22 * end of memory on some systems. It's also a << 23 * dma-coherent systems. << 24 */ << 25 #ifdef CONFIG_DMA_NONCOHERENT << 26 #undef CONFIG_CPU_HAS_PREFETCH << 27 #endif << 28 #ifdef CONFIG_MIPS_MALTA << 29 #undef CONFIG_CPU_HAS_PREFETCH << 30 #endif << 31 #ifdef CONFIG_CPU_MIPSR6 << 32 #undef CONFIG_CPU_HAS_PREFETCH << 33 #endif << 34 << 35 #include <linux/export.h> 11 #include <linux/export.h> 36 #include <asm/asm.h> << 37 #include <asm/asm-offsets.h> << 38 #include <asm/regdef.h> << 39 << 40 #define dst a0 << 41 #define src a1 << 42 #define len a2 << 43 << 44 /* << 45 * Spec << 46 * << 47 * memcpy copies len bytes from src to dst and << 48 * It assumes that << 49 * - src and dst don't overlap << 50 * - src is readable << 51 * - dst is writable << 52 * memcpy uses the standard calling convention << 53 * << 54 * __copy_user copies up to len bytes from src << 55 * the number of uncopied bytes due to an exce << 56 * __copy_user assumes that src and dst don't << 57 * implementing one of the following: << 58 * copy_to_user << 59 * - src is readable (no exceptions when << 60 * copy_from_user << 61 * - dst is writable (no exceptions when << 62 * __copy_user uses a non-standard calling con << 63 * include/asm-mips/uaccess.h << 64 * << 65 * When an exception happens on a load, the ha << 66 # ensure that all of the destination buffer i << 67 * leaking information to user mode programs. << 68 */ << 69 12 70 /* !! 13 #define FUNC(x) \ 71 * Implementation !! 14 .globl x; \ 72 */ !! 15 .type x,@function; \ 73 !! 16 .align 4; \ 74 /* !! 17 x: 75 * The exception handler for loads requires th !! 18 76 * 1- AT contain the address of the byte just !! 19 /* Both these macros have to start with exactly the same insn */ 77 * of the copy, !! 20 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ 78 * 2- src_entry <= src < AT, and !! 21 ldd [%src + (offset) + 0x00], %t0; \ 79 * 3- (dst - src) == (dst_entry - src_entry), !! 22 ldd [%src + (offset) + 0x08], %t2; \ 80 * The _entry suffix denotes values when __cop !! 23 ldd [%src + (offset) + 0x10], %t4; \ 81 * !! 24 ldd [%src + (offset) + 0x18], %t6; \ 82 * (1) is set up up by uaccess.h and maintaine !! 25 st %t0, [%dst + (offset) + 0x00]; \ 83 * (2) is met by incrementing src by the numbe !! 26 st %t1, [%dst + (offset) + 0x04]; \ 84 * (3) is met by not doing loads between a pai !! 
27 st %t2, [%dst + (offset) + 0x08]; \ 85 * !! 28 st %t3, [%dst + (offset) + 0x0c]; \ 86 * The exception handlers for stores adjust le !! 29 st %t4, [%dst + (offset) + 0x10]; \ 87 * These handlers do not need to overwrite any !! 30 st %t5, [%dst + (offset) + 0x14]; \ 88 * !! 31 st %t6, [%dst + (offset) + 0x18]; \ 89 * For __rmemcpy and memmove an exception is a !! 32 st %t7, [%dst + (offset) + 0x1c]; 90 * they're not protected. !! 33 91 */ !! 34 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ >> 35 ldd [%src + (offset) + 0x00], %t0; \ >> 36 ldd [%src + (offset) + 0x08], %t2; \ >> 37 ldd [%src + (offset) + 0x10], %t4; \ >> 38 ldd [%src + (offset) + 0x18], %t6; \ >> 39 std %t0, [%dst + (offset) + 0x00]; \ >> 40 std %t2, [%dst + (offset) + 0x08]; \ >> 41 std %t4, [%dst + (offset) + 0x10]; \ >> 42 std %t6, [%dst + (offset) + 0x18]; >> 43 >> 44 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ >> 45 ldd [%src - (offset) - 0x10], %t0; \ >> 46 ldd [%src - (offset) - 0x08], %t2; \ >> 47 st %t0, [%dst - (offset) - 0x10]; \ >> 48 st %t1, [%dst - (offset) - 0x0c]; \ >> 49 st %t2, [%dst - (offset) - 0x08]; \ >> 50 st %t3, [%dst - (offset) - 0x04]; >> 51 >> 52 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ >> 53 ldd [%src - (offset) - 0x10], %t0; \ >> 54 ldd [%src - (offset) - 0x08], %t2; \ >> 55 std %t0, [%dst - (offset) - 0x10]; \ >> 56 std %t2, [%dst - (offset) - 0x08]; >> 57 >> 58 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ >> 59 ldub [%src - (offset) - 0x02], %t0; \ >> 60 ldub [%src - (offset) - 0x01], %t1; \ >> 61 stb %t0, [%dst - (offset) - 0x02]; \ >> 62 stb %t1, [%dst - (offset) - 0x01]; 92 63 93 /* Instruction type */ !! 64 .text 94 #define LD_INSN 1 !! 65 .align 4 95 #define ST_INSN 2 << 96 /* Pretech type */ << 97 #define SRC_PREFETCH 1 << 98 #define DST_PREFETCH 2 << 99 #define LEGACY_MODE 1 << 100 #define EVA_MODE 2 << 101 #define USEROP 1 << 102 #define KERNELOP 2 << 103 << 104 /* << 105 * Wrapper to add an entry in the exception ta << 106 * in case the insn causes a memory exception. << 107 * Arguments: << 108 * insn : Load/store instruction << 109 * type : Instruction type << 110 * reg : Register << 111 * addr : Address << 112 * handler : Exception handler << 113 */ << 114 << 115 #define EXC(insn, type, reg, addr, handler) << 116 .if \mode == LEGACY_MODE; << 117 9: insn reg, addr; << 118 .section __ex_table,"a"; << 119 PTR_WD 9b, handler; << 120 .previous; << 121 /* This is assembled in EVA mode */ << 122 .else; << 123 /* If loading from user or sto << 124 .if ((\from == USEROP) && (typ << 125 ((\to == USEROP) && (type << 126 9: __BUILD_EVA_INSN(insn# << 127 .section __ex_table,"a << 128 PTR_WD 9b, handler; << 129 .previous; << 130 .else; << 131 /* << 132 * Still in EVA, but << 133 * exception handler o << 134 */ << 135 insn reg, addr; << 136 .endif; << 137 .endif << 138 66 139 /* !! 67 FUNC(memmove) 140 * Only on the 64-bit kernel we can made use o !! 68 EXPORT_SYMBOL(memmove) 141 */ !! 69 cmp %o0, %o1 142 #ifdef CONFIG_64BIT !! 70 mov %o0, %g7 143 #define USE_DOUBLE !! 71 bleu 9f 144 #endif !! 72 sub %o0, %o1, %o4 145 !! 73 146 #ifdef USE_DOUBLE !! 74 add %o1, %o2, %o3 147 !! 75 cmp %o3, %o0 148 #define LOADK ld /* No exception */ !! 76 bleu 0f 149 #define LOAD(reg, addr, handler) EXC(ld !! 77 andcc %o4, 3, %o5 150 #define LOADL(reg, addr, handler) EXC(ld !! 78 151 #define LOADR(reg, addr, handler) EXC(ld !! 79 add %o1, %o2, %o1 152 #define STOREL(reg, addr, handler) EXC(sd !! 
80 add %o0, %o2, %o0 153 #define STORER(reg, addr, handler) EXC(sd !! 81 sub %o1, 1, %o1 154 #define STORE(reg, addr, handler) EXC(sd !! 82 sub %o0, 1, %o0 155 #define ADD daddu !! 83 156 #define SUB dsubu !! 84 1: /* reverse_bytes */ 157 #define SRL dsrl !! 85 158 #define SRA dsra !! 86 ldub [%o1], %o4 159 #define SLL dsll !! 87 subcc %o2, 1, %o2 160 #define SLLV dsllv !! 88 stb %o4, [%o0] 161 #define SRLV dsrlv !! 89 sub %o1, 1, %o1 162 #define NBYTES 8 !! 90 bne 1b 163 #define LOG_NBYTES 3 !! 91 sub %o0, 1, %o0 164 !! 92 165 /* !! 93 retl 166 * As we are sharing code base with the mips32 !! 94 mov %g7, %o0 167 * register definitions). We need to redefine !! 95 168 * the n64 ABI register naming to the o32 ABI !! 96 /* NOTE: This code is executed just for the cases, 169 */ !! 97 where %src (=%o1) & 3 is != 0. 170 #undef t0 !! 98 We need to align it to 4. So, for (%src & 3) 171 #undef t1 !! 99 1 we need to do ldub,lduh 172 #undef t2 !! 100 2 lduh 173 #undef t3 !! 101 3 just ldub 174 #define t0 $8 !! 102 so even if it looks weird, the branches 175 #define t1 $9 !! 103 are correct here. -jj 176 #define t2 $10 !! 104 */ 177 #define t3 $11 !! 105 78: /* dword_align */ 178 #define t4 $12 !! 106 179 #define t5 $13 !! 107 andcc %o1, 1, %g0 180 #define t6 $14 !! 108 be 4f 181 #define t7 $15 !! 109 andcc %o1, 2, %g0 182 !! 110 183 #else !! 111 ldub [%o1], %g2 184 !! 112 add %o1, 1, %o1 185 #define LOADK lw /* No exception */ !! 113 stb %g2, [%o0] 186 #define LOAD(reg, addr, handler) EXC(lw !! 114 sub %o2, 1, %o2 187 #define LOADL(reg, addr, handler) EXC(lw !! 115 bne 3f 188 #define LOADR(reg, addr, handler) EXC(lw !! 116 add %o0, 1, %o0 189 #define STOREL(reg, addr, handler) EXC(sw !! 117 4: 190 #define STORER(reg, addr, handler) EXC(sw !! 118 lduh [%o1], %g2 191 #define STORE(reg, addr, handler) EXC(sw !! 119 add %o1, 2, %o1 192 #define ADD addu !! 120 sth %g2, [%o0] 193 #define SUB subu !! 121 sub %o2, 2, %o2 194 #define SRL srl !! 122 b 3f 195 #define SLL sll !! 123 add %o0, 2, %o0 196 #define SRA sra << 197 #define SLLV sllv << 198 #define SRLV srlv << 199 #define NBYTES 4 << 200 #define LOG_NBYTES 2 << 201 << 202 #endif /* USE_DOUBLE */ << 203 << 204 #define LOADB(reg, addr, handler) EXC(lb << 205 #define STOREB(reg, addr, handler) EXC(sb << 206 << 207 #ifdef CONFIG_CPU_HAS_PREFETCH << 208 # define _PREF(hint, addr, type) << 209 .if \mode == LEGACY_MODE; << 210 kernel_pref(hint, addr); << 211 .else; << 212 .if ((\from == USEROP) && (typ << 213 ((\to == USEROP) && (type << 214 /* << 215 * PREFE has only 9 bi << 216 * compared to PREF wh << 217 * need to use the $at << 218 * register should rem << 219 * used later on. Ther << 220 */ << 221 .set at=v1; << 222 user_pref(hint, addr); << 223 .set noat; << 224 .else; << 225 kernel_pref(hint, addr << 226 .endif; << 227 .endif << 228 #else << 229 # define _PREF(hint, addr, type) << 230 #endif << 231 << 232 #define PREFS(hint, addr) _PREF(hint, addr, SR << 233 #define PREFD(hint, addr) _PREF(hint, addr, DS << 234 << 235 #ifdef CONFIG_CPU_LITTLE_ENDIAN << 236 #define LDFIRST LOADR << 237 #define LDREST LOADL << 238 #define STFIRST STORER << 239 #define STREST STOREL << 240 #define SHIFT_DISCARD SLLV << 241 #else << 242 #define LDFIRST LOADL << 243 #define LDREST LOADR << 244 #define STFIRST STOREL << 245 #define STREST STORER << 246 #define SHIFT_DISCARD SRLV << 247 #endif << 248 << 249 #define FIRST(unit) ((unit)*NBYTES) << 250 #define REST(unit) (FIRST(unit)+NBYTES-1) << 251 #define UNIT(unit) FIRST(unit) << 252 124 253 #define ADDRMASK (NBYTES-1) !! 
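
The memmove entry above chooses between falling through to the shared forward memcpy path and a descending byte-at-a-time copy: the forward path is taken when dst <= src or when src + len <= dst, i.e. whenever copying upward cannot clobber source bytes that have not been read yet. Below is a minimal C sketch of that decision, with a plain byte loop standing in for the optimized forward path; memmove_sketch is a name used only for illustration, not a symbol from this file.

#include <stddef.h>
#include <stdint.h>

static void *memmove_sketch(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if ((uintptr_t)d <= (uintptr_t)s || (uintptr_t)s + n <= (uintptr_t)d) {
		/* forward copy is safe: this is where the assembly above
		 * falls through into the optimized memcpy body */
		while (n--)
			*d++ = *s++;
	} else {
		/* dst overlaps the tail of src: copy backwards, one byte
		 * at a time, like the reverse_bytes loop above */
		d += n;
		s += n;
		while (n--)
			*--d = *--s;
	}
	return dst;	/* like %g7 above: return the original dst */
}
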
125 FUNC(memcpy) /* %o0=dst %o1=src %o2=len */ >> 126 EXPORT_SYMBOL(memcpy) 254 127 255 .text !! 128 sub %o0, %o1, %o4 256 .set noreorder !! 129 mov %o0, %g7 257 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS !! 130 9: 258 .set noat !! 131 andcc %o4, 3, %o5 259 #else !! 132 0: 260 .set at=v1 !! 133 bne 86f 261 #endif !! 134 cmp %o2, 15 262 !! 135 263 .align 5 !! 136 bleu 90f 264 !! 137 andcc %o1, 3, %g0 265 /* !! 138 266 * Macro to build the __copy_user comm !! 139 bne 78b 267 * Arguments: !! 140 3: 268 * mode : LEGACY_MODE or EVA_MODE !! 141 andcc %o1, 4, %g0 269 * from : Source operand. USEROP or KE !! 142 270 * to : Destination operand. USEROP !! 143 be 2f 271 */ !! 144 mov %o2, %g1 272 .macro __BUILD_COPY_USER mode, from, t !! 145 273 !! 146 ld [%o1], %o4 274 /* initialize __memcpy if this the fir !! 147 sub %g1, 4, %g1 275 .ifnotdef __memcpy !! 148 st %o4, [%o0] 276 .set __memcpy, 1 !! 149 add %o1, 4, %o1 277 .hidden __memcpy /* make sure it does !! 150 add %o0, 4, %o0 278 .endif !! 151 2: 279 !! 152 andcc %g1, 0xffffff80, %g0 280 /* !! 153 be 3f 281 * Note: dst & src may be unaligned, l !! 154 andcc %o0, 4, %g0 282 * Temps !! 155 283 */ !! 156 be 82f + 4 284 #define rem t8 !! 157 5: 285 !! 158 MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) 286 R10KCBARRIER(0(ra)) !! 159 MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) 287 /* !! 160 MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) 288 * The "issue break"s below are very a !! 161 MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) 289 * Issue delays for dcache fills will !! 162 sub %g1, 128, %g1 290 * load queue full replay traps, etc. !! 163 add %o1, 128, %o1 291 * !! 164 cmp %g1, 128 292 * If len < NBYTES use byte operations !! 165 bge 5b 293 */ !! 166 add %o0, 128, %o0 294 PREFS( 0, 0(src) ) !! 167 3: 295 PREFD( 1, 0(dst) ) !! 168 andcc %g1, 0x70, %g4 296 sltu t2, len, NBYTES !! 169 be 80f 297 and t1, dst, ADDRMASK !! 170 andcc %g1, 8, %g0 298 PREFS( 0, 1*32(src) ) !! 171 299 PREFD( 1, 1*32(dst) ) !! 172 sethi %hi(80f), %o5 300 bnez t2, .Lcopy_bytes_checklen\@ !! 173 srl %g4, 1, %o4 301 and t0, src, ADDRMASK !! 174 add %g4, %o4, %o4 302 PREFS( 0, 2*32(src) ) !! 175 add %o1, %g4, %o1 303 PREFD( 1, 2*32(dst) ) !! 176 sub %o5, %o4, %o5 304 #ifndef CONFIG_CPU_NO_LOAD_STORE_LR !! 177 jmpl %o5 + %lo(80f), %g0 305 bnez t1, .Ldst_unaligned\@ !! 178 add %o0, %g4, %o0 306 nop !! 179 307 bnez t0, .Lsrc_unaligned_dst_aligne !! 180 79: /* memcpy_table */ 308 #else /* CONFIG_CPU_NO_LOAD_STORE_LR */ !! 181 309 or t0, t0, t1 !! 182 MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) 310 bnez t0, .Lcopy_unaligned_bytes\@ !! 183 MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) 311 #endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ !! 184 MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) 312 /* !! 185 MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) 313 * use delay slot for fall-through !! 186 MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) 314 * src and dst are aligned; need to co !! 187 MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) 315 */ !! 188 MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) 316 .Lboth_aligned\@: !! 189 317 SRL t0, len, LOG_NBYTES+3 # +3 !! 190 80: /* memcpy_table_end */ 318 beqz t0, .Lcleanup_both_aligned\@ # !! 191 be 81f 319 and rem, len, (8*NBYTES-1) # rem !! 192 andcc %g1, 4, %g0 320 PREFS( 0, 3*32(src) ) !! 193 321 PREFD( 1, 3*32(dst) ) !! 194 ldd [%o1], %g2 322 .align 4 !! 
195 add %o0, 8, %o0 >> 196 st %g2, [%o0 - 0x08] >> 197 add %o1, 8, %o1 >> 198 st %g3, [%o0 - 0x04] >> 199 >> 200 81: /* memcpy_last7 */ >> 201 >> 202 be 1f >> 203 andcc %g1, 2, %g0 >> 204 >> 205 ld [%o1], %g2 >> 206 add %o1, 4, %o1 >> 207 st %g2, [%o0] >> 208 add %o0, 4, %o0 323 1: 209 1: 324 R10KCBARRIER(0(ra)) !! 210 be 1f 325 LOAD(t0, UNIT(0)(src), .Ll_exc\@) !! 211 andcc %g1, 1, %g0 326 LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@) << 327 LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@) << 328 LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@) << 329 SUB len, len, 8*NBYTES << 330 LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@) << 331 LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@) << 332 STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@) << 333 STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@) << 334 LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@) << 335 LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@) << 336 ADD src, src, 8*NBYTES << 337 ADD dst, dst, 8*NBYTES << 338 STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@ << 339 STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@ << 340 STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@ << 341 STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@ << 342 STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@ << 343 STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@ << 344 PREFS( 0, 8*32(src) ) << 345 PREFD( 1, 8*32(dst) ) << 346 bne len, rem, 1b << 347 nop << 348 212 349 /* !! 213 lduh [%o1], %g2 350 * len == rem == the number of bytes l !! 214 add %o1, 2, %o1 351 */ !! 215 sth %g2, [%o0] 352 .Lcleanup_both_aligned\@: !! 216 add %o0, 2, %o0 353 beqz len, .Ldone\@ << 354 sltu t0, len, 4*NBYTES << 355 bnez t0, .Lless_than_4units\@ << 356 and rem, len, (NBYTES-1) # rem << 357 /* << 358 * len >= 4*NBYTES << 359 */ << 360 LOAD( t0, UNIT(0)(src), .Ll_exc\@) << 361 LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@ << 362 LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@ << 363 LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@ << 364 SUB len, len, 4*NBYTES << 365 ADD src, src, 4*NBYTES << 366 R10KCBARRIER(0(ra)) << 367 STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@) << 368 STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@) << 369 STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@) << 370 STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@) << 371 .set reorder << 372 ADD dst, dst, 4*NBYTES << 373 beqz len, .Ldone\@ << 374 .set noreorder << 375 .Lless_than_4units\@: << 376 /* << 377 * rem = len % NBYTES << 378 */ << 379 beq rem, len, .Lcopy_bytes\@ << 380 nop << 381 1: << 382 R10KCBARRIER(0(ra)) << 383 LOAD(t0, 0(src), .Ll_exc\@) << 384 ADD src, src, NBYTES << 385 SUB len, len, NBYTES << 386 STORE(t0, 0(dst), .Ls_exc_p1u\@) << 387 .set reorder << 388 ADD dst, dst, NBYTES << 389 bne rem, len, 1b << 390 .set noreorder << 391 << 392 #ifndef CONFIG_CPU_NO_LOAD_STORE_LR << 393 /* << 394 * src and dst are aligned, need to co << 395 * A loop would do only a byte at a ti << 396 * mispredicts. Can't do an explicit << 397 * because can't assume read-access to << 398 * STREST dst, which doesn't require r << 399 * << 400 * This code should perform better tha << 401 * wide-issue mips processors because << 402 * more instruction-level parallelism. 
<< 403 */ << 404 #define bits t2 << 405 beqz len, .Ldone\@ << 406 ADD t1, dst, len # t1 is just p << 407 li bits, 8*NBYTES << 408 SLL rem, len, 3 # rem = number << 409 LOAD(t0, 0(src), .Ll_exc\@) << 410 SUB bits, bits, rem # bits = numbe << 411 SHIFT_DISCARD t0, t0, bits << 412 STREST(t0, -1(t1), .Ls_exc\@) << 413 jr ra << 414 move len, zero << 415 .Ldst_unaligned\@: << 416 /* << 417 * dst is unaligned << 418 * t0 = src & ADDRMASK << 419 * t1 = dst & ADDRMASK; T1 > 0 << 420 * len >= NBYTES << 421 * << 422 * Copy enough bytes to align dst << 423 * Set match = (src and dst have same << 424 */ << 425 #define match rem << 426 LDFIRST(t3, FIRST(0)(src), .Ll_exc\@) << 427 ADD t2, zero, NBYTES << 428 LDREST(t3, REST(0)(src), .Ll_exc_copy\ << 429 SUB t2, t2, t1 # t2 = number << 430 xor match, t0, t1 << 431 R10KCBARRIER(0(ra)) << 432 STFIRST(t3, FIRST(0)(dst), .Ls_exc\@) << 433 beq len, t2, .Ldone\@ << 434 SUB len, len, t2 << 435 ADD dst, dst, t2 << 436 beqz match, .Lboth_aligned\@ << 437 ADD src, src, t2 << 438 << 439 .Lsrc_unaligned_dst_aligned\@: << 440 SRL t0, len, LOG_NBYTES+2 # +2 << 441 PREFS( 0, 3*32(src) ) << 442 beqz t0, .Lcleanup_src_unaligned\@ << 443 and rem, len, (4*NBYTES-1) # rem << 444 PREFD( 1, 3*32(dst) ) << 445 1: 217 1: 446 /* !! 218 be 1f 447 * Avoid consecutive LD*'s to the same registe << 448 * implementations can't issue them in the sam << 449 * It's OK to load FIRST(N+1) before REST(N) b << 450 * are to the same unit (unless src is aligned << 451 */ << 452 R10KCBARRIER(0(ra)) << 453 LDFIRST(t0, FIRST(0)(src), .Ll_exc\@) << 454 LDFIRST(t1, FIRST(1)(src), .Ll_exc_cop << 455 SUB len, len, 4*NBYTES << 456 LDREST(t0, REST(0)(src), .Ll_exc_copy\ << 457 LDREST(t1, REST(1)(src), .Ll_exc_copy\ << 458 LDFIRST(t2, FIRST(2)(src), .Ll_exc_cop << 459 LDFIRST(t3, FIRST(3)(src), .Ll_exc_cop << 460 LDREST(t2, REST(2)(src), .Ll_exc_copy\ << 461 LDREST(t3, REST(3)(src), .Ll_exc_copy\ << 462 PREFS( 0, 9*32(src) ) # 0 is << 463 ADD src, src, 4*NBYTES << 464 #ifdef CONFIG_CPU_SB1 << 465 nop # impr << 466 #endif << 467 STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@) << 468 STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@) << 469 STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@) << 470 STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@) << 471 PREFD( 1, 9*32(dst) ) # 1 is << 472 .set reorder << 473 ADD dst, dst, 4*NBYTES << 474 bne len, rem, 1b << 475 .set noreorder << 476 << 477 .Lcleanup_src_unaligned\@: << 478 beqz len, .Ldone\@ << 479 and rem, len, NBYTES-1 # rem = le << 480 beq rem, len, .Lcopy_bytes\@ << 481 nop 219 nop >> 220 >> 221 ldub [%o1], %g2 >> 222 stb %g2, [%o0] 482 1: 223 1: 483 R10KCBARRIER(0(ra)) !! 224 retl 484 LDFIRST(t0, FIRST(0)(src), .Ll_exc\@) !! 
225 mov %g7, %o0 485 LDREST(t0, REST(0)(src), .Ll_exc_copy\ << 486 ADD src, src, NBYTES << 487 SUB len, len, NBYTES << 488 STORE(t0, 0(dst), .Ls_exc_p1u\@) << 489 .set reorder << 490 ADD dst, dst, NBYTES << 491 bne len, rem, 1b << 492 .set noreorder << 493 << 494 #endif /* !CONFIG_CPU_NO_LOAD_STORE_LR */ << 495 .Lcopy_bytes_checklen\@: << 496 beqz len, .Ldone\@ << 497 nop << 498 .Lcopy_bytes\@: << 499 /* 0 < len < NBYTES */ << 500 R10KCBARRIER(0(ra)) << 501 #define COPY_BYTE(N) \ << 502 LOADB(t0, N(src), .Ll_exc\@); \ << 503 SUB len, len, 1; \ << 504 beqz len, .Ldone\@; \ << 505 STOREB(t0, N(dst), .Ls_exc_p1\@) << 506 << 507 COPY_BYTE(0) << 508 COPY_BYTE(1) << 509 #ifdef USE_DOUBLE << 510 COPY_BYTE(2) << 511 COPY_BYTE(3) << 512 COPY_BYTE(4) << 513 COPY_BYTE(5) << 514 #endif << 515 LOADB(t0, NBYTES-2(src), .Ll_exc\@) << 516 SUB len, len, 1 << 517 jr ra << 518 STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@ << 519 .Ldone\@: << 520 jr ra << 521 nop << 522 226 523 #ifdef CONFIG_CPU_NO_LOAD_STORE_LR !! 227 82: /* ldd_std */ 524 .Lcopy_unaligned_bytes\@: !! 228 MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) >> 229 MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) >> 230 MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) >> 231 MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) >> 232 subcc %g1, 128, %g1 >> 233 add %o1, 128, %o1 >> 234 cmp %g1, 128 >> 235 bge 82b >> 236 add %o0, 128, %o0 >> 237 >> 238 andcc %g1, 0x70, %g4 >> 239 be 84f >> 240 andcc %g1, 8, %g0 >> 241 >> 242 sethi %hi(84f), %o5 >> 243 add %o1, %g4, %o1 >> 244 sub %o5, %g4, %o5 >> 245 jmpl %o5 + %lo(84f), %g0 >> 246 add %o0, %g4, %o0 >> 247 >> 248 83: /* amemcpy_table */ >> 249 >> 250 MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5) >> 251 MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5) >> 252 MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5) >> 253 MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5) >> 254 MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5) >> 255 MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5) >> 256 MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5) >> 257 >> 258 84: /* amemcpy_table_end */ >> 259 be 85f >> 260 andcc %g1, 4, %g0 >> 261 >> 262 ldd [%o1], %g2 >> 263 add %o0, 8, %o0 >> 264 std %g2, [%o0 - 0x08] >> 265 add %o1, 8, %o1 >> 266 85: /* amemcpy_last7 */ >> 267 be 1f >> 268 andcc %g1, 2, %g0 >> 269 >> 270 ld [%o1], %g2 >> 271 add %o1, 4, %o1 >> 272 st %g2, [%o0] >> 273 add %o0, 4, %o0 525 1: 274 1: 526 COPY_BYTE(0) !! 275 be 1f 527 COPY_BYTE(1) !! 276 andcc %g1, 1, %g0 528 COPY_BYTE(2) !! 277 529 COPY_BYTE(3) !! 278 lduh [%o1], %g2 530 COPY_BYTE(4) !! 279 add %o1, 2, %o1 531 COPY_BYTE(5) !! 280 sth %g2, [%o0] 532 COPY_BYTE(6) !! 281 add %o0, 2, %o0 533 COPY_BYTE(7) << 534 ADD src, src, 8 << 535 b 1b << 536 ADD dst, dst, 8 << 537 #endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ << 538 .if __memcpy == 1 << 539 END(memcpy) << 540 .set __memcpy, 0 << 541 .hidden __memcpy << 542 .endif << 543 << 544 .Ll_exc_copy\@: << 545 /* << 546 * Copy bytes from src until faulting << 547 * lb faults) << 548 * << 549 * When reached by a faulting LDFIRST/ << 550 * may be more than a byte beyond the << 551 * Hence, the lb below may get an exce << 552 * << 553 * Assumes src < THREAD_BUADDR($28) << 554 */ << 555 LOADK t0, TI_TASK($28) << 556 nop << 557 LOADK t0, THREAD_BUADDR(t0) << 558 1: 282 1: 559 LOADB(t1, 0(src), .Ll_exc\@) !! 
283 be 1f 560 ADD src, src, 1 << 561 sb t1, 0(dst) # can't fault << 562 .set reorder << 563 ADD dst, dst, 1 << 564 bne src, t0, 1b << 565 .set noreorder << 566 .Ll_exc\@: << 567 LOADK t0, TI_TASK($28) << 568 nop << 569 LOADK t0, THREAD_BUADDR(t0) # t0 i << 570 nop << 571 SUB len, AT, t0 # len << 572 jr ra << 573 nop 284 nop 574 285 575 #define SEXC(n) !! 286 ldub [%o1], %g2 576 .set reorder; !! 287 stb %g2, [%o0] 577 .Ls_exc_p ## n ## u\@: !! 288 1: 578 ADD len, len, n*NBYTES; !! 289 retl 579 jr ra; !! 290 mov %g7, %o0 580 .set noreorder << 581 << 582 SEXC(8) << 583 SEXC(7) << 584 SEXC(6) << 585 SEXC(5) << 586 SEXC(4) << 587 SEXC(3) << 588 SEXC(2) << 589 SEXC(1) << 590 << 591 .Ls_exc_p1\@: << 592 .set reorder << 593 ADD len, len, 1 << 594 jr ra << 595 .set noreorder << 596 .Ls_exc\@: << 597 jr ra << 598 nop << 599 .endm << 600 291 601 #ifndef CONFIG_HAVE_PLAT_MEMCPY !! 292 86: /* non_aligned */ 602 .align 5 !! 293 cmp %o2, 6 603 LEAF(memmove) !! 294 bleu 88f 604 EXPORT_SYMBOL(memmove) !! 295 nop 605 ADD t0, a0, a2 !! 296 606 ADD t1, a1, a2 !! 297 save %sp, -96, %sp 607 sltu t0, a1, t0 !! 298 andcc %i0, 3, %g0 608 sltu t1, a0, t1 !! 299 be 61f 609 and t0, t1 !! 300 andcc %i0, 1, %g0 610 beqz t0, .L__memcpy !! 301 be 60f 611 move v0, a0 !! 302 andcc %i0, 2, %g0 612 beqz a2, .Lr_out !! 303 613 END(memmove) !! 304 ldub [%i1], %g5 614 !! 305 add %i1, 1, %i1 615 /* fall through to __rmemcpy */ !! 306 stb %g5, [%i0] 616 LEAF(__rmemcpy) !! 307 sub %i2, 1, %i2 617 sltu t0, a1, a0 !! 308 bne 61f 618 beqz t0, .Lr_end_bytes_up !! 309 add %i0, 1, %i0 >> 310 60: >> 311 ldub [%i1], %g3 >> 312 add %i1, 2, %i1 >> 313 stb %g3, [%i0] >> 314 sub %i2, 2, %i2 >> 315 ldub [%i1 - 1], %g3 >> 316 add %i0, 2, %i0 >> 317 stb %g3, [%i0 - 1] >> 318 61: >> 319 and %i1, 3, %g2 >> 320 and %i2, 0xc, %g3 >> 321 and %i1, -4, %i1 >> 322 cmp %g3, 4 >> 323 sll %g2, 3, %g4 >> 324 mov 32, %g2 >> 325 be 4f >> 326 sub %g2, %g4, %l0 >> 327 >> 328 blu 3f >> 329 cmp %g3, 0x8 >> 330 >> 331 be 2f >> 332 srl %i2, 2, %g3 >> 333 >> 334 ld [%i1], %i3 >> 335 add %i0, -8, %i0 >> 336 ld [%i1 + 4], %i4 >> 337 b 8f >> 338 add %g3, 1, %g3 >> 339 2: >> 340 ld [%i1], %i4 >> 341 add %i0, -12, %i0 >> 342 ld [%i1 + 4], %i5 >> 343 add %g3, 2, %g3 >> 344 b 9f >> 345 add %i1, -4, %i1 >> 346 3: >> 347 ld [%i1], %g1 >> 348 add %i0, -4, %i0 >> 349 ld [%i1 + 4], %i3 >> 350 srl %i2, 2, %g3 >> 351 b 7f >> 352 add %i1, 4, %i1 >> 353 4: >> 354 ld [%i1], %i5 >> 355 cmp %i2, 7 >> 356 ld [%i1 + 4], %g1 >> 357 srl %i2, 2, %g3 >> 358 bleu 10f >> 359 add %i1, 8, %i1 >> 360 >> 361 ld [%i1], %i3 >> 362 add %g3, -1, %g3 >> 363 5: >> 364 sll %i5, %g4, %g2 >> 365 srl %g1, %l0, %g5 >> 366 or %g2, %g5, %g2 >> 367 st %g2, [%i0] >> 368 7: >> 369 ld [%i1 + 4], %i4 >> 370 sll %g1, %g4, %g2 >> 371 srl %i3, %l0, %g5 >> 372 or %g2, %g5, %g2 >> 373 st %g2, [%i0 + 4] >> 374 8: >> 375 ld [%i1 + 8], %i5 >> 376 sll %i3, %g4, %g2 >> 377 srl %i4, %l0, %g5 >> 378 or %g2, %g5, %g2 >> 379 st %g2, [%i0 + 8] >> 380 9: >> 381 ld [%i1 + 12], %g1 >> 382 sll %i4, %g4, %g2 >> 383 srl %i5, %l0, %g5 >> 384 addcc %g3, -4, %g3 >> 385 or %g2, %g5, %g2 >> 386 add %i1, 16, %i1 >> 387 st %g2, [%i0 + 12] >> 388 add %i0, 16, %i0 >> 389 bne,a 5b >> 390 ld [%i1], %i3 >> 391 10: >> 392 sll %i5, %g4, %g2 >> 393 srl %g1, %l0, %g5 >> 394 srl %l0, 3, %g3 >> 395 or %g2, %g5, %g2 >> 396 sub %i1, %g3, %i1 >> 397 andcc %i2, 2, %g0 >> 398 st %g2, [%i0] >> 399 be 1f >> 400 andcc %i2, 1, %g0 >> 401 >> 402 ldub [%i1], %g2 >> 403 add %i1, 2, %i1 >> 404 stb %g2, [%i0 + 4] >> 405 add %i0, 2, %i0 >> 406 ldub [%i1 - 1], %g2 >> 
407 stb %g2, [%i0 + 3] >> 408 1: >> 409 be 1f 619 nop 410 nop 620 ADD a0, a2 !! 411 ldub [%i1], %g2 621 ADD a1, a2 !! 412 stb %g2, [%i0 + 4] 622 !! 413 1: 623 .Lr_end_bytes: !! 414 ret 624 R10KCBARRIER(0(ra)) !! 415 restore %g7, %g0, %o0 625 lb t0, -1(a1) << 626 SUB a2, a2, 0x1 << 627 sb t0, -1(a0) << 628 SUB a1, a1, 0x1 << 629 .set reorder << 630 SUB a0, a0, 0x1 << 631 bnez a2, .Lr_end_bytes << 632 .set noreorder << 633 << 634 .Lr_out: << 635 jr ra << 636 move a2, zero << 637 << 638 .Lr_end_bytes_up: << 639 R10KCBARRIER(0(ra)) << 640 lb t0, (a1) << 641 SUB a2, a2, 0x1 << 642 sb t0, (a0) << 643 ADD a1, a1, 0x1 << 644 .set reorder << 645 ADD a0, a0, 0x1 << 646 bnez a2, .Lr_end_bytes_up << 647 .set noreorder << 648 << 649 jr ra << 650 move a2, zero << 651 END(__rmemcpy) << 652 << 653 /* << 654 * A combined memcpy/__copy_user << 655 * __copy_user sets len to 0 for success; else << 656 * the number of uncopied bytes. << 657 * memcpy sets v0 to dst. << 658 */ << 659 .align 5 << 660 LEAF(memcpy) << 661 EXPORT_SYMBOL(memcpy) << 662 move v0, dst << 663 .L__memcpy: << 664 #ifndef CONFIG_EVA << 665 FEXPORT(__raw_copy_from_user) << 666 EXPORT_SYMBOL(__raw_copy_from_user) << 667 FEXPORT(__raw_copy_to_user) << 668 EXPORT_SYMBOL(__raw_copy_to_user) << 669 #endif << 670 /* Legacy Mode, user <-> user */ << 671 __BUILD_COPY_USER LEGACY_MODE USEROP U << 672 << 673 #endif << 674 << 675 #ifdef CONFIG_EVA << 676 << 677 /* << 678 * For EVA we need distinct symbols for readin << 679 * This is because we need to use specific EVA << 680 * virtual <-> physical translation when a vir << 681 * space << 682 */ << 683 << 684 /* << 685 * __copy_from_user (EVA) << 686 */ << 687 416 688 LEAF(__raw_copy_from_user) !! 417 88: /* short_end */ 689 EXPORT_SYMBOL(__raw_copy_from_user) << 690 __BUILD_COPY_USER EVA_MODE USEROP KERN << 691 END(__raw_copy_from_user) << 692 418 >> 419 and %o2, 0xe, %o3 >> 420 20: >> 421 sethi %hi(89f), %o5 >> 422 sll %o3, 3, %o4 >> 423 add %o0, %o3, %o0 >> 424 sub %o5, %o4, %o5 >> 425 add %o1, %o3, %o1 >> 426 jmpl %o5 + %lo(89f), %g0 >> 427 andcc %o2, 1, %g0 >> 428 >> 429 MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) >> 430 MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) >> 431 MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) >> 432 MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) >> 433 MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) >> 434 MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) >> 435 MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) 693 436 >> 437 89: /* short_table_end */ 694 438 695 /* !! 439 be 1f 696 * __copy_to_user (EVA) !! 440 nop 697 */ << 698 441 699 LEAF(__raw_copy_to_user) !! 442 ldub [%o1], %g2 700 EXPORT_SYMBOL(__raw_copy_to_user) !! 443 stb %g2, [%o0] 701 __BUILD_COPY_USER EVA_MODE KERNELOP USEROP !! 444 1: 702 END(__raw_copy_to_user) !! 445 retl >> 446 mov %g7, %o0 703 447 704 #endif !! 448 90: /* short_aligned_end */ >> 449 bne 88b >> 450 andcc %o2, 8, %g0 >> 451 >> 452 be 1f >> 453 andcc %o2, 4, %g0 >> 454 >> 455 ld [%o1 + 0x00], %g2 >> 456 ld [%o1 + 0x04], %g3 >> 457 add %o1, 8, %o1 >> 458 st %g2, [%o0 + 0x00] >> 459 st %g3, [%o0 + 0x04] >> 460 add %o0, 8, %o0 >> 461 1: >> 462 b 81b >> 463 mov %o2, %g1
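
For the mutually misaligned case the non_aligned path above (label 86) never issues an unaligned access: it rounds the source pointer down to a word boundary, keeps the byte offset as a shift count (%g4 = offset * 8, %l0 = 32 - %g4), and builds every destination word by splicing two adjacent aligned source words with sll/srl/or before each word-sized store. The sketch below is a hypothetical C rendering of that splice, assuming sparc32's big-endian word order; splice_copy, src_aligned and byte_off are names invented for the example, not symbols from this file.

#include <stddef.h>
#include <stdint.h>

/*
 * Copy nwords 32-bit words to an aligned destination from a source that
 * starts byte_off bytes (1..3) into the aligned word *src_aligned.
 * byte_off == 0 is deliberately excluded: the aligned case takes the
 * ldd/std paths above instead, and it would make the right shift 32.
 */
static void splice_copy(uint32_t *dst, const uint32_t *src_aligned,
			unsigned int byte_off, size_t nwords)
{
	unsigned int lshift = byte_off * 8;	/* plays the role of %g4 */
	unsigned int rshift = 32 - lshift;	/* plays the role of %l0 */
	uint32_t prev = *src_aligned++;		/* word holding the first source bytes */

	while (nwords--) {
		uint32_t next = *src_aligned++;
		/* big-endian splice: the tail of prev supplies the high
		 * bytes, the head of next supplies the low bytes */
		*dst++ = (prev << lshift) | (next >> rshift);
		prev = next;
	}
}
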