~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
memcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~
Diff markup

Differences between /arch/sparc/lib/memcpy.S (Version linux-6.12-rc7) and /arch/mips/lib/memcpy.S (Version linux-4.10.17)

  1 /* SPDX-License-Identifier: GPL-2.0 */         !!   1 /*
  2 /* memcpy.S: Sparc optimized memcpy and memmov !!   2  * This file is subject to the terms and conditions of the GNU General Public
  3  * Hand optimized from GNU libc's memcpy and m !!   3  * License.  See the file "COPYING" in the main directory of this archive
  4  * Copyright (C) 1991,1996 Free Software Found !!   4  * for more details.
  5  * Copyright (C) 1995 Linus Torvalds (Linus.To !!   5  *
  6  * Copyright (C) 1996 David S. Miller (davem@c !!   6  * Unified implementation of memcpy, memmove and the __copy_user backend.
  7  * Copyright (C) 1996 Eddie C. Dost (ecd@skyne !!   7  *
  8  * Copyright (C) 1996 Jakub Jelinek (jj@sunsit !!   8  * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
  9  */                                            !!   9  * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 10                                                !!  10  * Copyright (C) 2002 Broadcom, Inc.
 11 #include <linux/export.h>                      !!  11  *   memcpy/copy_user author: Mark Vandevoorde
 12                                                !!  12  * Copyright (C) 2007  Maciej W. Rozycki
 13 #define FUNC(x)                 \              !!  13  * Copyright (C) 2014 Imagination Technologies Ltd.
 14         .globl  x;              \              !!  14  *
 15         .type   x,@function;    \              !!  15  * Mnemonic names for arguments to memcpy/__copy_user
 16         .align  4;              \              !!  16  */
 17 x:                                             !!  17 
 18                                                !!  18 /*
 19 /* Both these macros have to start with exactl !!  19  * Hack to resolve longstanding prefetch issue
 20 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1 !!  20  *
 21         ldd     [%src + (offset) + 0x00], %t0; !!  21  * Prefetching may be fatal on some systems if we're prefetching beyond the
  22         ldd     [%src + (offset) + 0x08], %t2; !!  22  * end of memory.  It's also a seriously bad idea on non
 23         ldd     [%src + (offset) + 0x10], %t4; !!  23  * dma-coherent systems.
 24         ldd     [%src + (offset) + 0x18], %t6; !!  24  */
 25         st      %t0, [%dst + (offset) + 0x00]; !!  25 #ifdef CONFIG_DMA_NONCOHERENT
 26         st      %t1, [%dst + (offset) + 0x04]; !!  26 #undef CONFIG_CPU_HAS_PREFETCH
 27         st      %t2, [%dst + (offset) + 0x08]; !!  27 #endif
 28         st      %t3, [%dst + (offset) + 0x0c]; !!  28 #ifdef CONFIG_MIPS_MALTA
 29         st      %t4, [%dst + (offset) + 0x10]; !!  29 #undef CONFIG_CPU_HAS_PREFETCH
 30         st      %t5, [%dst + (offset) + 0x14]; !!  30 #endif
 31         st      %t6, [%dst + (offset) + 0x18]; !!  31 
 32         st      %t7, [%dst + (offset) + 0x1c]; !!  32 #include <asm/asm.h>
 33                                                !!  33 #include <asm/asm-offsets.h>
 34 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t !!  34 #include <asm/regdef.h>
 35         ldd     [%src + (offset) + 0x00], %t0; !!  35 
 36         ldd     [%src + (offset) + 0x08], %t2; !!  36 #define dst a0
 37         ldd     [%src + (offset) + 0x10], %t4; !!  37 #define src a1
 38         ldd     [%src + (offset) + 0x18], %t6; !!  38 #define len a2
 39         std     %t0, [%dst + (offset) + 0x00]; !!  39 
 40         std     %t2, [%dst + (offset) + 0x08]; !!  40 /*
 41         std     %t4, [%dst + (offset) + 0x10]; !!  41  * Spec
 42         std     %t6, [%dst + (offset) + 0x18]; !!  42  *
 43                                                !!  43  * memcpy copies len bytes from src to dst and sets v0 to dst.
 44 #define MOVE_LASTCHUNK(src, dst, offset, t0, t !!  44  * It assumes that
 45         ldd     [%src - (offset) - 0x10], %t0; !!  45  *   - src and dst don't overlap
 46         ldd     [%src - (offset) - 0x08], %t2; !!  46  *   - src is readable
 47         st      %t0, [%dst - (offset) - 0x10]; !!  47  *   - dst is writable
 48         st      %t1, [%dst - (offset) - 0x0c]; !!  48  * memcpy uses the standard calling convention
 49         st      %t2, [%dst - (offset) - 0x08]; !!  49  *
 50         st      %t3, [%dst - (offset) - 0x04]; !!  50  * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 51                                                !!  51  * the number of uncopied bytes due to an exception caused by a read or write.
 52 #define MOVE_LASTALIGNCHUNK(src, dst, offset,  !!  52  * __copy_user assumes that src and dst don't overlap, and that the call is
 53         ldd     [%src - (offset) - 0x10], %t0; !!  53  * implementing one of the following:
 54         ldd     [%src - (offset) - 0x08], %t2; !!  54  *   copy_to_user
 55         std     %t0, [%dst - (offset) - 0x10]; !!  55  *     - src is readable  (no exceptions when reading src)
 56         std     %t2, [%dst - (offset) - 0x08]; !!  56  *   copy_from_user
 57                                                !!  57  *     - dst is writable  (no exceptions when writing dst)
 58 #define MOVE_SHORTCHUNK(src, dst, offset, t0,  !!  58  * __copy_user uses a non-standard calling convention; see
 59         ldub    [%src - (offset) - 0x02], %t0; !!  59  * include/asm-mips/uaccess.h
 60         ldub    [%src - (offset) - 0x01], %t1; !!  60  *
 61         stb     %t0, [%dst - (offset) - 0x02]; !!  61  * When an exception happens on a load, the handler must
  62         stb     %t1, [%dst - (offset) - 0x01]; !!  62  * ensure that all of the destination buffer is overwritten to prevent
                                                   >>  63  * leaking information to user mode programs.
                                                   >>  64  */
                                                   >>  65 
                                                   >>  66 /*
                                                   >>  67  * Implementation
                                                   >>  68  */
                                                   >>  69 
                                                   >>  70 /*
                                                   >>  71  * The exception handler for loads requires that:
                                                   >>  72  *  1- AT contain the address of the byte just past the end of the source
                                                   >>  73  *     of the copy,
                                                   >>  74  *  2- src_entry <= src < AT, and
                                                   >>  75  *  3- (dst - src) == (dst_entry - src_entry),
                                                   >>  76  * The _entry suffix denotes values when __copy_user was called.
                                                   >>  77  *
                                                   >>  78  * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
                                                   >>  79  * (2) is met by incrementing src by the number of bytes copied
                                                   >>  80  * (3) is met by not doing loads between a pair of increments of dst and src
                                                   >>  81  *
                                                   >>  82  * The exception handlers for stores adjust len (if necessary) and return.
                                                   >>  83  * These handlers do not need to overwrite any data.
                                                   >>  84  *
                                                   >>  85  * For __rmemcpy and memmove an exception is always a kernel bug, therefore
                                                   >>  86  * they're not protected.
                                                   >>  87  */
                                                   >>  88 
                                                   >>  89 /* Instruction type */
                                                   >>  90 #define LD_INSN 1
                                                   >>  91 #define ST_INSN 2
                                                   >>  92 /* Prefetch type */
                                                   >>  93 #define SRC_PREFETCH 1
                                                   >>  94 #define DST_PREFETCH 2
                                                   >>  95 #define LEGACY_MODE 1
                                                   >>  96 #define EVA_MODE    2
                                                   >>  97 #define USEROP   1
                                                   >>  98 #define KERNELOP 2
                                                   >>  99 
                                                   >> 100 /*
                                                   >> 101  * Wrapper to add an entry in the exception table
                                                   >> 102  * in case the insn causes a memory exception.
                                                   >> 103  * Arguments:
                                                   >> 104  * insn    : Load/store instruction
                                                   >> 105  * type    : Instruction type
                                                   >> 106  * reg     : Register
                                                   >> 107  * addr    : Address
                                                   >> 108  * handler : Exception handler
                                                   >> 109  */
                                                   >> 110 
                                                   >> 111 #define EXC(insn, type, reg, addr, handler)                     \
                                                   >> 112         .if \mode == LEGACY_MODE;                               \
                                                   >> 113 9:              insn reg, addr;                                 \
                                                   >> 114                 .section __ex_table,"a";                        \
                                                   >> 115                 PTR     9b, handler;                            \
                                                   >> 116                 .previous;                                      \
                                                   >> 117         /* This is assembled in EVA mode */                     \
                                                   >> 118         .else;                                                  \
                                                   >> 119                 /* If loading from user or storing to user */   \
                                                   >> 120                 .if ((\from == USEROP) && (type == LD_INSN)) || \
                                                   >> 121                     ((\to == USEROP) && (type == ST_INSN));     \
                                                   >> 122 9:                      __BUILD_EVA_INSN(insn##e, reg, addr);   \
                                                   >> 123                         .section __ex_table,"a";                \
                                                   >> 124                         PTR     9b, handler;                    \
                                                   >> 125                         .previous;                              \
                                                   >> 126                 .else;                                          \
                                                   >> 127                         /*                                      \
                                                   >> 128                          *  Still in EVA, but no need for       \
                                                   >> 129                          * exception handler or EVA insn        \
                                                   >> 130                          */                                     \
                                                   >> 131                         insn reg, addr;                         \
                                                   >> 132                 .endif;                                         \
                                                   >> 133         .endif
                                                   >> 134 
                                                   >> 135 /*
                                                   >> 136  * Only on the 64-bit kernel can we make use of 64-bit registers.
                                                   >> 137  */
                                                   >> 138 #ifdef CONFIG_64BIT
                                                   >> 139 #define USE_DOUBLE
                                                   >> 140 #endif
                                                   >> 141 
                                                   >> 142 #ifdef USE_DOUBLE
                                                   >> 143 
                                                   >> 144 #define LOADK ld /* No exception */
                                                   >> 145 #define LOAD(reg, addr, handler)        EXC(ld, LD_INSN, reg, addr, handler)
                                                   >> 146 #define LOADL(reg, addr, handler)       EXC(ldl, LD_INSN, reg, addr, handler)
                                                   >> 147 #define LOADR(reg, addr, handler)       EXC(ldr, LD_INSN, reg, addr, handler)
                                                   >> 148 #define STOREL(reg, addr, handler)      EXC(sdl, ST_INSN, reg, addr, handler)
                                                   >> 149 #define STORER(reg, addr, handler)      EXC(sdr, ST_INSN, reg, addr, handler)
                                                   >> 150 #define STORE(reg, addr, handler)       EXC(sd, ST_INSN, reg, addr, handler)
                                                   >> 151 #define ADD    daddu
                                                   >> 152 #define SUB    dsubu
                                                   >> 153 #define SRL    dsrl
                                                   >> 154 #define SRA    dsra
                                                   >> 155 #define SLL    dsll
                                                   >> 156 #define SLLV   dsllv
                                                   >> 157 #define SRLV   dsrlv
                                                   >> 158 #define NBYTES 8
                                                   >> 159 #define LOG_NBYTES 3
                                                   >> 160 
                                                   >> 161 /*
                                                   >> 162  * As we are sharing the code base with the mips32 tree (which uses the o32 ABI
                                                   >> 163  * register definitions), we need to redefine the register definitions from
                                                   >> 164  * the n64 ABI register naming to the o32 ABI register naming.
                                                   >> 165  */
                                                   >> 166 #undef t0
                                                   >> 167 #undef t1
                                                   >> 168 #undef t2
                                                   >> 169 #undef t3
                                                   >> 170 #define t0      $8
                                                   >> 171 #define t1      $9
                                                   >> 172 #define t2      $10
                                                   >> 173 #define t3      $11
                                                   >> 174 #define t4      $12
                                                   >> 175 #define t5      $13
                                                   >> 176 #define t6      $14
                                                   >> 177 #define t7      $15
                                                   >> 178 
                                                   >> 179 #else
                                                   >> 180 
                                                   >> 181 #define LOADK lw /* No exception */
                                                   >> 182 #define LOAD(reg, addr, handler)        EXC(lw, LD_INSN, reg, addr, handler)
                                                   >> 183 #define LOADL(reg, addr, handler)       EXC(lwl, LD_INSN, reg, addr, handler)
                                                   >> 184 #define LOADR(reg, addr, handler)       EXC(lwr, LD_INSN, reg, addr, handler)
                                                   >> 185 #define STOREL(reg, addr, handler)      EXC(swl, ST_INSN, reg, addr, handler)
                                                   >> 186 #define STORER(reg, addr, handler)      EXC(swr, ST_INSN, reg, addr, handler)
                                                   >> 187 #define STORE(reg, addr, handler)       EXC(sw, ST_INSN, reg, addr, handler)
                                                   >> 188 #define ADD    addu
                                                   >> 189 #define SUB    subu
                                                   >> 190 #define SRL    srl
                                                   >> 191 #define SLL    sll
                                                   >> 192 #define SRA    sra
                                                   >> 193 #define SLLV   sllv
                                                   >> 194 #define SRLV   srlv
                                                   >> 195 #define NBYTES 4
                                                   >> 196 #define LOG_NBYTES 2
                                                   >> 197 
                                                   >> 198 #endif /* USE_DOUBLE */
                                                   >> 199 
                                                   >> 200 #define LOADB(reg, addr, handler)       EXC(lb, LD_INSN, reg, addr, handler)
                                                   >> 201 #define STOREB(reg, addr, handler)      EXC(sb, ST_INSN, reg, addr, handler)
                                                   >> 202 
                                                   >> 203 #define _PREF(hint, addr, type)                                         \
                                                   >> 204         .if \mode == LEGACY_MODE;                                       \
                                                   >> 205                 PREF(hint, addr);                                       \
                                                   >> 206         .else;                                                          \
                                                   >> 207                 .if ((\from == USEROP) && (type == SRC_PREFETCH)) ||    \
                                                   >> 208                     ((\to == USEROP) && (type == DST_PREFETCH));        \
                                                   >> 209                         /*                                              \
                                                   >> 210                          * PREFE has only 9 bits for the offset         \
                                                   >> 211                          * compared to PREF which has 16, so it may     \
                                                   >> 212                          * need to use the $at register but this        \
                                                   >> 213                          * register should remain intact because it's   \
                                                   >> 214                          * used later on. Therefore use $v1.            \
                                                   >> 215                          */                                             \
                                                   >> 216                         .set at=v1;                                     \
                                                   >> 217                         PREFE(hint, addr);                              \
                                                   >> 218                         .set noat;                                      \
                                                   >> 219                 .else;                                                  \
                                                   >> 220                         PREF(hint, addr);                               \
                                                   >> 221                 .endif;                                                 \
                                                   >> 222         .endif
                                                   >> 223 
                                                   >> 224 #define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
                                                   >> 225 #define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
                                                   >> 226 
                                                   >> 227 #ifdef CONFIG_CPU_LITTLE_ENDIAN
                                                   >> 228 #define LDFIRST LOADR
                                                   >> 229 #define LDREST  LOADL
                                                   >> 230 #define STFIRST STORER
                                                   >> 231 #define STREST  STOREL
                                                   >> 232 #define SHIFT_DISCARD SLLV
                                                   >> 233 #else
                                                   >> 234 #define LDFIRST LOADL
                                                   >> 235 #define LDREST  LOADR
                                                   >> 236 #define STFIRST STOREL
                                                   >> 237 #define STREST  STORER
                                                   >> 238 #define SHIFT_DISCARD SRLV
                                                   >> 239 #endif
                                                   >> 240 
                                                   >> 241 #define FIRST(unit) ((unit)*NBYTES)
                                                   >> 242 #define REST(unit)  (FIRST(unit)+NBYTES-1)
                                                   >> 243 #define UNIT(unit)  FIRST(unit)
                                                   >> 244 
                                                   >> 245 #define ADDRMASK (NBYTES-1)
 63                                                   246 
 64         .text                                     247         .text
                                                   >> 248         .set    noreorder
                                                   >> 249 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
                                                   >> 250         .set    noat
                                                   >> 251 #else
                                                   >> 252         .set    at=v1
                                                   >> 253 #endif
                                                   >> 254 
                                                   >> 255         .align  5
                                                   >> 256 
                                                   >> 257         /*
                                                   >> 258          * Macro to build the __copy_user common code
                                                   >> 259          * Arguments:
                                                   >> 260          * mode : LEGACY_MODE or EVA_MODE
                                                   >> 261          * from : Source operand. USEROP or KERNELOP
                                                   >> 262          * to   : Destination operand. USEROP or KERNELOP
                                                   >> 263          */
                                                   >> 264         .macro __BUILD_COPY_USER mode, from, to
                                                   >> 265 
                                                   >> 266         /* initialize __memcpy if this is the first time we execute this macro */
                                                   >> 267         .ifnotdef __memcpy
                                                   >> 268         .set __memcpy, 1
                                                   >> 269         .hidden __memcpy /* make sure it does not leak */
                                                   >> 270         .endif
                                                   >> 271 
                                                   >> 272         /*
                                                   >> 273          * Note: dst & src may be unaligned, len may be 0
                                                   >> 274          * Temps
                                                   >> 275          */
                                                   >> 276 #define rem t8
                                                   >> 277 
                                                   >> 278         R10KCBARRIER(0(ra))
                                                   >> 279         /*
                                                   >> 280          * The "issue break"s below are very approximate.
                                                   >> 281          * Issue delays for dcache fills will perturb the schedule, as will
                                                   >> 282          * load queue full replay traps, etc.
                                                   >> 283          *
                                                   >> 284          * If len < NBYTES use byte operations.
                                                   >> 285          */
                                                   >> 286         PREFS(  0, 0(src) )
                                                   >> 287         PREFD(  1, 0(dst) )
                                                   >> 288         sltu    t2, len, NBYTES
                                                   >> 289         and     t1, dst, ADDRMASK
                                                   >> 290         PREFS(  0, 1*32(src) )
                                                   >> 291         PREFD(  1, 1*32(dst) )
                                                   >> 292         bnez    t2, .Lcopy_bytes_checklen\@
                                                   >> 293          and    t0, src, ADDRMASK
                                                   >> 294         PREFS(  0, 2*32(src) )
                                                   >> 295         PREFD(  1, 2*32(dst) )
                                                   >> 296 #ifndef CONFIG_CPU_MIPSR6
                                                   >> 297         bnez    t1, .Ldst_unaligned\@
                                                   >> 298          nop
                                                   >> 299         bnez    t0, .Lsrc_unaligned_dst_aligned\@
                                                   >> 300 #else
                                                   >> 301         or      t0, t0, t1
                                                   >> 302         bnez    t0, .Lcopy_unaligned_bytes\@
                                                   >> 303 #endif
                                                   >> 304         /*
                                                   >> 305          * use delay slot for fall-through
                                                   >> 306          * src and dst are aligned; need to compute rem
                                                   >> 307          */
                                                   >> 308 .Lboth_aligned\@:
                                                   >> 309          SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
                                                   >> 310         beqz    t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
                                                   >> 311          and    rem, len, (8*NBYTES-1)   # rem = len % (8*NBYTES)
                                                   >> 312         PREFS(  0, 3*32(src) )
                                                   >> 313         PREFD(  1, 3*32(dst) )
 65         .align  4                                 314         .align  4
 66                                                << 
 67 FUNC(memmove)                                  << 
 68 EXPORT_SYMBOL(memmove)                         << 
 69         cmp             %o0, %o1               << 
 70         mov             %o0, %g7               << 
 71         bleu            9f                     << 
 72          sub            %o0, %o1, %o4          << 
 73                                                << 
 74         add             %o1, %o2, %o3          << 
 75         cmp             %o3, %o0               << 
 76         bleu            0f                     << 
 77          andcc          %o4, 3, %o5            << 
 78                                                << 
 79         add             %o1, %o2, %o1          << 
 80         add             %o0, %o2, %o0          << 
 81         sub             %o1, 1, %o1            << 
 82         sub             %o0, 1, %o0            << 
 83                                                << 
 84 1:      /* reverse_bytes */                    << 
 85                                                << 
 86         ldub            [%o1], %o4             << 
 87         subcc           %o2, 1, %o2            << 
 88         stb             %o4, [%o0]             << 
 89         sub             %o1, 1, %o1            << 
 90         bne             1b                     << 
 91          sub            %o0, 1, %o0            << 
 92                                                << 
 93         retl                                   << 
 94          mov            %g7, %o0               << 
 95                                                << 
 96 /* NOTE: This code is executed just for the ca << 
 97          where %src (=%o1) & 3 is != 0.        << 
 98          We need to align it to 4. So, for (%s << 
 99          1 we need to do ldub,lduh             << 
100          2 lduh                                << 
101          3 just ldub                           << 
102          so even if it looks weird, the branch << 
103          are correct here. -jj                 << 
104  */                                            << 
105 78:     /* dword_align */                      << 
106                                                << 
107         andcc           %o1, 1, %g0            << 
108         be              4f                     << 
109          andcc          %o1, 2, %g0            << 
110                                                << 
111         ldub            [%o1], %g2             << 
112         add             %o1, 1, %o1            << 
113         stb             %g2, [%o0]             << 
114         sub             %o2, 1, %o2            << 
115         bne             3f                     << 
116          add            %o0, 1, %o0            << 
117 4:                                             << 
118         lduh            [%o1], %g2             << 
119         add             %o1, 2, %o1            << 
120         sth             %g2, [%o0]             << 
121         sub             %o2, 2, %o2            << 
122         b               3f                     << 
123          add            %o0, 2, %o0            << 
124                                                << 
125 FUNC(memcpy)    /* %o0=dst %o1=src %o2=len */  << 
126 EXPORT_SYMBOL(memcpy)                          << 
127                                                << 
128         sub             %o0, %o1, %o4          << 
129         mov             %o0, %g7               << 
130 9:                                             << 
131         andcc           %o4, 3, %o5            << 
132 0:                                             << 
133         bne             86f                    << 
134          cmp            %o2, 15                << 
135                                                << 
136         bleu            90f                    << 
137          andcc          %o1, 3, %g0            << 
138                                                << 
139         bne             78b                    << 
140 3:                                             << 
141          andcc          %o1, 4, %g0            << 
142                                                << 
143         be              2f                     << 
144          mov            %o2, %g1               << 
145                                                << 
146         ld              [%o1], %o4             << 
147         sub             %g1, 4, %g1            << 
148         st              %o4, [%o0]             << 
149         add             %o1, 4, %o1            << 
150         add             %o0, 4, %o0            << 
151 2:                                             << 
152         andcc           %g1, 0xffffff80, %g0   << 
153         be              3f                     << 
154          andcc          %o0, 4, %g0            << 
155                                                << 
156         be              82f + 4                << 
157 5:                                             << 
158         MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4 << 
159         MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4 << 
160         MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4 << 
161         MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4 << 
162         sub             %g1, 128, %g1          << 
163         add             %o1, 128, %o1          << 
164         cmp             %g1, 128               << 
165         bge             5b                     << 
166          add            %o0, 128, %o0          << 
167 3:                                             << 
168         andcc           %g1, 0x70, %g4         << 
169         be              80f                    << 
170          andcc          %g1, 8, %g0            << 
171                                                << 
172         sethi           %hi(80f), %o5          << 
173         srl             %g4, 1, %o4            << 
174         add             %g4, %o4, %o4          << 
175         add             %o1, %g4, %o1          << 
176         sub             %o5, %o4, %o5          << 
177         jmpl            %o5 + %lo(80f), %g0    << 
178          add            %o0, %g4, %o0          << 
179                                                << 
180 79:     /* memcpy_table */                     << 
181                                                << 
182         MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g << 
183         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g << 
184         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g << 
185         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g << 
186         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g << 
187         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g << 
188         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g << 
189                                                << 
190 80:     /* memcpy_table_end */                 << 
191         be              81f                    << 
192          andcc          %g1, 4, %g0            << 
193                                                << 
194         ldd             [%o1], %g2             << 
195         add             %o0, 8, %o0            << 
196         st              %g2, [%o0 - 0x08]      << 
197         add             %o1, 8, %o1            << 
198         st              %g3, [%o0 - 0x04]      << 
199                                                << 
200 81:     /* memcpy_last7 */                     << 
201                                                << 
202         be              1f                     << 
203          andcc          %g1, 2, %g0            << 
204                                                << 
205         ld              [%o1], %g2             << 
206         add             %o1, 4, %o1            << 
207         st              %g2, [%o0]             << 
208         add             %o0, 4, %o0            << 
209 1:                                                315 1:
210         be              1f                     !! 316         R10KCBARRIER(0(ra))
211          andcc          %g1, 1, %g0            !! 317         LOAD(t0, UNIT(0)(src), .Ll_exc\@)
                                                   >> 318         LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
                                                   >> 319         LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
                                                   >> 320         LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
                                                   >> 321         SUB     len, len, 8*NBYTES
                                                   >> 322         LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
                                                   >> 323         LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
                                                   >> 324         STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@)
                                                   >> 325         STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@)
                                                   >> 326         LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
                                                   >> 327         LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
                                                   >> 328         ADD     src, src, 8*NBYTES
                                                   >> 329         ADD     dst, dst, 8*NBYTES
                                                   >> 330         STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
                                                   >> 331         STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
                                                   >> 332         STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
                                                   >> 333         STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
                                                   >> 334         STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
                                                   >> 335         STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
                                                   >> 336         PREFS(  0, 8*32(src) )
                                                   >> 337         PREFD(  1, 8*32(dst) )
                                                   >> 338         bne     len, rem, 1b
                                                   >> 339          nop
212                                                   340 
213         lduh            [%o1], %g2             !! 341         /*
214         add             %o1, 2, %o1            !! 342          * len == rem == the number of bytes left to copy < 8*NBYTES
215         sth             %g2, [%o0]             !! 343          */
216         add             %o0, 2, %o0            !! 344 .Lcleanup_both_aligned\@:
                                                   >> 345         beqz    len, .Ldone\@
                                                   >> 346          sltu   t0, len, 4*NBYTES
                                                   >> 347         bnez    t0, .Lless_than_4units\@
                                                   >> 348          and    rem, len, (NBYTES-1)    # rem = len % NBYTES
                                                   >> 349         /*
                                                   >> 350          * len >= 4*NBYTES
                                                   >> 351          */
                                                   >> 352         LOAD( t0, UNIT(0)(src), .Ll_exc\@)
                                                   >> 353         LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@)
                                                   >> 354         LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@)
                                                   >> 355         LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@)
                                                   >> 356         SUB     len, len, 4*NBYTES
                                                   >> 357         ADD     src, src, 4*NBYTES
                                                   >> 358         R10KCBARRIER(0(ra))
                                                   >> 359         STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
                                                   >> 360         STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
                                                   >> 361         STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
                                                   >> 362         STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
                                                   >> 363         .set    reorder                         /* DADDI_WAR */
                                                   >> 364         ADD     dst, dst, 4*NBYTES
                                                   >> 365         beqz    len, .Ldone\@
                                                   >> 366         .set    noreorder
                                                   >> 367 .Lless_than_4units\@:
                                                   >> 368         /*
                                                   >> 369          * rem = len % NBYTES
                                                   >> 370          */
                                                   >> 371         beq     rem, len, .Lcopy_bytes\@
                                                   >> 372          nop
                                                   >> 373 1:
                                                   >> 374         R10KCBARRIER(0(ra))
                                                   >> 375         LOAD(t0, 0(src), .Ll_exc\@)
                                                   >> 376         ADD     src, src, NBYTES
                                                   >> 377         SUB     len, len, NBYTES
                                                   >> 378         STORE(t0, 0(dst), .Ls_exc_p1u\@)
                                                   >> 379         .set    reorder                         /* DADDI_WAR */
                                                   >> 380         ADD     dst, dst, NBYTES
                                                   >> 381         bne     rem, len, 1b
                                                   >> 382         .set    noreorder
                                                   >> 383 
                                                   >> 384 #ifndef CONFIG_CPU_MIPSR6
                                                   >> 385         /*
                                                   >> 386          * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
                                                   >> 387          * A loop would do only a byte at a time with possible branch
                                                   >> 388          * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
                                                   >> 389          * because can't assume read-access to dst.  Instead, use
                                                   >> 390          * STREST dst, which doesn't require read access to dst.
                                                   >> 391          *
                                                   >> 392          * This code should perform better than a simple loop on modern,
                                                   >> 393          * wide-issue mips processors because the code has fewer branches and
                                                   >> 394          * more instruction-level parallelism.
                                                   >> 395          */
                                                   >> 396 #define bits t2
                                                   >> 397         beqz    len, .Ldone\@
                                                   >> 398          ADD    t1, dst, len    # t1 is just past last byte of dst
                                                   >> 399         li      bits, 8*NBYTES
                                                   >> 400         SLL     rem, len, 3     # rem = number of bits to keep
                                                   >> 401         LOAD(t0, 0(src), .Ll_exc\@)
                                                   >> 402         SUB     bits, bits, rem # bits = number of bits to discard
                                                   >> 403         SHIFT_DISCARD t0, t0, bits
                                                   >> 404         STREST(t0, -1(t1), .Ls_exc\@)
                                                   >> 405         jr      ra
                                                   >> 406          move   len, zero
                                                   >> 407 .Ldst_unaligned\@:
                                                   >> 408         /*
                                                   >> 409          * dst is unaligned
                                                   >> 410          * t0 = src & ADDRMASK
                                                   >> 411          * t1 = dst & ADDRMASK; T1 > 0
                                                   >> 412          * len >= NBYTES
                                                   >> 413          *
                                                   >> 414          * Copy enough bytes to align dst
                                                   >> 415          * Set match = (src and dst have same alignment)
                                                   >> 416          */
                                                   >> 417 #define match rem
                                                   >> 418         LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
                                                   >> 419         ADD     t2, zero, NBYTES
                                                   >> 420         LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
                                                   >> 421         SUB     t2, t2, t1      # t2 = number of bytes copied
                                                   >> 422         xor     match, t0, t1
                                                   >> 423         R10KCBARRIER(0(ra))
                                                   >> 424         STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
                                                   >> 425         beq     len, t2, .Ldone\@
                                                   >> 426          SUB    len, len, t2
                                                   >> 427         ADD     dst, dst, t2
                                                   >> 428         beqz    match, .Lboth_aligned\@
                                                   >> 429          ADD    src, src, t2
                                                   >> 430 
                                                   >> 431 .Lsrc_unaligned_dst_aligned\@:
                                                   >> 432         SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
                                                   >> 433         PREFS(  0, 3*32(src) )
                                                   >> 434         beqz    t0, .Lcleanup_src_unaligned\@
                                                   >> 435          and    rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
                                                   >> 436         PREFD(  1, 3*32(dst) )
217 1:                                                437 1:
218         be              1f                     !! 438 /*
                                                   >> 439  * Avoid consecutive LD*'s to the same register since some mips
                                                   >> 440  * implementations can't issue them in the same cycle.
                                                   >> 441  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
                                                   >> 442  * are to the same unit (unless src is aligned, but it's not).
                                                   >> 443  */
                                                   >> 444         R10KCBARRIER(0(ra))
                                                   >> 445         LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
                                                   >> 446         LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
                                                   >> 447         SUB     len, len, 4*NBYTES
                                                   >> 448         LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
                                                   >> 449         LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
                                                   >> 450         LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
                                                   >> 451         LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
                                                   >> 452         LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
                                                   >> 453         LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
                                                   >> 454         PREFS(  0, 9*32(src) )          # 0 is PREF_LOAD  (not streamed)
                                                   >> 455         ADD     src, src, 4*NBYTES
                                                   >> 456 #ifdef CONFIG_CPU_SB1
                                                   >> 457         nop                             # improves slotting
                                                   >> 458 #endif
                                                   >> 459         STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
                                                   >> 460         STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
                                                   >> 461         STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
                                                   >> 462         STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
                                                   >> 463         PREFD(  1, 9*32(dst) )          # 1 is PREF_STORE (not streamed)
                                                   >> 464         .set    reorder                         /* DADDI_WAR */
                                                   >> 465         ADD     dst, dst, 4*NBYTES
                                                   >> 466         bne     len, rem, 1b
                                                   >> 467         .set    noreorder
                                                   >> 468 
                                                   >> 469 .Lcleanup_src_unaligned\@:
                                                   >> 470         beqz    len, .Ldone\@
                                                   >> 471          and    rem, len, NBYTES-1  # rem = len % NBYTES
                                                   >> 472         beq     rem, len, .Lcopy_bytes\@
219          nop                                      473          nop
220                                                << 
221         ldub            [%o1], %g2             << 
222         stb             %g2, [%o0]             << 
223 1:                                                474 1:
224         retl                                   !! 475         R10KCBARRIER(0(ra))
225          mov            %g7, %o0               !! 476         LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
                                                   >> 477         LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
                                                   >> 478         ADD     src, src, NBYTES
                                                   >> 479         SUB     len, len, NBYTES
                                                   >> 480         STORE(t0, 0(dst), .Ls_exc_p1u\@)
                                                   >> 481         .set    reorder                         /* DADDI_WAR */
                                                   >> 482         ADD     dst, dst, NBYTES
                                                   >> 483         bne     len, rem, 1b
                                                   >> 484         .set    noreorder
                                                   >> 485 
                                                   >> 486 #endif /* !CONFIG_CPU_MIPSR6 */
                                                   >> 487 .Lcopy_bytes_checklen\@:
                                                   >> 488         beqz    len, .Ldone\@
                                                   >> 489          nop
                                                   >> 490 .Lcopy_bytes\@:
                                                   >> 491         /* 0 < len < NBYTES  */
                                                   >> 492         R10KCBARRIER(0(ra))
                                                   >> 493 #define COPY_BYTE(N)                    \
                                                   >> 494         LOADB(t0, N(src), .Ll_exc\@);   \
                                                   >> 495         SUB     len, len, 1;            \
                                                   >> 496         beqz    len, .Ldone\@;          \
                                                   >> 497         STOREB(t0, N(dst), .Ls_exc_p1\@)
                                                   >> 498 
                                                   >> 499         COPY_BYTE(0)
                                                   >> 500         COPY_BYTE(1)
                                                   >> 501 #ifdef USE_DOUBLE
                                                   >> 502         COPY_BYTE(2)
                                                   >> 503         COPY_BYTE(3)
                                                   >> 504         COPY_BYTE(4)
                                                   >> 505         COPY_BYTE(5)
                                                   >> 506 #endif
                                                   >> 507         LOADB(t0, NBYTES-2(src), .Ll_exc\@)
                                                   >> 508         SUB     len, len, 1
                                                   >> 509         jr      ra
                                                   >> 510         STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
                                                   >> 511 .Ldone\@:
                                                   >> 512         jr      ra
                                                   >> 513          nop
226                                                   514 
227 82:     /* ldd_std */                          !! 515 #ifdef CONFIG_CPU_MIPSR6
228         MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o !! 516 .Lcopy_unaligned_bytes\@:
229         MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o << 
230         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o << 
231         MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o << 
232         subcc           %g1, 128, %g1          << 
233         add             %o1, 128, %o1          << 
234         cmp             %g1, 128               << 
235         bge             82b                    << 
236          add            %o0, 128, %o0          << 
237                                                << 
238         andcc           %g1, 0x70, %g4         << 
239         be              84f                    << 
240          andcc          %g1, 8, %g0            << 
241                                                << 
242         sethi           %hi(84f), %o5          << 
243         add             %o1, %g4, %o1          << 
244         sub             %o5, %g4, %o5          << 
245         jmpl            %o5 + %lo(84f), %g0    << 
246          add            %o0, %g4, %o0          << 
247                                                << 
248 83:     /* amemcpy_table */                    << 
249                                                << 
250         MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2,  << 
251         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2,  << 
252         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2,  << 
253         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2,  << 
254         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2,  << 
255         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2,  << 
256         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2,  << 
257                                                << 
258 84:     /* amemcpy_table_end */                << 
259         be              85f                    << 
260          andcc          %g1, 4, %g0            << 
261                                                << 
262         ldd             [%o1], %g2             << 
263         add             %o0, 8, %o0            << 
264         std             %g2, [%o0 - 0x08]      << 
265         add             %o1, 8, %o1            << 
266 85:     /* amemcpy_last7 */                    << 
267         be              1f                     << 
268          andcc          %g1, 2, %g0            << 
269                                                << 
270         ld              [%o1], %g2             << 
271         add             %o1, 4, %o1            << 
272         st              %g2, [%o0]             << 
273         add             %o0, 4, %o0            << 
274 1:                                                517 1:
275         be              1f                     !! 518         COPY_BYTE(0)
276          andcc          %g1, 1, %g0            !! 519         COPY_BYTE(1)
277                                                !! 520         COPY_BYTE(2)
278         lduh            [%o1], %g2             !! 521         COPY_BYTE(3)
279         add             %o1, 2, %o1            !! 522         COPY_BYTE(4)
280         sth             %g2, [%o0]             !! 523         COPY_BYTE(5)
281         add             %o0, 2, %o0            !! 524         COPY_BYTE(6)
                                                   >> 525         COPY_BYTE(7)
                                                   >> 526         ADD     src, src, 8
                                                   >> 527         b       1b
                                                   >> 528          ADD    dst, dst, 8
                                                   >> 529 #endif /* CONFIG_CPU_MIPSR6 */
                                                   >> 530         .if __memcpy == 1
                                                   >> 531         END(memcpy)
                                                   >> 532         .set __memcpy, 0
                                                   >> 533         .hidden __memcpy
                                                   >> 534         .endif
                                                   >> 535 
                                                   >> 536 .Ll_exc_copy\@:
                                                   >> 537         /*
                                                   >> 538          * Copy bytes from src until faulting load address (or until a
                                                   >> 539          * lb faults)
                                                   >> 540          *
                                                   >> 541          * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
                                                   >> 542          * may be more than a byte beyond the last address.
                                                   >> 543          * Hence, the lb below may get an exception.
                                                   >> 544          *
                                                   >> 545          * Assumes src < THREAD_BUADDR($28)
                                                   >> 546          */
                                                   >> 547         LOADK   t0, TI_TASK($28)
                                                   >> 548          nop
                                                   >> 549         LOADK   t0, THREAD_BUADDR(t0)
282 1:                                                550 1:
283         be              1f                     !! 551         LOADB(t1, 0(src), .Ll_exc\@)
                                                   >> 552         ADD     src, src, 1
                                                   >> 553         sb      t1, 0(dst)      # can't fault -- we're copy_from_user
                                                   >> 554         .set    reorder                         /* DADDI_WAR */
                                                   >> 555         ADD     dst, dst, 1
                                                   >> 556         bne     src, t0, 1b
                                                   >> 557         .set    noreorder
                                                   >> 558 .Ll_exc\@:
                                                   >> 559         LOADK   t0, TI_TASK($28)
                                                   >> 560          nop
                                                   >> 561         LOADK   t0, THREAD_BUADDR(t0)   # t0 is just past last good address
                                                   >> 562          nop
                                                   >> 563         SUB     len, AT, t0             # len number of uncopied bytes
                                                   >> 564         bnez    t6, .Ldone\@    /* Skip the zeroing part if inatomic */
                                                   >> 565         /*
                                                   >> 566          * Here's where we rely on src and dst being incremented in tandem,
                                                   >> 567          *   See (3) above.
                                                   >> 568          * dst += (fault addr - src) to put dst at first byte to clear
                                                   >> 569          */
                                                   >> 570         ADD     dst, t0                 # compute start address in a1
                                                   >> 571         SUB     dst, src
                                                   >> 572         /*
                                                   >> 573          * Clear len bytes starting at dst.  Can't call __bzero because it
                                                   >> 574          * might modify len.  An inefficient loop for these rare times...
                                                   >> 575          */
                                                   >> 576         .set    reorder                         /* DADDI_WAR */
                                                   >> 577         SUB     src, len, 1
                                                   >> 578         beqz    len, .Ldone\@
                                                   >> 579         .set    noreorder
                                                   >> 580 1:      sb      zero, 0(dst)
                                                   >> 581         ADD     dst, dst, 1
                                                   >> 582 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
                                                   >> 583         bnez    src, 1b
                                                   >> 584          SUB    src, src, 1
                                                   >> 585 #else
                                                   >> 586         .set    push
                                                   >> 587         .set    noat
                                                   >> 588         li      v1, 1
                                                   >> 589         bnez    src, 1b
                                                   >> 590          SUB    src, src, v1
                                                   >> 591         .set    pop
                                                   >> 592 #endif
                                                   >> 593         jr      ra
284          nop                                      594          nop
285                                                   595 
286         ldub            [%o1], %g2             << 
287         stb             %g2, [%o0]             << 
288 1:                                             << 
289         retl                                   << 
290          mov            %g7, %o0               << 
291                                                   596 
292 86:     /* non_aligned */                      !! 597 #define SEXC(n)                                                 \
293         cmp             %o2, 6                 !! 598         .set    reorder;                        /* DADDI_WAR */ \
294         bleu            88f                    !! 599 .Ls_exc_p ## n ## u\@:                                          \
295          nop                                   !! 600         ADD     len, len, n*NBYTES;                             \
296                                                !! 601         jr      ra;                                             \
297         save            %sp, -96, %sp          !! 602         .set    noreorder
298         andcc           %i0, 3, %g0            !! 603 
299         be              61f                    !! 604 SEXC(8)
300          andcc          %i0, 1, %g0            !! 605 SEXC(7)
301         be              60f                    !! 606 SEXC(6)
302          andcc          %i0, 2, %g0            !! 607 SEXC(5)
303                                                !! 608 SEXC(4)
304         ldub            [%i1], %g5             !! 609 SEXC(3)
305         add             %i1, 1, %i1            !! 610 SEXC(2)
306         stb             %g5, [%i0]             !! 611 SEXC(1)
307         sub             %i2, 1, %i2            !! 612 
308         bne             61f                    !! 613 .Ls_exc_p1\@:
309          add            %i0, 1, %i0            !! 614         .set    reorder                         /* DADDI_WAR */
310 60:                                            !! 615         ADD     len, len, 1
311         ldub            [%i1], %g3             !! 616         jr      ra
312         add             %i1, 2, %i1            !! 617         .set    noreorder
313         stb             %g3, [%i0]             !! 618 .Ls_exc\@:
314         sub             %i2, 2, %i2            !! 619         jr      ra
315         ldub            [%i1 - 1], %g3         << 
316         add             %i0, 2, %i0            << 
317         stb             %g3, [%i0 - 1]         << 
318 61:                                            << 
319         and             %i1, 3, %g2            << 
320         and             %i2, 0xc, %g3          << 
321         and             %i1, -4, %i1           << 
322         cmp             %g3, 4                 << 
323         sll             %g2, 3, %g4            << 
324         mov             32, %g2                << 
325         be              4f                     << 
326          sub            %g2, %g4, %l0          << 
327                                                << 
328         blu             3f                     << 
329          cmp            %g3, 0x8               << 
330                                                << 
331         be              2f                     << 
332          srl            %i2, 2, %g3            << 
333                                                << 
334         ld              [%i1], %i3             << 
335         add             %i0, -8, %i0           << 
336         ld              [%i1 + 4], %i4         << 
337         b               8f                     << 
338          add            %g3, 1, %g3            << 
339 2:                                             << 
340         ld              [%i1], %i4             << 
341         add             %i0, -12, %i0          << 
342         ld              [%i1 + 4], %i5         << 
343         add             %g3, 2, %g3            << 
344         b               9f                     << 
345          add            %i1, -4, %i1           << 
346 3:                                             << 
347         ld              [%i1], %g1             << 
348         add             %i0, -4, %i0           << 
349         ld              [%i1 + 4], %i3         << 
350         srl             %i2, 2, %g3            << 
351         b               7f                     << 
352          add            %i1, 4, %i1            << 
353 4:                                             << 
354         ld              [%i1], %i5             << 
355         cmp             %i2, 7                 << 
356         ld              [%i1 + 4], %g1         << 
357         srl             %i2, 2, %g3            << 
358         bleu            10f                    << 
359          add            %i1, 8, %i1            << 
360                                                << 
361         ld              [%i1], %i3             << 
362         add             %g3, -1, %g3           << 
363 5:                                             << 
364         sll             %i5, %g4, %g2          << 
365         srl             %g1, %l0, %g5          << 
366         or              %g2, %g5, %g2          << 
367         st              %g2, [%i0]             << 
368 7:                                             << 
369         ld              [%i1 + 4], %i4         << 
370         sll             %g1, %g4, %g2          << 
371         srl             %i3, %l0, %g5          << 
372         or              %g2, %g5, %g2          << 
373         st              %g2, [%i0 + 4]         << 
374 8:                                             << 
375         ld              [%i1 + 8], %i5         << 
376         sll             %i3, %g4, %g2          << 
377         srl             %i4, %l0, %g5          << 
378         or              %g2, %g5, %g2          << 
379         st              %g2, [%i0 + 8]         << 
380 9:                                             << 
381         ld              [%i1 + 12], %g1        << 
382         sll             %i4, %g4, %g2          << 
383         srl             %i5, %l0, %g5          << 
384         addcc           %g3, -4, %g3           << 
385         or              %g2, %g5, %g2          << 
386         add             %i1, 16, %i1           << 
387         st              %g2, [%i0 + 12]        << 
388         add             %i0, 16, %i0           << 
389         bne,a           5b                     << 
390          ld             [%i1], %i3             << 
391 10:                                            << 
392         sll             %i5, %g4, %g2          << 
393         srl             %g1, %l0, %g5          << 
394         srl             %l0, 3, %g3            << 
395         or              %g2, %g5, %g2          << 
396         sub             %i1, %g3, %i1          << 
397         andcc           %i2, 2, %g0            << 
398         st              %g2, [%i0]             << 
399         be              1f                     << 
400          andcc          %i2, 1, %g0            << 
401                                                << 
402         ldub            [%i1], %g2             << 
403         add             %i1, 2, %i1            << 
404         stb             %g2, [%i0 + 4]         << 
405         add             %i0, 2, %i0            << 
406         ldub            [%i1 - 1], %g2         << 
407         stb             %g2, [%i0 + 3]         << 
408 1:                                             << 
409         be              1f                     << 
410          nop                                      620          nop
411         ldub            [%i1], %g2             !! 621         .endm
412         stb             %g2, [%i0 + 4]         << 
413 1:                                             << 
414         ret                                    << 
415          restore        %g7, %g0, %o0          << 
416                                                   622 
417 88:     /* short_end */                        !! 623         .align  5
                                                   >> 624 LEAF(memmove)
                                                   >> 625         ADD     t0, a0, a2
                                                   >> 626         ADD     t1, a1, a2
                                                   >> 627         sltu    t0, a1, t0                      # dst + len <= src -> memcpy
                                                   >> 628         sltu    t1, a0, t1                      # dst >= src + len -> memcpy
                                                   >> 629         and     t0, t1
                                                   >> 630         beqz    t0, .L__memcpy
                                                   >> 631          move   v0, a0                          /* return value */
                                                   >> 632         beqz    a2, .Lr_out
                                                   >> 633         END(memmove)
                                                   >> 634 
                                                   >> 635         /* fall through to __rmemcpy */
                                                   >> 636 LEAF(__rmemcpy)                                 /* a0=dst a1=src a2=len */
                                                   >> 637          sltu   t0, a1, a0
                                                   >> 638         beqz    t0, .Lr_end_bytes_up            # src >= dst
                                                   >> 639          nop
                                                   >> 640         ADD     a0, a2                          # dst = dst + len
                                                   >> 641         ADD     a1, a2                          # src = src + len
418                                                   642 
419         and             %o2, 0xe, %o3          !! 643 .Lr_end_bytes:
420 20:                                            !! 644         R10KCBARRIER(0(ra))
421         sethi           %hi(89f), %o5          !! 645         lb      t0, -1(a1)
422         sll             %o3, 3, %o4            !! 646         SUB     a2, a2, 0x1
423         add             %o0, %o3, %o0          !! 647         sb      t0, -1(a0)
424         sub             %o5, %o4, %o5          !! 648         SUB     a1, a1, 0x1
425         add             %o1, %o3, %o1          !! 649         .set    reorder                         /* DADDI_WAR */
426         jmpl            %o5 + %lo(89f), %g0    !! 650         SUB     a0, a0, 0x1
427          andcc          %o2, 1, %g0            !! 651         bnez    a2, .Lr_end_bytes
428                                                !! 652         .set    noreorder
429         MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)  !! 653 
430         MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)  !! 654 .Lr_out:
431         MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)  !! 655         jr      ra
432         MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)  !! 656          move   a2, zero
433         MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)  !! 657 
434         MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)  !! 658 .Lr_end_bytes_up:
435         MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)  !! 659         R10KCBARRIER(0(ra))
                                                   >> 660         lb      t0, (a1)
                                                   >> 661         SUB     a2, a2, 0x1
                                                   >> 662         sb      t0, (a0)
                                                   >> 663         ADD     a1, a1, 0x1
                                                   >> 664         .set    reorder                         /* DADDI_WAR */
                                                   >> 665         ADD     a0, a0, 0x1
                                                   >> 666         bnez    a2, .Lr_end_bytes_up
                                                   >> 667         .set    noreorder
                                                   >> 668 
                                                   >> 669         jr      ra
                                                   >> 670          move   a2, zero
                                                   >> 671         END(__rmemcpy)
436                                                   672 
437 89:     /* short_table_end */                  !! 673 /*
                                                   >> 674  * t6 is used as a flag to note inatomic mode.
                                                   >> 675  */
                                                   >> 676 LEAF(__copy_user_inatomic)
                                                   >> 677         b       __copy_user_common
                                                   >> 678         li      t6, 1
                                                   >> 679         END(__copy_user_inatomic)
                                                   >> 680 
                                                   >> 681 /*
                                                   >> 682  * A combined memcpy/__copy_user
                                                   >> 683  * __copy_user sets len to 0 for success; else to an upper bound of
                                                   >> 684  * the number of uncopied bytes.
                                                   >> 685  * memcpy sets v0 to dst.
                                                   >> 686  */
                                                   >> 687         .align  5
                                                   >> 688 LEAF(memcpy)                                    /* a0=dst a1=src a2=len */
                                                   >> 689         move    v0, dst                         /* return value */
                                                   >> 690 .L__memcpy:
                                                   >> 691 FEXPORT(__copy_user)
                                                   >> 692         li      t6, 0   /* not inatomic */
                                                   >> 693 __copy_user_common:
                                                   >> 694         /* Legacy Mode, user <-> user */
                                                   >> 695         __BUILD_COPY_USER LEGACY_MODE USEROP USEROP
                                                   >> 696 
                                                   >> 697 #ifdef CONFIG_EVA
                                                   >> 698 
                                                   >> 699 /*
                                                   >> 700  * For EVA we need distinct symbols for reading and writing to user space.
                                                   >> 701  * This is because we need to use specific EVA instructions to perform the
                                                   >> 702  * virtual <-> physical translation when a virtual address is actually in user
                                                   >> 703  * space
                                                   >> 704  */
438                                                   705 
439         be              1f                     !! 706 LEAF(__copy_user_inatomic_eva)
440          nop                                   !! 707         b       __copy_from_user_common
                                                   >> 708         li      t6, 1
                                                   >> 709         END(__copy_user_inatomic_eva)
441                                                   710 
442         ldub            [%o1], %g2             !! 711 /*
443         stb             %g2, [%o0]             !! 712  * __copy_from_user (EVA)
444 1:                                             !! 713  */
445         retl                                   << 
446          mov            %g7, %o0               << 
447                                                   714 
448 90:     /* short_aligned_end */                !! 715 LEAF(__copy_from_user_eva)
449         bne             88b                    !! 716         li      t6, 0   /* not inatomic */
450          andcc          %o2, 8, %g0            !! 717 __copy_from_user_common:
451                                                !! 718         __BUILD_COPY_USER EVA_MODE USEROP KERNELOP
452         be              1f                     !! 719 END(__copy_from_user_eva)
453          andcc          %o2, 4, %g0            !! 720 
454                                                !! 721 
455         ld              [%o1 + 0x00], %g2      !! 722 
456         ld              [%o1 + 0x04], %g3      !! 723 /*
457         add             %o1, 8, %o1            !! 724  * __copy_to_user (EVA)
458         st              %g2, [%o0 + 0x00]      !! 725  */
459         st              %g3, [%o0 + 0x04]      !! 726 
460         add             %o0, 8, %o0            !! 727 LEAF(__copy_to_user_eva)
461 1:                                             !! 728 __BUILD_COPY_USER EVA_MODE KERNELOP USEROP
462         b               81b                    !! 729 END(__copy_to_user_eva)
463          mov            %o2, %g1               !! 730 
                                                   >> 731 /*
                                                   >> 732  * __copy_in_user (EVA)
                                                   >> 733  */
                                                   >> 734 
                                                   >> 735 LEAF(__copy_in_user_eva)
                                                   >> 736 __BUILD_COPY_USER EVA_MODE USEROP USEROP
                                                   >> 737 END(__copy_in_user_eva)
                                                   >> 738 
                                                   >> 739 #endif
~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.
TOMOYO Linux Cross Reference Linux/arch/sparc/lib/memcpy.S

Diff markup

Differences between /arch/sparc/lib/memcpy.S (Version linux-6.12-rc7) and /arch/mips/lib/memcpy.S (Version linux-4.10.17)

TOMOYO Linux Cross Reference
Linux/arch/sparc/lib/memcpy.S