~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/riscv/lib/memcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/riscv/lib/memcpy.S (Version linux-6.12-rc7) and /arch/mips/lib/memcpy.S (Version linux-2.4.37.11)


  1 /* SPDX-License-Identifier: GPL-2.0-only */    << 
  2 /*                                                  1 /*
  3  * Copyright (C) 2013 Regents of the Universit !!   2  * This file is subject to the terms and conditions of the GNU General Public
                                                   >>   3  * License.  See the file "COPYING" in the main directory of this archive
                                                   >>   4  * for more details.
                                                   >>   5  *
                                                   >>   6  * Unified implementation of memcpy, memmove and the __copy_user backend.
                                                   >>   7  *
                                                   >>   8  * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
                                                   >>   9  * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
                                                   >>  10  * Copyright (C) 2002 Broadcom, Inc.
                                                   >>  11  *   memcpy/copy_user author: Mark Vandevoorde
                                                   >>  12  *
                                                   >>  13  * Mnemonic names for arguments to memcpy/__copy_user
  4  */                                                14  */
  5                                                !!  15 #include <linux/config.h>
  6 #include <linux/linkage.h>                     << 
  7 #include <asm/asm.h>                               16 #include <asm/asm.h>
                                                   >>  17 #include <asm/offset.h>
                                                   >>  18 #include <asm/regdef.h>
                                                   >>  19 
                                                   >>  20 #define dst a0
                                                   >>  21 #define src a1
                                                   >>  22 #define len a2
                                                   >>  23 
                                                   >>  24 /*
                                                   >>  25  * Spec
                                                   >>  26  *
                                                   >>  27  * memcpy copies len bytes from src to dst and sets v0 to dst.
                                                   >>  28  * It assumes that
                                                   >>  29  *   - src and dst don't overlap
                                                   >>  30  *   - src is readable
                                                   >>  31  *   - dst is writable
                                                   >>  32  * memcpy uses the standard calling convention
                                                   >>  33  *
                                                   >>  34  * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
                                                   >>  35  * the number of uncopied bytes due to an exception caused by a read or write.
                                                   >>  36  * __copy_user assumes that src and dst don't overlap, and that the call is
                                                   >>  37  * implementing one of the following:
                                                   >>  38  *   copy_to_user
                                                   >>  39  *     - src is readable  (no exceptions when reading src)
                                                   >>  40  *   copy_from_user
                                                   >>  41  *     - dst is writable  (no exceptions when writing dst)
                                                   >>  42  * __copy_user uses a non-standard calling convention; see
                                                   >>  43  * include/asm-mips/uaccess.h
                                                   >>  44  *
                                                   >>  45  * When an exception happens on a load, the handler must
                                                   >>  46  * ensure that all of the destination buffer is overwritten to prevent
                                                   >>  47  * leaking information to user mode programs.
                                                   >>  48  */
                                                   >>  49 
                                                   >>  50 /*
                                                   >>  51  * Implementation
                                                   >>  52  */
                                                   >>  53 
                                                   >>  54 /*
                                                   >>  55  * The exception handler for loads requires that:
                                                   >>  56  *  1- AT contain the address of the byte just past the end of the source
                                                   >>  57  *     of the copy,
                                                   >>  58  *  2- src_entry <= src < AT, and
                                                   >>  59  *  3- (dst - src) == (dst_entry - src_entry),
                                                   >>  60  * The _entry suffix denotes values when __copy_user was called.
                                                   >>  61  *
                                                   >>  62  * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
                                                   >>  63  * (2) is met by incrementing src by the number of bytes copied
                                                   >>  64  * (3) is met by not doing loads between a pair of increments of dst and src
                                                   >>  65  *
                                                   >>  66  * The exception handlers for stores adjust len (if necessary) and return.
                                                   >>  67  * These handlers do not need to overwrite any data.
                                                   >>  68  *
                                                   >>  69  * For __rmemcpy and memmove an exception is always a kernel bug, therefore
                                                   >>  70  * they're not protected.
                                                   >>  71  */
                                                   >>  72 
                                                   >>  73 #define EXC(inst_reg,addr,handler)              \
                                                   >>  74 9:      inst_reg, addr;                         \
                                                   >>  75         .section __ex_table,"a";                \
                                                   >>  76         PTR     9b, handler;                    \
                                                   >>  77         .previous
                                                   >>  78 
                                                   >>  79 /*
                                                   >>  80  * Only on the 64-bit kernel can we make use of 64-bit registers.
                                                   >>  81  */
                                                   >>  82 #ifdef CONFIG_MIPS64
                                                   >>  83 #define USE_DOUBLE
                                                   >>  84 #endif
                                                   >>  85 
                                                   >>  86 #ifdef USE_DOUBLE
                                                   >>  87 
                                                   >>  88 #define LOAD   ld
                                                   >>  89 #define LOADL  ldl
                                                   >>  90 #define LOADR  ldr
                                                   >>  91 #define STOREL sdl
                                                   >>  92 #define STORER sdr
                                                   >>  93 #define STORE  sd
                                                   >>  94 #define ADD    daddu
                                                   >>  95 #define SUB    dsubu
                                                   >>  96 #define SRL    dsrl
                                                   >>  97 #define SRA    dsra
                                                   >>  98 #define SLL    dsll
                                                   >>  99 #define SLLV   dsllv
                                                   >> 100 #define SRLV   dsrlv
                                                   >> 101 #define NBYTES 8
                                                   >> 102 #define LOG_NBYTES 3
                                                   >> 103 
                                                   >> 104 /* 
                                                   >> 105  * As we are sharing a code base with the mips32 tree (which uses the o32 ABI
                                                   >> 106  * register definitions), we need to redefine the register definitions from
                                                   >> 107  * the n64 ABI register naming to the o32 ABI register naming.
                                                   >> 108  */
                                                   >> 109 #undef t0
                                                   >> 110 #undef t1
                                                   >> 111 #undef t2
                                                   >> 112 #undef t3
                                                   >> 113 #define t0      $8
                                                   >> 114 #define t1      $9
                                                   >> 115 #define t2      $10
                                                   >> 116 #define t3      $11
                                                   >> 117 #define t4      $12
                                                   >> 118 #define t5      $13
                                                   >> 119 #define t6      $14
                                                   >> 120 #define t7      $15
                                                   >> 121         
                                                   >> 122 #else
  8                                                   123 
  9 /* void *memcpy(void *, const void *, size_t)  !! 124 #define LOAD   lw
 10 SYM_FUNC_START(__memcpy)                       !! 125 #define LOADL  lwl
 11         move t6, a0  /* Preserve return value  !! 126 #define LOADR  lwr
 12                                                !! 127 #define STOREL swl
 13         /* Defer to byte-oriented copy for sma !! 128 #define STORER swr
 14         sltiu a3, a2, 128                      !! 129 #define STORE  sw
 15         bnez a3, 4f                            !! 130 #define ADD    addu
 16         /* Use word-oriented copy only if low- !! 131 #define SUB    subu
 17         andi a3, t6, SZREG-1                   !! 132 #define SRL    srl
 18         andi a4, a1, SZREG-1                   !! 133 #define SLL    sll
 19         bne a3, a4, 4f                         !! 134 #define SRA    sra
 20                                                !! 135 #define SLLV   sllv
 21         beqz a3, 2f  /* Skip if already aligne !! 136 #define SRLV   srlv
 22         /*                                     !! 137 #define NBYTES 4
 23          * Round to nearest double word-aligne !! 138 #define LOG_NBYTES 2
 24          * greater than or equal to start addr !! 139 
 25          */                                    !! 140 #endif /* USE_DOUBLE */
 26         andi a3, a1, ~(SZREG-1)                !! 141 
 27         addi a3, a3, SZREG                     !! 142 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 28         /* Handle initial misalignment */      !! 143 #define LDFIRST LOADR
 29         sub a4, a3, a1                         !! 144 #define LDREST  LOADL
                                                   >> 145 #define STFIRST STORER
                                                   >> 146 #define STREST  STOREL
                                                   >> 147 #define SHIFT_DISCARD SLLV
                                                   >> 148 #else
                                                   >> 149 #define LDFIRST LOADL
                                                   >> 150 #define LDREST  LOADR
                                                   >> 151 #define STFIRST STOREL
                                                   >> 152 #define STREST  STORER
                                                   >> 153 #define SHIFT_DISCARD SRLV
                                                   >> 154 #endif
                                                   >> 155 
                                                   >> 156 #define FIRST(unit) ((unit)*NBYTES)
                                                   >> 157 #define REST(unit)  (FIRST(unit)+NBYTES-1)
                                                   >> 158 #define UNIT(unit)  FIRST(unit)
                                                   >> 159 
                                                   >> 160 #define ADDRMASK (NBYTES-1)
                                                   >> 161 
                                                   >> 162         .text
                                                   >> 163         .set    noreorder
                                                   >> 164         .set    noat
                                                   >> 165 
                                                   >> 166 /*
                                                   >> 167  * A combined memcpy/__copy_user
                                                   >> 168  * __copy_user sets len to 0 for success; else to an upper bound of
                                                   >> 169  * the number of uncopied bytes.
                                                   >> 170  * memcpy sets v0 to dst.
                                                   >> 171  */
                                                   >> 172         .align  5
                                                   >> 173 LEAF(memcpy)                                    /* a0=dst a1=src a2=len */
                                                   >> 174         move    v0, dst                         /* return value */
                                                   >> 175 __memcpy:
                                                   >> 176 FEXPORT(__copy_user)
                                                   >> 177         /*
                                                   >> 178          * Note: dst & src may be unaligned, len may be 0
                                                   >> 179          * Temps
                                                   >> 180          */
                                                   >> 181 #define rem t8
                                                   >> 182 
                                                   >> 183         /*
                                                   >> 184          * The "issue break"s below are very approximate.
                                                   >> 185          * Issue delays for dcache fills will perturb the schedule, as will
                                                   >> 186          * load queue full replay traps, etc.
                                                   >> 187          *
                                                   >> 188          * If len < NBYTES use byte operations.
                                                   >> 189          */
                                                   >> 190         PREF(   0, 0(src) )
                                                   >> 191         PREF(   1, 0(dst) )
                                                   >> 192         sltu    t2, len, NBYTES
                                                   >> 193         and     t1, dst, ADDRMASK
                                                   >> 194         PREF(   0, 1*32(src) )
                                                   >> 195         PREF(   1, 1*32(dst) )
                                                   >> 196         bnez    t2, copy_bytes_checklen
                                                   >> 197          and    t0, src, ADDRMASK
                                                   >> 198         PREF(   0, 2*32(src) )
                                                   >> 199         PREF(   1, 2*32(dst) )
                                                   >> 200         bnez    t1, dst_unaligned
                                                   >> 201          nop
                                                   >> 202         bnez    t0, src_unaligned_dst_aligned
                                                   >> 203         /*
                                                   >> 204          * use delay slot for fall-through
                                                   >> 205          * src and dst are aligned; need to compute rem
                                                   >> 206          */
                                                   >> 207 both_aligned:
                                                   >> 208          SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
                                                   >> 209         beqz    t0, cleanup_both_aligned # len < 8*NBYTES
                                                   >> 210          and    rem, len, (8*NBYTES-1)   # rem = len % (8*NBYTES)
                                                   >> 211         PREF(   0, 3*32(src) )
                                                   >> 212         PREF(   1, 3*32(dst) )
                                                   >> 213         .align  4
 30 1:                                                214 1:
 31         lb a5, 0(a1)                           !! 215 EXC(    LOAD    t0, UNIT(0)(src),       l_exc)
 32         addi a1, a1, 1                         !! 216 EXC(    LOAD    t1, UNIT(1)(src),       l_exc_copy)
 33         sb a5, 0(t6)                           !! 217 EXC(    LOAD    t2, UNIT(2)(src),       l_exc_copy)
 34         addi t6, t6, 1                         !! 218 EXC(    LOAD    t3, UNIT(3)(src),       l_exc_copy)
 35         bltu a1, a3, 1b                        !! 219         SUB     len, len, 8*NBYTES
 36         sub a2, a2, a4  /* Update count */     !! 220 EXC(    LOAD    t4, UNIT(4)(src),       l_exc_copy)
 37                                                !! 221 EXC(    LOAD    t7, UNIT(5)(src),       l_exc_copy)
 38 2:                                             !! 222 EXC(    STORE   t0, UNIT(0)(dst),       s_exc_p8u)
 39         andi a4, a2, ~((16*SZREG)-1)           !! 223 EXC(    STORE   t1, UNIT(1)(dst),       s_exc_p7u)
 40         beqz a4, 4f                            !! 224 EXC(    LOAD    t0, UNIT(6)(src),       l_exc_copy)
 41         add a3, a1, a4                         !! 225 EXC(    LOAD    t1, UNIT(7)(src),       l_exc_copy)
 42 3:                                             !! 226         ADD     src, src, 8*NBYTES
 43         REG_L a4,       0(a1)                  !! 227         ADD     dst, dst, 8*NBYTES
 44         REG_L a5,   SZREG(a1)                  !! 228 EXC(    STORE   t2, UNIT(-6)(dst),      s_exc_p6u)
 45         REG_L a6, 2*SZREG(a1)                  !! 229 EXC(    STORE   t3, UNIT(-5)(dst),      s_exc_p5u)
 46         REG_L a7, 3*SZREG(a1)                  !! 230 EXC(    STORE   t4, UNIT(-4)(dst),      s_exc_p4u)
 47         REG_L t0, 4*SZREG(a1)                  !! 231 EXC(    STORE   t7, UNIT(-3)(dst),      s_exc_p3u)
 48         REG_L t1, 5*SZREG(a1)                  !! 232 EXC(    STORE   t0, UNIT(-2)(dst),      s_exc_p2u)
 49         REG_L t2, 6*SZREG(a1)                  !! 233 EXC(    STORE   t1, UNIT(-1)(dst),      s_exc_p1u)
 50         REG_L t3, 7*SZREG(a1)                  !! 234         PREF(   0, 8*32(src) )
 51         REG_L t4, 8*SZREG(a1)                  !! 235         PREF(   1, 8*32(dst) )
 52         REG_L t5, 9*SZREG(a1)                  !! 236         bne     len, rem, 1b
 53         REG_S a4,       0(t6)                  !! 237          nop
 54         REG_S a5,   SZREG(t6)                  !! 238 
 55         REG_S a6, 2*SZREG(t6)                  !! 239         /*
 56         REG_S a7, 3*SZREG(t6)                  !! 240          * len == rem == the number of bytes left to copy < 8*NBYTES
 57         REG_S t0, 4*SZREG(t6)                  !! 241          */
 58         REG_S t1, 5*SZREG(t6)                  !! 242 cleanup_both_aligned:
 59         REG_S t2, 6*SZREG(t6)                  !! 243         beqz    len, done
 60         REG_S t3, 7*SZREG(t6)                  !! 244          sltu   t0, len, 4*NBYTES
 61         REG_S t4, 8*SZREG(t6)                  !! 245         bnez    t0, less_than_4units
 62         REG_S t5, 9*SZREG(t6)                  !! 246          and    rem, len, (NBYTES-1)    # rem = len % NBYTES
 63         REG_L a4, 10*SZREG(a1)                 !! 247         /*
 64         REG_L a5, 11*SZREG(a1)                 !! 248          * len >= 4*NBYTES
 65         REG_L a6, 12*SZREG(a1)                 !! 249          */
 66         REG_L a7, 13*SZREG(a1)                 !! 250 EXC(    LOAD    t0, UNIT(0)(src),       l_exc)
 67         REG_L t0, 14*SZREG(a1)                 !! 251 EXC(    LOAD    t1, UNIT(1)(src),       l_exc_copy)
 68         REG_L t1, 15*SZREG(a1)                 !! 252 EXC(    LOAD    t2, UNIT(2)(src),       l_exc_copy)
 69         addi a1, a1, 16*SZREG                  !! 253 EXC(    LOAD    t3, UNIT(3)(src),       l_exc_copy)
 70         REG_S a4, 10*SZREG(t6)                 !! 254         SUB     len, len, 4*NBYTES
 71         REG_S a5, 11*SZREG(t6)                 !! 255         ADD     src, src, 4*NBYTES
 72         REG_S a6, 12*SZREG(t6)                 !! 256 EXC(    STORE   t0, UNIT(0)(dst),       s_exc_p4u)
 73         REG_S a7, 13*SZREG(t6)                 !! 257 EXC(    STORE   t1, UNIT(1)(dst),       s_exc_p3u)
 74         REG_S t0, 14*SZREG(t6)                 !! 258 EXC(    STORE   t2, UNIT(2)(dst),       s_exc_p2u)
 75         REG_S t1, 15*SZREG(t6)                 !! 259 EXC(    STORE   t3, UNIT(3)(dst),       s_exc_p1u)
 76         addi t6, t6, 16*SZREG                  !! 260         beqz    len, done
 77         bltu a1, a3, 3b                        !! 261          ADD    dst, dst, 4*NBYTES
 78         andi a2, a2, (16*SZREG)-1  /* Update c !! 262 less_than_4units:
 79                                                !! 263         /*
 80 4:                                             !! 264          * rem = len % NBYTES
 81         /* Handle trailing misalignment */     !! 265          */
 82         beqz a2, 6f                            !! 266         beq     rem, len, copy_bytes
 83         add a3, a1, a2                         !! 267          nop
 84                                                !! 268 1:
 85         /* Use word-oriented copy if co-aligne !! 269 EXC(    LOAD    t0, 0(src),             l_exc)
 86         or a5, a1, t6                          !! 270         ADD     src, src, NBYTES
 87         or a5, a5, a3                          !! 271         SUB     len, len, NBYTES
 88         andi a5, a5, 3                         !! 272 EXC(    STORE   t0, 0(dst),             s_exc_p1u)
 89         bnez a5, 5f                            !! 273         bne     rem, len, 1b
 90 7:                                             !! 274          ADD    dst, dst, NBYTES
 91         lw a4, 0(a1)                           !! 275 
 92         addi a1, a1, 4                         !! 276         /*
 93         sw a4, 0(t6)                           !! 277          * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
 94         addi t6, t6, 4                         !! 278          * A loop would do only a byte at a time with possible branch
 95         bltu a1, a3, 7b                        !! 279          * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
 96                                                !! 280          * because can't assume read-access to dst.  Instead, use
 97         ret                                    !! 281          * STREST dst, which doesn't require read access to dst.
 98                                                !! 282          *
 99 5:                                             !! 283          * This code should perform better than a simple loop on modern,
100         lb a4, 0(a1)                           !! 284          * wide-issue mips processors because the code has fewer branches and
101         addi a1, a1, 1                         !! 285          * more instruction-level parallelism.
102         sb a4, 0(t6)                           !! 286          */
103         addi t6, t6, 1                         !! 287 #define bits t2
104         bltu a1, a3, 5b                        !! 288         beqz    len, done
105 6:                                             !! 289          ADD    t1, dst, len    # t1 is just past last byte of dst
106         ret                                    !! 290         li      bits, 8*NBYTES
107 SYM_FUNC_END(__memcpy)                         !! 291         SLL     rem, len, 3     # rem = number of bits to keep
108 SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)          !! 292 EXC(    LOAD    t0, 0(src),             l_exc)
109 SYM_FUNC_ALIAS(__pi_memcpy, __memcpy)          !! 293         SUB     bits, bits, rem # bits = number of bits to discard
110 SYM_FUNC_ALIAS(__pi___memcpy, __memcpy)        !! 294         SHIFT_DISCARD t0, t0, bits
                                                   >> 295 EXC(    STREST  t0, -1(t1),             s_exc)
                                                   >> 296         jr      ra
                                                   >> 297          move   len, zero
                                                   >> 298 dst_unaligned:
                                                   >> 299         /*
                                                   >> 300          * dst is unaligned
                                                   >> 301          * t0 = src & ADDRMASK
                                                   >> 302          * t1 = dst & ADDRMASK; t1 > 0
                                                   >> 303          * len >= NBYTES
                                                   >> 304          *
                                                   >> 305          * Copy enough bytes to align dst
                                                   >> 306          * Set match = (src and dst have same alignment)
                                                   >> 307          */
                                                   >> 308 #define match rem
                                                   >> 309 EXC(    LDFIRST t3, FIRST(0)(src),      l_exc)
                                                   >> 310         ADD     t2, zero, NBYTES
                                                   >> 311 EXC(    LDREST  t3, REST(0)(src),       l_exc_copy)
                                                   >> 312         SUB     t2, t2, t1      # t2 = number of bytes copied
                                                   >> 313         xor     match, t0, t1
                                                   >> 314 EXC(    STFIRST t3, FIRST(0)(dst),      s_exc)
                                                   >> 315         beq     len, t2, done
                                                   >> 316          SUB    len, len, t2
                                                   >> 317         ADD     dst, dst, t2
                                                   >> 318         beqz    match, both_aligned
                                                   >> 319          ADD    src, src, t2
                                                   >> 320 
                                                   >> 321 src_unaligned_dst_aligned:
                                                   >> 322         SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
                                                   >> 323         PREF(   0, 3*32(src) )
                                                   >> 324         beqz    t0, cleanup_src_unaligned
                                                   >> 325          and    rem, len, (4*NBYTES-1)   # rem = len % (4*NBYTES)
                                                   >> 326         PREF(   1, 3*32(dst) )
                                                   >> 327 1:
                                                   >> 328 /*
                                                   >> 329  * Avoid consecutive LD*'s to the same register since some mips
                                                   >> 330  * implementations can't issue them in the same cycle.
                                                   >> 331  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
                                                   >> 332  * are to the same unit (unless src is aligned, but it's not).
                                                   >> 333  */
                                                   >> 334 EXC(    LDFIRST t0, FIRST(0)(src),      l_exc)
                                                   >> 335 EXC(    LDFIRST t1, FIRST(1)(src),      l_exc_copy)
                                                   >> 336         SUB     len, len, 4*NBYTES
                                                   >> 337 EXC(    LDREST  t0, REST(0)(src),       l_exc_copy)
                                                   >> 338 EXC(    LDREST  t1, REST(1)(src),       l_exc_copy)
                                                   >> 339 EXC(    LDFIRST t2, FIRST(2)(src),      l_exc_copy)
                                                   >> 340 EXC(    LDFIRST t3, FIRST(3)(src),      l_exc_copy)
                                                   >> 341 EXC(    LDREST  t2, REST(2)(src),       l_exc_copy)
                                                   >> 342 EXC(    LDREST  t3, REST(3)(src),       l_exc_copy)
                                                   >> 343         PREF(   0, 9*32(src) )          # 0 is PREF_LOAD  (not streamed)
                                                   >> 344         ADD     src, src, 4*NBYTES
                                                   >> 345 #ifdef CONFIG_CPU_SB1
                                                   >> 346         nop                             # improves slotting
                                                   >> 347 #endif
                                                   >> 348 EXC(    STORE   t0, UNIT(0)(dst),       s_exc_p4u)
                                                   >> 349 EXC(    STORE   t1, UNIT(1)(dst),       s_exc_p3u)
                                                   >> 350 EXC(    STORE   t2, UNIT(2)(dst),       s_exc_p2u)
                                                   >> 351 EXC(    STORE   t3, UNIT(3)(dst),       s_exc_p1u)
                                                   >> 352         PREF(   1, 9*32(dst) )          # 1 is PREF_STORE (not streamed)
                                                   >> 353         bne     len, rem, 1b
                                                   >> 354          ADD    dst, dst, 4*NBYTES
                                                   >> 355 
                                                   >> 356 cleanup_src_unaligned:          # copy the leftover whole units one per pass (LDFIRST/LDREST pair, then STORE)
                                                   >> 357         beqz    len, done       # nothing left to copy
                                                   >> 358          and    rem, len, NBYTES-1  # rem = len % NBYTES
                                                   >> 359         beq     rem, len, copy_bytes    # len == rem: no whole unit remains, go byte by byte
                                                   >> 360          nop                    # branch delay slot, intentionally empty
                                                   >> 361 1:
                                                   >> 362 EXC(    LDFIRST t0, FIRST(0)(src),      l_exc)
                                                   >> 363 EXC(    LDREST  t0, REST(0)(src),       l_exc_copy)
                                                   >> 364         ADD     src, src, NBYTES
                                                   >> 365         SUB     len, len, NBYTES
                                                   >> 366 EXC(    STORE   t0, 0(dst),             s_exc_p1u)
                                                   >> 367         bne     len, rem, 1b    # repeat until only the rem tail bytes are left
                                                   >> 368          ADD    dst, dst, NBYTES        # delay slot: runs on every pass, including the last one
                                                   >> 369 
                                                   >> 370 copy_bytes_checklen:            # entry that tests len first: leaves via done when len == 0
                                                   >> 371         beqz    len, done
                                                   >> 372          nop
                                                   >> 373 copy_bytes:
                                                   >> 374         /* 0 < len < NBYTES  */
                                                   >> 375 #define COPY_BYTE(N)                    \
                                                   >> 376 EXC(    lb      t0, N(src), l_exc);     \
                                                   >> 377         SUB     len, len, 1;            \
                                                   >> 378         beqz    len, done;              \
                                                   >> 379 EXC(     sb     t0, N(dst), s_exc_p1)
                                                   >> 380 
                                                   >> 381         COPY_BYTE(0)            /* each step: load byte N, len--, leave via done once len is 0; the sb is written in the delay-slot position of that branch */
                                                   >> 382         COPY_BYTE(1)
                                                   >> 383 #ifdef USE_DOUBLE
                                                   >> 384         COPY_BYTE(2)
                                                   >> 385         COPY_BYTE(3)
                                                   >> 386         COPY_BYTE(4)
                                                   >> 387         COPY_BYTE(5)
                                                   >> 388 #endif
                                                   >> 389 EXC(    lb      t0, NBYTES-2(src), l_exc)
                                                   >> 390         SUB     len, len, 1     # len < NBYTES on entry, so offset NBYTES-2 is the last byte that can remain
                                                   >> 391         jr      ra              # return; the sb on the next line is written as its delay slot
                                                   >> 392 EXC(     sb     t0, NBYTES-2(dst), s_exc_p1)
                                                   >> 393 done:
                                                   >> 394         jr      ra              # plain return to the caller
                                                   >> 395          nop
                                                   >> 396         END(memcpy)
                                                   >> 397 
                                                   >> 398 l_exc_copy:
                                                   >> 399         /*
                                                   >> 400          * Copy bytes from src until faulting load address (or until a
                                                   >> 401          * lb faults)
                                                   >> 402          *
                                                   >> 403          * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
                                                   >> 404          * may be more than a byte beyond the last address.
                                                   >> 405          * Hence, the lb below may get an exception.
                                                   >> 406          *
                                                   >> 407          * Assumes src < THREAD_BUADDR($28)
                                                   >> 408          */
                                                   >> 409         LOAD    t0, THREAD_BUADDR($28)  # t0 = address at which the byte copy below stops
                                                   >> 410 1:
                                                   >> 411 EXC(    lb      t1, 0(src),     l_exc)
                                                   >> 412         ADD     src, src, 1
                                                   >> 413         sb      t1, 0(dst)      # can't fault -- we're copy_from_user
                                                   >> 414         bne     src, t0, 1b     # keep copying single bytes until src reaches t0
                                                   >> 415          ADD    dst, dst, 1     # delay slot: dst advances in step with src
                                                   >> 416 l_exc:                          # fault exit: work out the uncopied length, then zero the rest of dst
                                                   >> 417         LOAD    t0, THREAD_BUADDR($28)  # t0 is just past last good address
                                                   >> 418          nop                    # NOTE(review): presumably covers the load delay before t0 is read -- confirm
                                                   >> 419         SUB     len, AT, t0             # len number of uncopied bytes (AT is set outside this chunk)
                                                   >> 420         /*
                                                   >> 421          * Here's where we rely on src and dst being incremented in tandem,
                                                   >> 422          *   See (3) above.
                                                   >> 423          * dst += (fault addr - src) to put dst at first byte to clear
                                                   >> 424          */
                                                   >> 425         ADD     dst, t0                 # compute start address in a1
                                                   >> 426         SUB     dst, src
                                                   >> 427         /*
                                                   >> 428          * Clear len bytes starting at dst.  Can't call __bzero because it
                                                   >> 429          * might modify len.  An inefficient loop for these rare times...
                                                   >> 430          */
                                                   >> 431         beqz    len, done       # nothing to clear
                                                   >> 432          SUB    src, len, 1     # delay slot: src is reused as a countdown starting at len - 1
                                                   >> 433 1:      sb      zero, 0(dst)    # clear one destination byte
                                                   >> 434         ADD     dst, dst, 1
                                                   >> 435         bnez    src, 1b         # loop while the countdown is non-zero
                                                   >> 436          SUB    src, src, 1     # delay slot: countdown--
                                                   >> 437         jr      ra              # return; len was not touched by the clearing loop
                                                   >> 438          nop
                                                   >> 439 
                                                   >> 440 
                                                   >> 441 #define SEXC(n)                         \
                                                   >> 442 s_exc_p ## n ## u:                      \
                                                   >> 443         jr      ra;                     \
                                                   >> 444          ADD    len, len, n*NBYTES
                                                   >> 445 
                                                   >> 446 SEXC(8)                         /* each SEXC(n) emits label s_exc_p<n>u: return with len += n*NBYTES (the ADD is written as the jr delay slot) */
                                                   >> 447 SEXC(7)
                                                   >> 448 SEXC(6)
                                                   >> 449 SEXC(5)
                                                   >> 450 SEXC(4)
                                                   >> 451 SEXC(3)
                                                   >> 452 SEXC(2)
                                                   >> 453 SEXC(1)                         /* s_exc_p1u is the handler named by the single-unit STORE in cleanup_src_unaligned */
                                                   >> 454 
                                                   >> 455 s_exc_p1:                       # handler named by the sb stores in copy_bytes
                                                   >> 456         jr      ra
                                                   >> 457          ADD    len, len, 1     # delay slot: add back the one byte that copy_bytes had already subtracted
                                                   >> 458 s_exc:
                                                   >> 459         jr      ra              # return with len left as it is
                                                   >> 460          nop
                                                   >> 461 
                                                   >> 462         .align  5
                                                   >> 463 LEAF(memmove)                   # a0=dst a1=src a2=len; picks __memcpy when the ranges do not overlap
                                                   >> 464         ADD     t0, a0, a2      # t0 = dst + len
                                                   >> 465         ADD     t1, a1, a2      # t1 = src + len
                                                   >> 466         sltu    t0, a1, t0                      # dst + len <= src -> memcpy
                                                   >> 467         sltu    t1, a0, t1                      # dst >= src + len -> memcpy
                                                   >> 468         and     t0, t1          # non-zero only when both tests report overlap
                                                   >> 469         beqz    t0, __memcpy    # no overlap: branch (not call) to __memcpy, which returns to our caller
                                                   >> 470          move   v0, a0                          /* return value */
                                                   >> 471         beqz    a2, r_out       # len == 0: return via r_out; its delay slot is the first sltu of __rmemcpy below
                                                   >> 472         END(memmove)
                                                   >> 473 
                                                   >> 474         /* fall through to __rmemcpy */
                                                   >> 475 LEAF(__rmemcpy)                                 /* a0=dst a1=src a2=len */
                                                   >> 476          sltu   t0, a1, a0      # t0 = (src < dst); also sits in the delay slot of the beqz that ends memmove
                                                   >> 477         beqz    t0, r_end_bytes_up              # src >= dst
                                                   >> 478          nop
                                                   >> 479         ADD     a0, a2                          # dst = dst + len
                                                   >> 480         ADD     a1, a2                          # src = src + len
                                                   >> 481 
                                                   >> 482 r_end_bytes:                    # src < dst: copy one byte per pass, from the end downwards
                                                   >> 483         lb      t0, -1(a1)      # the byte is copied before len is tested, so a2 == 0 on entry is not handled here
                                                   >> 484         SUB     a2, a2, 0x1
                                                   >> 485         sb      t0, -1(a0)
                                                   >> 486         SUB     a1, a1, 0x1
                                                   >> 487         bnez    a2, r_end_bytes
                                                   >> 488          SUB    a0, a0, 0x1     # delay slot: step dst back, runs on every pass
                                                   >> 489 
                                                   >> 490 r_out:
                                                   >> 491         jr      ra
                                                   >> 492          move   a2, zero        # delay slot: a2 is zero on return
                                                   >> 493 
                                                   >> 494 r_end_bytes_up:                 # src >= dst: copy one byte per pass, from the start upwards
                                                   >> 495         lb      t0, (a1)        # as above, the first byte is copied before len is tested
                                                   >> 496         SUB     a2, a2, 0x1
                                                   >> 497         sb      t0, (a0)
                                                   >> 498         ADD     a1, a1, 0x1
                                                   >> 499         bnez    a2, r_end_bytes_up
                                                   >> 500          ADD    a0, a0, 0x1     # delay slot: step dst forward, runs on every pass
                                                   >> 501 
                                                   >> 502         jr      ra
                                                   >> 503          move   a2, zero        # delay slot: a2 is zero on return
                                                   >> 504         END(__rmemcpy)
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php