~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/memcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm64/lib/memcpy.S (Version linux-6.12-rc7) and /arch/sparc/lib/memcpy.S (Version linux-3.10.108)


  1 /* SPDX-License-Identifier: GPL-2.0-only */    !!   1 /* memcpy.S: Sparc optimized memcpy and memmove code
  2 /*                                             !!   2  * Hand optimized from GNU libc's memcpy and memmove
  3  * Copyright (c) 2012-2021, Arm Limited.       !!   3  * Copyright (C) 1991,1996 Free Software Foundation
  4  *                                             !!   4  * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
  5  * Adapted from the original at:               !!   5  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  6  * https://github.com/ARM-software/optimized-r !!   6  * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
                                                   >>   7  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  7  */                                                 8  */
  8                                                     9 
  9 #include <linux/linkage.h>                     !!  10 #define FUNC(x)                 \
 10 #include <asm/assembler.h>                     !!  11         .globl  x;              \
 11                                                !!  12         .type   x,@function;    \
 12 /* Assumptions:                                !!  13         .align  4;              \
 13  *                                             !!  14 x:
 14  * ARMv8-a, AArch64, unaligned accesses.       !!  15 
 15  *                                             !!  16 /* Both these macros have to start with exactly the same insn */
                                                   >>  17 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
                                                   >>  18         ldd     [%src + (offset) + 0x00], %t0; \
                                                   >>  19         ldd     [%src + (offset) + 0x08], %t2; \
                                                   >>  20         ldd     [%src + (offset) + 0x10], %t4; \
                                                   >>  21         ldd     [%src + (offset) + 0x18], %t6; \
                                                   >>  22         st      %t0, [%dst + (offset) + 0x00]; \
                                                   >>  23         st      %t1, [%dst + (offset) + 0x04]; \
                                                   >>  24         st      %t2, [%dst + (offset) + 0x08]; \
                                                   >>  25         st      %t3, [%dst + (offset) + 0x0c]; \
                                                   >>  26         st      %t4, [%dst + (offset) + 0x10]; \
                                                   >>  27         st      %t5, [%dst + (offset) + 0x14]; \
                                                   >>  28         st      %t6, [%dst + (offset) + 0x18]; \
                                                   >>  29         st      %t7, [%dst + (offset) + 0x1c];
                                                   >>  30 
                                                   >>  31 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
                                                   >>  32         ldd     [%src + (offset) + 0x00], %t0; \
                                                   >>  33         ldd     [%src + (offset) + 0x08], %t2; \
                                                   >>  34         ldd     [%src + (offset) + 0x10], %t4; \
                                                   >>  35         ldd     [%src + (offset) + 0x18], %t6; \
                                                   >>  36         std     %t0, [%dst + (offset) + 0x00]; \
                                                   >>  37         std     %t2, [%dst + (offset) + 0x08]; \
                                                   >>  38         std     %t4, [%dst + (offset) + 0x10]; \
                                                   >>  39         std     %t6, [%dst + (offset) + 0x18];
                                                   >>  40 
                                                   >>  41 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
                                                   >>  42         ldd     [%src - (offset) - 0x10], %t0; \
                                                   >>  43         ldd     [%src - (offset) - 0x08], %t2; \
                                                   >>  44         st      %t0, [%dst - (offset) - 0x10]; \
                                                   >>  45         st      %t1, [%dst - (offset) - 0x0c]; \
                                                   >>  46         st      %t2, [%dst - (offset) - 0x08]; \
                                                   >>  47         st      %t3, [%dst - (offset) - 0x04];
                                                   >>  48 
                                                   >>  49 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
                                                   >>  50         ldd     [%src - (offset) - 0x10], %t0; \
                                                   >>  51         ldd     [%src - (offset) - 0x08], %t2; \
                                                   >>  52         std     %t0, [%dst - (offset) - 0x10]; \
                                                   >>  53         std     %t2, [%dst - (offset) - 0x08];
                                                   >>  54 
                                                   >>  55 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
                                                   >>  56         ldub    [%src - (offset) - 0x02], %t0; \
                                                   >>  57         ldub    [%src - (offset) - 0x01], %t1; \
                                                   >>  58         stb     %t0, [%dst - (offset) - 0x02]; \
                                                   >>  59         stb     %t1, [%dst - (offset) - 0x01];
                                                   >>  60 
                                                   >>  61 /* Both these macros have to start with exactly the same insn */
                                                   >>  62 #define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
                                                   >>  63         ldd     [%src - (offset) - 0x20], %t0; \
                                                   >>  64         ldd     [%src - (offset) - 0x18], %t2; \
                                                   >>  65         ldd     [%src - (offset) - 0x10], %t4; \
                                                   >>  66         ldd     [%src - (offset) - 0x08], %t6; \
                                                   >>  67         st      %t0, [%dst - (offset) - 0x20]; \
                                                   >>  68         st      %t1, [%dst - (offset) - 0x1c]; \
                                                   >>  69         st      %t2, [%dst - (offset) - 0x18]; \
                                                   >>  70         st      %t3, [%dst - (offset) - 0x14]; \
                                                   >>  71         st      %t4, [%dst - (offset) - 0x10]; \
                                                   >>  72         st      %t5, [%dst - (offset) - 0x0c]; \
                                                   >>  73         st      %t6, [%dst - (offset) - 0x08]; \
                                                   >>  74         st      %t7, [%dst - (offset) - 0x04];
                                                   >>  75 
                                                   >>  76 #define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
                                                   >>  77         ldd     [%src - (offset) - 0x20], %t0; \
                                                   >>  78         ldd     [%src - (offset) - 0x18], %t2; \
                                                   >>  79         ldd     [%src - (offset) - 0x10], %t4; \
                                                   >>  80         ldd     [%src - (offset) - 0x08], %t6; \
                                                   >>  81         std     %t0, [%dst - (offset) - 0x20]; \
                                                   >>  82         std     %t2, [%dst - (offset) - 0x18]; \
                                                   >>  83         std     %t4, [%dst - (offset) - 0x10]; \
                                                   >>  84         std     %t6, [%dst - (offset) - 0x08];
                                                   >>  85 
                                                   >>  86 #define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
                                                   >>  87         ldd     [%src + (offset) + 0x00], %t0; \
                                                   >>  88         ldd     [%src + (offset) + 0x08], %t2; \
                                                   >>  89         st      %t0, [%dst + (offset) + 0x00]; \
                                                   >>  90         st      %t1, [%dst + (offset) + 0x04]; \
                                                   >>  91         st      %t2, [%dst + (offset) + 0x08]; \
                                                   >>  92         st      %t3, [%dst + (offset) + 0x0c];
                                                   >>  93 
                                                   >>  94 #define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
                                                   >>  95         ldub    [%src + (offset) + 0x00], %t0; \
                                                   >>  96         ldub    [%src + (offset) + 0x01], %t1; \
                                                   >>  97         stb     %t0, [%dst + (offset) + 0x00]; \
                                                   >>  98         stb     %t1, [%dst + (offset) + 0x01];
                                                   >>  99 
                                                   >> 100 #define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
                                                   >> 101         ldd     [%src + (offset) + 0x00], %t0; \
                                                   >> 102         ldd     [%src + (offset) + 0x08], %t2; \
                                                   >> 103         srl     %t0, shir, %t5; \
                                                   >> 104         srl     %t1, shir, %t6; \
                                                   >> 105         sll     %t0, shil, %t0; \
                                                   >> 106         or      %t5, %prev, %t5; \
                                                   >> 107         sll     %t1, shil, %prev; \
                                                   >> 108         or      %t6, %t0, %t0; \
                                                   >> 109         srl     %t2, shir, %t1; \
                                                   >> 110         srl     %t3, shir, %t6; \
                                                   >> 111         sll     %t2, shil, %t2; \
                                                   >> 112         or      %t1, %prev, %t1; \
                                                   >> 113         std     %t4, [%dst + (offset) + (offset2) - 0x04]; \
                                                   >> 114         std     %t0, [%dst + (offset) + (offset2) + 0x04]; \
                                                   >> 115         sll     %t3, shil, %prev; \
                                                   >> 116         or      %t6, %t2, %t4;
                                                   >> 117 
                                                   >> 118 #define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
                                                   >> 119         ldd     [%src + (offset) + 0x00], %t0; \
                                                   >> 120         ldd     [%src + (offset) + 0x08], %t2; \
                                                   >> 121         srl     %t0, shir, %t4; \
                                                   >> 122         srl     %t1, shir, %t5; \
                                                   >> 123         sll     %t0, shil, %t6; \
                                                   >> 124         or      %t4, %prev, %t0; \
                                                   >> 125         sll     %t1, shil, %prev; \
                                                   >> 126         or      %t5, %t6, %t1; \
                                                   >> 127         srl     %t2, shir, %t4; \
                                                   >> 128         srl     %t3, shir, %t5; \
                                                   >> 129         sll     %t2, shil, %t6; \
                                                   >> 130         or      %t4, %prev, %t2; \
                                                   >> 131         sll     %t3, shil, %prev; \
                                                   >> 132         or      %t5, %t6, %t3; \
                                                   >> 133         std     %t0, [%dst + (offset) + (offset2) + 0x00]; \
                                                   >> 134         std     %t2, [%dst + (offset) + (offset2) + 0x08];
                                                   >> 135 
                                                   >> 136         .text
                                                   >> 137         .align  4
                                                   >> 138 
                                                   >> 139 0:
                                                   >> 140         retl
                                                   >> 141          nop            ! Only bcopy returns here and it returns void...
                                                   >> 142 
                                                   >> 143 #ifdef __KERNEL__
                                                   >> 144 FUNC(amemmove)
                                                   >> 145 FUNC(__memmove)
                                                   >> 146 #endif
                                                   >> 147 FUNC(memmove)
                                                   >> 148         cmp             %o0, %o1
                                                   >> 149         mov             %o0, %g7
                                                   >> 150         bleu            9f
                                                   >> 151          sub            %o0, %o1, %o4
                                                   >> 152 
                                                   >> 153         add             %o1, %o2, %o3
                                                   >> 154         cmp             %o3, %o0
                                                   >> 155         bleu            0f
                                                   >> 156          andcc          %o4, 3, %o5
                                                   >> 157 
                                                   >> 158         add             %o1, %o2, %o1
                                                   >> 159         add             %o0, %o2, %o0
                                                   >> 160         sub             %o1, 1, %o1
                                                   >> 161         sub             %o0, 1, %o0
                                                   >> 162         
                                                   >> 163 1:      /* reverse_bytes */
                                                   >> 164 
                                                   >> 165         ldub            [%o1], %o4
                                                   >> 166         subcc           %o2, 1, %o2
                                                   >> 167         stb             %o4, [%o0]
                                                   >> 168         sub             %o1, 1, %o1
                                                   >> 169         bne             1b
                                                   >> 170          sub            %o0, 1, %o0
                                                   >> 171 
                                                   >> 172         retl
                                                   >> 173          mov            %g7, %o0
                                                   >> 174 
                                                   >> 175 /* NOTE: This code is executed only in the cases
                                                   >> 176          where (%src (=%o1) & 3) != 0.
                                                   >> 177          We need to align it to 4. So, for (%src & 3)
                                                   >> 178          1 we need to do ldub,lduh
                                                   >> 179          2 lduh
                                                   >> 180          3 just ldub
                                                   >> 181          so even if it looks weird, the branches
                                                   >> 182          are correct here. -jj
 16  */                                               183  */
                                                   >> 184 78:     /* dword_align */
 17                                                   185 
 18 #define L(label) .L ## label                   !! 186         andcc           %o1, 1, %g0
 19                                                !! 187         be              4f
 20 #define dstin   x0                             !! 188          andcc          %o1, 2, %g0
 21 #define src     x1                             !! 189 
 22 #define count   x2                             !! 190         ldub            [%o1], %g2
 23 #define dst     x3                             !! 191         add             %o1, 1, %o1
 24 #define srcend  x4                             !! 192         stb             %g2, [%o0]
 25 #define dstend  x5                             !! 193         sub             %o2, 1, %o2
 26 #define A_l     x6                             !! 194         bne             3f
 27 #define A_lw    w6                             !! 195          add            %o0, 1, %o0
 28 #define A_h     x7                             !! 196 4:
 29 #define B_l     x8                             !! 197         lduh            [%o1], %g2
 30 #define B_lw    w8                             !! 198         add             %o1, 2, %o1
 31 #define B_h     x9                             !! 199         sth             %g2, [%o0]
 32 #define C_l     x10                            !! 200         sub             %o2, 2, %o2
 33 #define C_lw    w10                            !! 201         b               3f
 34 #define C_h     x11                            !! 202          add            %o0, 2, %o0
 35 #define D_l     x12                            !! 203 
 36 #define D_h     x13                            !! 204 FUNC(memcpy)    /* %o0=dst %o1=src %o2=len */
 37 #define E_l     x14                            !! 205 
 38 #define E_h     x15                            !! 206         sub             %o0, %o1, %o4
 39 #define F_l     x16                            !! 207         mov             %o0, %g7
 40 #define F_h     x17                            !! 208 9:
 41 #define G_l     count                          !! 209         andcc           %o4, 3, %o5
 42 #define G_h     dst                            !! 210 0:
 43 #define H_l     src                            !! 211         bne             86f
 44 #define H_h     srcend                         !! 212          cmp            %o2, 15
 45 #define tmp1    x14                            !! 213 
 46                                                !! 214         bleu            90f
 47 /* This implementation handles overlaps and su !! 215          andcc          %o1, 3, %g0
 48    from a single entry point.  It uses unalign !! 216 
 49    sequences to keep the code small, simple an !! 217         bne             78b
 50                                                !! 218 3:
 51    Copies are split into 3 main cases: small c !! 219          andcc          %o1, 4, %g0
 52    copies of up to 128 bytes, and large copies !! 220 
 53    check is negligible since it is only requir !! 221         be              2f
 54                                                !! 222          mov            %o2, %g1
 55    Large copies use a software pipelined loop  !! 223 
 56    The destination pointer is 16-byte aligned  !! 224         ld              [%o1], %o4
 57    The loop tail is handled by always copying  !! 225         sub             %g1, 4, %g1
 58 */                                             !! 226         st              %o4, [%o0]
 59                                                !! 227         add             %o1, 4, %o1
 60 SYM_FUNC_START(__pi_memcpy)                    !! 228         add             %o0, 4, %o0
 61         add     srcend, src, count             !! 229 2:
 62         add     dstend, dstin, count           !! 230         andcc           %g1, 0xffffff80, %g0
 63         cmp     count, 128                     !! 231         be              3f
 64         b.hi    L(copy_long)                   !! 232          andcc          %o0, 4, %g0
 65         cmp     count, 32                      !! 233 
 66         b.hi    L(copy32_128)                  !! 234         be              82f + 4
 67                                                !! 235 5:
 68         /* Small copies: 0..32 bytes.  */      !! 236         MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
 69         cmp     count, 16                      !! 237         MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
 70         b.lo    L(copy16)                      !! 238         MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
 71         ldp     A_l, A_h, [src]                !! 239         MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
 72         ldp     D_l, D_h, [srcend, -16]        !! 240         sub             %g1, 128, %g1
 73         stp     A_l, A_h, [dstin]              !! 241         add             %o1, 128, %o1
 74         stp     D_l, D_h, [dstend, -16]        !! 242         cmp             %g1, 128
 75         ret                                    !! 243         bge             5b
 76                                                !! 244          add            %o0, 128, %o0
 77         /* Copy 8-15 bytes.  */                !! 245 3:
 78 L(copy16):                                     !! 246         andcc           %g1, 0x70, %g4
 79         tbz     count, 3, L(copy8)             !! 247         be              80f
 80         ldr     A_l, [src]                     !! 248          andcc          %g1, 8, %g0
 81         ldr     A_h, [srcend, -8]              !! 249 
 82         str     A_l, [dstin]                   !! 250         sethi           %hi(80f), %o5
 83         str     A_h, [dstend, -8]              !! 251         srl             %g4, 1, %o4
 84         ret                                    !! 252         add             %g4, %o4, %o4
 85                                                !! 253         add             %o1, %g4, %o1
 86         .p2align 3                             !! 254         sub             %o5, %o4, %o5
 87         /* Copy 4-7 bytes.  */                 !! 255         jmpl            %o5 + %lo(80f), %g0
 88 L(copy8):                                      !! 256          add            %o0, %g4, %o0
 89         tbz     count, 2, L(copy4)             !! 257 
 90         ldr     A_lw, [src]                    !! 258 79:     /* memcpy_table */
 91         ldr     B_lw, [srcend, -4]             !! 259 
 92         str     A_lw, [dstin]                  !! 260         MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
 93         str     B_lw, [dstend, -4]             !! 261         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
 94         ret                                    !! 262         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
 95                                                !! 263         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
 96         /* Copy 0..3 bytes using a branchless  !! 264         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
 97 L(copy4):                                      !! 265         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
 98         cbz     count, L(copy0)                !! 266         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
 99         lsr     tmp1, count, 1                 !! 267 
100         ldrb    A_lw, [src]                    !! 268 80:     /* memcpy_table_end */
101         ldrb    C_lw, [srcend, -1]             !! 269         be              81f
102         ldrb    B_lw, [src, tmp1]              !! 270          andcc          %g1, 4, %g0
103         strb    A_lw, [dstin]                  !! 271 
104         strb    B_lw, [dstin, tmp1]            !! 272         ldd             [%o1], %g2
105         strb    C_lw, [dstend, -1]             !! 273         add             %o0, 8, %o0
106 L(copy0):                                      !! 274         st              %g2, [%o0 - 0x08]
                                                   >> 275         add             %o1, 8, %o1
                                                   >> 276         st              %g3, [%o0 - 0x04]
                                                   >> 277 
                                                   >> 278 81:     /* memcpy_last7 */
                                                   >> 279 
                                                   >> 280         be              1f
                                                   >> 281          andcc          %g1, 2, %g0
                                                   >> 282 
                                                   >> 283         ld              [%o1], %g2
                                                   >> 284         add             %o1, 4, %o1
                                                   >> 285         st              %g2, [%o0]
                                                   >> 286         add             %o0, 4, %o0
                                                   >> 287 1:
                                                   >> 288         be              1f
                                                   >> 289          andcc          %g1, 1, %g0
                                                   >> 290 
                                                   >> 291         lduh            [%o1], %g2
                                                   >> 292         add             %o1, 2, %o1
                                                   >> 293         sth             %g2, [%o0]
                                                   >> 294         add             %o0, 2, %o0
                                                   >> 295 1:
                                                   >> 296         be              1f
                                                   >> 297          nop
                                                   >> 298 
                                                   >> 299         ldub            [%o1], %g2
                                                   >> 300         stb             %g2, [%o0]
                                                   >> 301 1:
                                                   >> 302         retl
                                                   >> 303          mov            %g7, %o0
                                                   >> 304 
                                                   >> 305 82:     /* ldd_std */
                                                   >> 306         MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 307         MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 308         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 309         MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 310         subcc           %g1, 128, %g1
                                                   >> 311         add             %o1, 128, %o1
                                                   >> 312         cmp             %g1, 128
                                                   >> 313         bge             82b
                                                   >> 314          add            %o0, 128, %o0
                                                   >> 315 
                                                   >> 316         andcc           %g1, 0x70, %g4
                                                   >> 317         be              84f
                                                   >> 318          andcc          %g1, 8, %g0
                                                   >> 319 
                                                   >> 320         sethi           %hi(84f), %o5
                                                   >> 321         add             %o1, %g4, %o1
                                                   >> 322         sub             %o5, %g4, %o5
                                                   >> 323         jmpl            %o5 + %lo(84f), %g0
                                                   >> 324          add            %o0, %g4, %o0
                                                   >> 325 
                                                   >> 326 83:     /* amemcpy_table */
                                                   >> 327 
                                                   >> 328         MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
                                                   >> 329         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
                                                   >> 330         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
                                                   >> 331         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
                                                   >> 332         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
                                                   >> 333         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
                                                   >> 334         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
                                                   >> 335 
                                                   >> 336 84:     /* amemcpy_table_end */
                                                   >> 337         be              85f
                                                   >> 338          andcc          %g1, 4, %g0
                                                   >> 339 
                                                   >> 340         ldd             [%o1], %g2
                                                   >> 341         add             %o0, 8, %o0
                                                   >> 342         std             %g2, [%o0 - 0x08]
                                                   >> 343         add             %o1, 8, %o1
                                                   >> 344 85:     /* amemcpy_last7 */
                                                   >> 345         be              1f
                                                   >> 346          andcc          %g1, 2, %g0
                                                   >> 347 
                                                   >> 348         ld              [%o1], %g2
                                                   >> 349         add             %o1, 4, %o1
                                                   >> 350         st              %g2, [%o0]
                                                   >> 351         add             %o0, 4, %o0
                                                   >> 352 1:
                                                   >> 353         be              1f
                                                   >> 354          andcc          %g1, 1, %g0
                                                   >> 355 
                                                   >> 356         lduh            [%o1], %g2
                                                   >> 357         add             %o1, 2, %o1
                                                   >> 358         sth             %g2, [%o0]
                                                   >> 359         add             %o0, 2, %o0
                                                   >> 360 1:
                                                   >> 361         be              1f
                                                   >> 362          nop
                                                   >> 363 
                                                   >> 364         ldub            [%o1], %g2
                                                   >> 365         stb             %g2, [%o0]
                                                   >> 366 1:
                                                   >> 367         retl
                                                   >> 368          mov            %g7, %o0
                                                   >> 369 
                                                   >> 370 86:     /* non_aligned */
                                                   >> 371         cmp             %o2, 6
                                                   >> 372         bleu            88f
                                                   >> 373          nop
                                                   >> 374 
                                                   >> 375         save            %sp, -96, %sp
                                                   >> 376         andcc           %i0, 3, %g0
                                                   >> 377         be              61f
                                                   >> 378          andcc          %i0, 1, %g0
                                                   >> 379         be              60f
                                                   >> 380          andcc          %i0, 2, %g0
                                                   >> 381 
                                                   >> 382         ldub            [%i1], %g5
                                                   >> 383         add             %i1, 1, %i1
                                                   >> 384         stb             %g5, [%i0]
                                                   >> 385         sub             %i2, 1, %i2
                                                   >> 386         bne             61f
                                                   >> 387          add            %i0, 1, %i0
                                                   >> 388 60:
                                                   >> 389         ldub            [%i1], %g3
                                                   >> 390         add             %i1, 2, %i1
                                                   >> 391         stb             %g3, [%i0]
                                                   >> 392         sub             %i2, 2, %i2
                                                   >> 393         ldub            [%i1 - 1], %g3
                                                   >> 394         add             %i0, 2, %i0
                                                   >> 395         stb             %g3, [%i0 - 1]
                                                   >> 396 61:
                                                   >> 397         and             %i1, 3, %g2
                                                   >> 398         and             %i2, 0xc, %g3
                                                   >> 399         and             %i1, -4, %i1
                                                   >> 400         cmp             %g3, 4
                                                   >> 401         sll             %g2, 3, %g4
                                                   >> 402         mov             32, %g2
                                                   >> 403         be              4f
                                                   >> 404          sub            %g2, %g4, %l0
                                                   >> 405         
                                                   >> 406         blu             3f
                                                   >> 407          cmp            %g3, 0x8
                                                   >> 408 
                                                   >> 409         be              2f
                                                   >> 410          srl            %i2, 2, %g3
                                                   >> 411 
                                                   >> 412         ld              [%i1], %i3
                                                   >> 413         add             %i0, -8, %i0
                                                   >> 414         ld              [%i1 + 4], %i4
                                                   >> 415         b               8f
                                                   >> 416          add            %g3, 1, %g3
                                                   >> 417 2:
                                                   >> 418         ld              [%i1], %i4
                                                   >> 419         add             %i0, -12, %i0
                                                   >> 420         ld              [%i1 + 4], %i5
                                                   >> 421         add             %g3, 2, %g3
                                                   >> 422         b               9f
                                                   >> 423          add            %i1, -4, %i1
                                                   >> 424 3:
                                                   >> 425         ld              [%i1], %g1
                                                   >> 426         add             %i0, -4, %i0
                                                   >> 427         ld              [%i1 + 4], %i3
                                                   >> 428         srl             %i2, 2, %g3
                                                   >> 429         b               7f
                                                   >> 430          add            %i1, 4, %i1
                                                   >> 431 4:
                                                   >> 432         ld              [%i1], %i5
                                                   >> 433         cmp             %i2, 7
                                                   >> 434         ld              [%i1 + 4], %g1
                                                   >> 435         srl             %i2, 2, %g3
                                                   >> 436         bleu            10f
                                                   >> 437          add            %i1, 8, %i1
                                                   >> 438 
                                                   >> 439         ld              [%i1], %i3
                                                   >> 440         add             %g3, -1, %g3
                                                   >> 441 5:
                                                   >> 442         sll             %i5, %g4, %g2
                                                   >> 443         srl             %g1, %l0, %g5
                                                   >> 444         or              %g2, %g5, %g2
                                                   >> 445         st              %g2, [%i0]
                                                   >> 446 7:
                                                   >> 447         ld              [%i1 + 4], %i4
                                                   >> 448         sll             %g1, %g4, %g2
                                                   >> 449         srl             %i3, %l0, %g5
                                                   >> 450         or              %g2, %g5, %g2
                                                   >> 451         st              %g2, [%i0 + 4]
                                                   >> 452 8:
                                                   >> 453         ld              [%i1 + 8], %i5
                                                   >> 454         sll             %i3, %g4, %g2
                                                   >> 455         srl             %i4, %l0, %g5
                                                   >> 456         or              %g2, %g5, %g2
                                                   >> 457         st              %g2, [%i0 + 8]
                                                   >> 458 9:
                                                   >> 459         ld              [%i1 + 12], %g1
                                                   >> 460         sll             %i4, %g4, %g2
                                                   >> 461         srl             %i5, %l0, %g5
                                                   >> 462         addcc           %g3, -4, %g3
                                                   >> 463         or              %g2, %g5, %g2
                                                   >> 464         add             %i1, 16, %i1
                                                   >> 465         st              %g2, [%i0 + 12]
                                                   >> 466         add             %i0, 16, %i0
                                                   >> 467         bne,a           5b
                                                   >> 468          ld             [%i1], %i3
                                                   >> 469 10:
                                                   >> 470         sll             %i5, %g4, %g2
                                                   >> 471         srl             %g1, %l0, %g5
                                                   >> 472         srl             %l0, 3, %g3
                                                   >> 473         or              %g2, %g5, %g2
                                                   >> 474         sub             %i1, %g3, %i1
                                                   >> 475         andcc           %i2, 2, %g0
                                                   >> 476         st              %g2, [%i0]
                                                   >> 477         be              1f
                                                   >> 478          andcc          %i2, 1, %g0
                                                   >> 479 
                                                   >> 480         ldub            [%i1], %g2
                                                   >> 481         add             %i1, 2, %i1
                                                   >> 482         stb             %g2, [%i0 + 4]
                                                   >> 483         add             %i0, 2, %i0
                                                   >> 484         ldub            [%i1 - 1], %g2
                                                   >> 485         stb             %g2, [%i0 + 3]
                                                   >> 486 1:
                                                   >> 487         be              1f
                                                   >> 488          nop
                                                   >> 489         ldub            [%i1], %g2
                                                   >> 490         stb             %g2, [%i0 + 4]
                                                   >> 491 1:
107         ret                                       492         ret
                                                   >> 493          restore        %g7, %g0, %o0
108                                                   494 
109         .p2align 4                             !! 495 88:     /* short_end */
110         /* Medium copies: 33..128 bytes.  */   << 
111 L(copy32_128):                                 << 
112         ldp     A_l, A_h, [src]                << 
113         ldp     B_l, B_h, [src, 16]            << 
114         ldp     C_l, C_h, [srcend, -32]        << 
115         ldp     D_l, D_h, [srcend, -16]        << 
116         cmp     count, 64                      << 
117         b.hi    L(copy128)                     << 
118         stp     A_l, A_h, [dstin]              << 
119         stp     B_l, B_h, [dstin, 16]          << 
120         stp     C_l, C_h, [dstend, -32]        << 
121         stp     D_l, D_h, [dstend, -16]        << 
122         ret                                    << 
123                                                << 
124         .p2align 4                             << 
125         /* Copy 65..128 bytes.  */             << 
126 L(copy128):                                    << 
127         ldp     E_l, E_h, [src, 32]            << 
128         ldp     F_l, F_h, [src, 48]            << 
129         cmp     count, 96                      << 
130         b.ls    L(copy96)                      << 
131         ldp     G_l, G_h, [srcend, -64]        << 
132         ldp     H_l, H_h, [srcend, -48]        << 
133         stp     G_l, G_h, [dstend, -64]        << 
134         stp     H_l, H_h, [dstend, -48]        << 
135 L(copy96):                                     << 
136         stp     A_l, A_h, [dstin]              << 
137         stp     B_l, B_h, [dstin, 16]          << 
138         stp     E_l, E_h, [dstin, 32]          << 
139         stp     F_l, F_h, [dstin, 48]          << 
140         stp     C_l, C_h, [dstend, -32]        << 
141         stp     D_l, D_h, [dstend, -16]        << 
142         ret                                    << 
143                                                << 
144         .p2align 4                             << 
145         /* Copy more than 128 bytes.  */       << 
146 L(copy_long):                                  << 
147         /* Use backwards copy if there is an o << 
148         sub     tmp1, dstin, src               << 
149         cbz     tmp1, L(copy0)                 << 
150         cmp     tmp1, count                    << 
151         b.lo    L(copy_long_backwards)         << 
152                                                << 
153         /* Copy 16 bytes and then align dst to << 
154                                                << 
155         ldp     D_l, D_h, [src]                << 
156         and     tmp1, dstin, 15                << 
157         bic     dst, dstin, 15                 << 
158         sub     src, src, tmp1                 << 
159         add     count, count, tmp1      /* Cou << 
160         ldp     A_l, A_h, [src, 16]            << 
161         stp     D_l, D_h, [dstin]              << 
162         ldp     B_l, B_h, [src, 32]            << 
163         ldp     C_l, C_h, [src, 48]            << 
164         ldp     D_l, D_h, [src, 64]!           << 
165         subs    count, count, 128 + 16  /* Tes << 
166         b.ls    L(copy64_from_end)             << 
167                                                << 
168 L(loop64):                                     << 
169         stp     A_l, A_h, [dst, 16]            << 
170         ldp     A_l, A_h, [src, 16]            << 
171         stp     B_l, B_h, [dst, 32]            << 
172         ldp     B_l, B_h, [src, 32]            << 
173         stp     C_l, C_h, [dst, 48]            << 
174         ldp     C_l, C_h, [src, 48]            << 
175         stp     D_l, D_h, [dst, 64]!           << 
176         ldp     D_l, D_h, [src, 64]!           << 
177         subs    count, count, 64               << 
178         b.hi    L(loop64)                      << 
179                                                << 
180         /* Write the last iteration and copy 6 << 
181 L(copy64_from_end):                            << 
182         ldp     E_l, E_h, [srcend, -64]        << 
183         stp     A_l, A_h, [dst, 16]            << 
184         ldp     A_l, A_h, [srcend, -48]        << 
185         stp     B_l, B_h, [dst, 32]            << 
186         ldp     B_l, B_h, [srcend, -32]        << 
187         stp     C_l, C_h, [dst, 48]            << 
188         ldp     C_l, C_h, [srcend, -16]        << 
189         stp     D_l, D_h, [dst, 64]            << 
190         stp     E_l, E_h, [dstend, -64]        << 
191         stp     A_l, A_h, [dstend, -48]        << 
192         stp     B_l, B_h, [dstend, -32]        << 
193         stp     C_l, C_h, [dstend, -16]        << 
194         ret                                    << 
195                                                << 
196         .p2align 4                             << 
197                                                << 
198         /* Large backwards copy for overlappin << 
199            Copy 16 bytes and then align dst to << 
200 L(copy_long_backwards):                        << 
201         ldp     D_l, D_h, [srcend, -16]        << 
202         and     tmp1, dstend, 15               << 
203         sub     srcend, srcend, tmp1           << 
204         sub     count, count, tmp1             << 
205         ldp     A_l, A_h, [srcend, -16]        << 
206         stp     D_l, D_h, [dstend, -16]        << 
207         ldp     B_l, B_h, [srcend, -32]        << 
208         ldp     C_l, C_h, [srcend, -48]        << 
209         ldp     D_l, D_h, [srcend, -64]!       << 
210         sub     dstend, dstend, tmp1           << 
211         subs    count, count, 128              << 
212         b.ls    L(copy64_from_start)           << 
213                                                << 
214 L(loop64_backwards):                           << 
215         stp     A_l, A_h, [dstend, -16]        << 
216         ldp     A_l, A_h, [srcend, -16]        << 
217         stp     B_l, B_h, [dstend, -32]        << 
218         ldp     B_l, B_h, [srcend, -32]        << 
219         stp     C_l, C_h, [dstend, -48]        << 
220         ldp     C_l, C_h, [srcend, -48]        << 
221         stp     D_l, D_h, [dstend, -64]!       << 
222         ldp     D_l, D_h, [srcend, -64]!       << 
223         subs    count, count, 64               << 
224         b.hi    L(loop64_backwards)            << 
225                                                << 
226         /* Write the last iteration and copy 6 << 
227 L(copy64_from_start):                          << 
228         ldp     G_l, G_h, [src, 48]            << 
229         stp     A_l, A_h, [dstend, -16]        << 
230         ldp     A_l, A_h, [src, 32]            << 
231         stp     B_l, B_h, [dstend, -32]        << 
232         ldp     B_l, B_h, [src, 16]            << 
233         stp     C_l, C_h, [dstend, -48]        << 
234         ldp     C_l, C_h, [src]                << 
235         stp     D_l, D_h, [dstend, -64]        << 
236         stp     G_l, G_h, [dstin, 48]          << 
237         stp     A_l, A_h, [dstin, 32]          << 
238         stp     B_l, B_h, [dstin, 16]          << 
239         stp     C_l, C_h, [dstin]              << 
240         ret                                    << 
241 SYM_FUNC_END(__pi_memcpy)                      << 
242                                                   496 
243 SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)          !! 497         and             %o2, 0xe, %o3
244 EXPORT_SYMBOL(__memcpy)                        !! 498 20:
245 SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)          !! 499         sethi           %hi(89f), %o5
246 EXPORT_SYMBOL(memcpy)                          !! 500         sll             %o3, 3, %o4
247                                                !! 501         add             %o0, %o3, %o0
248 SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)      !! 502         sub             %o5, %o4, %o5
249                                                !! 503         add             %o1, %o3, %o1
250 SYM_FUNC_ALIAS(__memmove, __pi_memmove)        !! 504         jmpl            %o5 + %lo(89f), %g0
251 EXPORT_SYMBOL(__memmove)                       !! 505          andcc          %o2, 1, %g0
252 SYM_FUNC_ALIAS_WEAK(memmove, __memmove)        !! 506 
253 EXPORT_SYMBOL(memmove)                         !! 507         MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
                                                   >> 508         MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
                                                   >> 509         MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
                                                   >> 510         MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
                                                   >> 511         MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
                                                   >> 512         MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
                                                   >> 513         MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
                                                   >> 514 
                                                   >> 515 89:     /* short_table_end */
                                                   >> 516 
                                                   >> 517         be              1f
                                                   >> 518          nop
                                                   >> 519 
                                                   >> 520         ldub            [%o1], %g2
                                                   >> 521         stb             %g2, [%o0]
                                                   >> 522 1:
                                                   >> 523         retl
                                                   >> 524          mov            %g7, %o0
                                                   >> 525 
                                                   >> 526 90:     /* short_aligned_end */
                                                   >> 527         bne             88b
                                                   >> 528          andcc          %o2, 8, %g0
                                                   >> 529 
                                                   >> 530         be              1f
                                                   >> 531          andcc          %o2, 4, %g0
                                                   >> 532 
                                                   >> 533         ld              [%o1 + 0x00], %g2
                                                   >> 534         ld              [%o1 + 0x04], %g3
                                                   >> 535         add             %o1, 8, %o1
                                                   >> 536         st              %g2, [%o0 + 0x00]
                                                   >> 537         st              %g3, [%o0 + 0x04]
                                                   >> 538         add             %o0, 8, %o0
                                                   >> 539 1:
                                                   >> 540         b               81b
                                                   >> 541          mov            %o2, %g1
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php