~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/loongarch/lib/memcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/loongarch/lib/memcpy.S (Architecture i386) and /arch/sparc/lib/memcpy.S (Architecture sparc)


  1 /* SPDX-License-Identifier: GPL-2.0 */              1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*                                             !!   2 /* memcpy.S: Sparc optimized memcpy and memmove code
  3  * Copyright (C) 2020-2022 Loongson Technology !!   3  * Hand optimized from GNU libc's memcpy and memmove
                                                   >>   4  * Copyright (C) 1991,1996 Free Software Foundation
                                                   >>   5  * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
                                                   >>   6  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
                                                   >>   7  * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
                                                   >>   8  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  4  */                                                 9  */
  5                                                    10 
  6 #include <linux/export.h>                          11 #include <linux/export.h>
  7 #include <asm/alternative-asm.h>               << 
  8 #include <asm/asm.h>                           << 
  9 #include <asm/asmmacro.h>                      << 
 10 #include <asm/cpu.h>                           << 
 11 #include <asm/regdef.h>                        << 
 12 #include <asm/unwind_hints.h>                  << 
 13                                                << 
 14 .section .noinstr.text, "ax"                   << 
 15                                                << 
 16 SYM_FUNC_START(memcpy)                         << 
 17         /*                                     << 
 18          * Some CPUs support hardware unaligne << 
 19          */                                    << 
 20         ALTERNATIVE     "b __memcpy_generic",  << 
 21                         "b __memcpy_fast", CPU << 
 22 SYM_FUNC_END(memcpy)                           << 
 23 SYM_FUNC_ALIAS(__memcpy, memcpy)               << 
 24                                                    12 
 25 EXPORT_SYMBOL(memcpy)                          !!  13 #define FUNC(x)                 \
 26 EXPORT_SYMBOL(__memcpy)                        !!  14         .globl  x;              \
                                                   >>  15         .type   x,@function;    \
                                                   >>  16         .align  4;              \
                                                   >>  17 x:
                                                   >>  18 
                                                   >>  19 /* Both these macros have to start with exactly the same insn */
                                                   >>  20 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
                                                   >>  21         ldd     [%src + (offset) + 0x00], %t0; \
                                                   >>  22         ldd     [%src + (offset) + 0x08], %t2; \
                                                   >>  23         ldd     [%src + (offset) + 0x10], %t4; \
                                                   >>  24         ldd     [%src + (offset) + 0x18], %t6; \
                                                   >>  25         st      %t0, [%dst + (offset) + 0x00]; \
                                                   >>  26         st      %t1, [%dst + (offset) + 0x04]; \
                                                   >>  27         st      %t2, [%dst + (offset) + 0x08]; \
                                                   >>  28         st      %t3, [%dst + (offset) + 0x0c]; \
                                                   >>  29         st      %t4, [%dst + (offset) + 0x10]; \
                                                   >>  30         st      %t5, [%dst + (offset) + 0x14]; \
                                                   >>  31         st      %t6, [%dst + (offset) + 0x18]; \
                                                   >>  32         st      %t7, [%dst + (offset) + 0x1c];
                                                   >>  33 
                                                   >>  34 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
                                                   >>  35         ldd     [%src + (offset) + 0x00], %t0; \
                                                   >>  36         ldd     [%src + (offset) + 0x08], %t2; \
                                                   >>  37         ldd     [%src + (offset) + 0x10], %t4; \
                                                   >>  38         ldd     [%src + (offset) + 0x18], %t6; \
                                                   >>  39         std     %t0, [%dst + (offset) + 0x00]; \
                                                   >>  40         std     %t2, [%dst + (offset) + 0x08]; \
                                                   >>  41         std     %t4, [%dst + (offset) + 0x10]; \
                                                   >>  42         std     %t6, [%dst + (offset) + 0x18];
                                                   >>  43 
                                                   >>  44 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
                                                   >>  45         ldd     [%src - (offset) - 0x10], %t0; \
                                                   >>  46         ldd     [%src - (offset) - 0x08], %t2; \
                                                   >>  47         st      %t0, [%dst - (offset) - 0x10]; \
                                                   >>  48         st      %t1, [%dst - (offset) - 0x0c]; \
                                                   >>  49         st      %t2, [%dst - (offset) - 0x08]; \
                                                   >>  50         st      %t3, [%dst - (offset) - 0x04];
                                                   >>  51 
                                                   >>  52 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
                                                   >>  53         ldd     [%src - (offset) - 0x10], %t0; \
                                                   >>  54         ldd     [%src - (offset) - 0x08], %t2; \
                                                   >>  55         std     %t0, [%dst - (offset) - 0x10]; \
                                                   >>  56         std     %t2, [%dst - (offset) - 0x08];
                                                   >>  57 
                                                   >>  58 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
                                                   >>  59         ldub    [%src - (offset) - 0x02], %t0; \
                                                   >>  60         ldub    [%src - (offset) - 0x01], %t1; \
                                                   >>  61         stb     %t0, [%dst - (offset) - 0x02]; \
                                                   >>  62         stb     %t1, [%dst - (offset) - 0x01];
                                                   >>  63 
                                                   >>  64         .text
                                                   >>  65         .align  4
                                                   >>  66 
                                                   >>  67 FUNC(memmove)
                                                   >>  68 EXPORT_SYMBOL(memmove)
                                                   >>  69         cmp             %o0, %o1
                                                   >>  70         mov             %o0, %g7
                                                   >>  71         bleu            9f
                                                   >>  72          sub            %o0, %o1, %o4
                                                   >>  73 
                                                   >>  74         add             %o1, %o2, %o3
                                                   >>  75         cmp             %o3, %o0
                                                   >>  76         bleu            0f
                                                   >>  77          andcc          %o4, 3, %o5
                                                   >>  78 
                                                   >>  79         add             %o1, %o2, %o1
                                                   >>  80         add             %o0, %o2, %o0
                                                   >>  81         sub             %o1, 1, %o1
                                                   >>  82         sub             %o0, 1, %o0
                                                   >>  83         
                                                   >>  84 1:      /* reverse_bytes */
                                                   >>  85 
                                                   >>  86         ldub            [%o1], %o4
                                                   >>  87         subcc           %o2, 1, %o2
                                                   >>  88         stb             %o4, [%o0]
                                                   >>  89         sub             %o1, 1, %o1
                                                   >>  90         bne             1b
                                                   >>  91          sub            %o0, 1, %o0
                                                   >>  92 
                                                   >>  93         retl
                                                   >>  94          mov            %g7, %o0
                                                   >>  95 
                                                   >>  96 /* NOTE: This code is executed just for the cases,
                                                   >>  97          where %src (=%o1) & 3 is != 0.
                                                   >>  98          We need to align it to 4. So, for (%src & 3)
                                                   >>  99          1 we need to do ldub,lduh
                                                   >> 100          2 lduh
                                                   >> 101          3 just ldub
                                                   >> 102          so even if it looks weird, the branches
                                                   >> 103          are correct here. -jj
                                                   >> 104  */
                                                   >> 105 78:     /* dword_align */
 27                                                   106 
 28 _ASM_NOKPROBE(memcpy)                          !! 107         andcc           %o1, 1, %g0
 29 _ASM_NOKPROBE(__memcpy)                        !! 108         be              4f
                                                   >> 109          andcc          %o1, 2, %g0
                                                   >> 110 
                                                   >> 111         ldub            [%o1], %g2
                                                   >> 112         add             %o1, 1, %o1
                                                   >> 113         stb             %g2, [%o0]
                                                   >> 114         sub             %o2, 1, %o2
                                                   >> 115         bne             3f
                                                   >> 116          add            %o0, 1, %o0
                                                   >> 117 4:
                                                   >> 118         lduh            [%o1], %g2
                                                   >> 119         add             %o1, 2, %o1
                                                   >> 120         sth             %g2, [%o0]
                                                   >> 121         sub             %o2, 2, %o2
                                                   >> 122         b               3f
                                                   >> 123          add            %o0, 2, %o0
 30                                                   124 
 31 /*                                             !! 125 FUNC(memcpy)    /* %o0=dst %o1=src %o2=len */
 32  * void *__memcpy_generic(void *dst, const voi !! 126 EXPORT_SYMBOL(memcpy)
 33  *                                             << 
 34  * a0: dst                                     << 
 35  * a1: src                                     << 
 36  * a2: n                                       << 
 37  */                                            << 
 38 SYM_FUNC_START(__memcpy_generic)               << 
 39         move    a3, a0                         << 
 40         beqz    a2, 2f                         << 
 41                                                << 
 42 1:      ld.b    t0, a1, 0                      << 
 43         st.b    t0, a0, 0                      << 
 44         addi.d  a0, a0, 1                      << 
 45         addi.d  a1, a1, 1                      << 
 46         addi.d  a2, a2, -1                     << 
 47         bgt     a2, zero, 1b                   << 
 48                                                << 
 49 2:      move    a0, a3                         << 
 50         jr      ra                             << 
 51 SYM_FUNC_END(__memcpy_generic)                 << 
 52 _ASM_NOKPROBE(__memcpy_generic)                << 
 53                                                << 
 54         .align  5                              << 
 55 SYM_FUNC_START_NOALIGN(__memcpy_small)         << 
 56         pcaddi  t0, 8                          << 
 57         slli.d  a2, a2, 5                      << 
 58         add.d   t0, t0, a2                     << 
 59         jr      t0                             << 
 60                                                << 
 61         .align  5                              << 
 62 0:      jr      ra                             << 
 63                                                << 
 64         .align  5                              << 
 65 1:      ld.b    t0, a1, 0                      << 
 66         st.b    t0, a0, 0                      << 
 67         jr      ra                             << 
 68                                                << 
 69         .align  5                              << 
 70 2:      ld.h    t0, a1, 0                      << 
 71         st.h    t0, a0, 0                      << 
 72         jr      ra                             << 
 73                                                << 
 74         .align  5                              << 
 75 3:      ld.h    t0, a1, 0                      << 
 76         ld.b    t1, a1, 2                      << 
 77         st.h    t0, a0, 0                      << 
 78         st.b    t1, a0, 2                      << 
 79         jr      ra                             << 
 80                                                << 
 81         .align  5                              << 
 82 4:      ld.w    t0, a1, 0                      << 
 83         st.w    t0, a0, 0                      << 
 84         jr      ra                             << 
 85                                                << 
 86         .align  5                              << 
 87 5:      ld.w    t0, a1, 0                      << 
 88         ld.b    t1, a1, 4                      << 
 89         st.w    t0, a0, 0                      << 
 90         st.b    t1, a0, 4                      << 
 91         jr      ra                             << 
 92                                                << 
 93         .align  5                              << 
 94 6:      ld.w    t0, a1, 0                      << 
 95         ld.h    t1, a1, 4                      << 
 96         st.w    t0, a0, 0                      << 
 97         st.h    t1, a0, 4                      << 
 98         jr      ra                             << 
 99                                                << 
100         .align  5                              << 
101 7:      ld.w    t0, a1, 0                      << 
102         ld.w    t1, a1, 3                      << 
103         st.w    t0, a0, 0                      << 
104         st.w    t1, a0, 3                      << 
105         jr      ra                             << 
106                                                << 
107         .align  5                              << 
108 8:      ld.d    t0, a1, 0                      << 
109         st.d    t0, a0, 0                      << 
110         jr      ra                             << 
111 SYM_FUNC_END(__memcpy_small)                   << 
112 _ASM_NOKPROBE(__memcpy_small)                  << 
113                                                << 
114 /*                                             << 
115  * void *__memcpy_fast(void *dst, const void * << 
116  *                                             << 
117  * a0: dst                                     << 
118  * a1: src                                     << 
119  * a2: n                                       << 
120  */                                            << 
121 SYM_FUNC_START(__memcpy_fast)                  << 
122         sltui   t0, a2, 9                      << 
123         bnez    t0, __memcpy_small             << 
124                                                << 
125         add.d   a3, a1, a2                     << 
126         add.d   a2, a0, a2                     << 
127         ld.d    a6, a1, 0                      << 
128         ld.d    a7, a3, -8                     << 
129                                                << 
130         /* align up destination address */     << 
131         andi    t1, a0, 7                      << 
132         sub.d   t0, zero, t1                   << 
133         addi.d  t0, t0, 8                      << 
134         add.d   a1, a1, t0                     << 
135         add.d   a5, a0, t0                     << 
136                                                << 
137         addi.d  a4, a3, -64                    << 
138         bgeu    a1, a4, .Llt64                 << 
139                                                << 
140         /* copy 64 bytes at a time */          << 
141 .Lloop64:                                      << 
142         ld.d    t0, a1, 0                      << 
143         ld.d    t1, a1, 8                      << 
144         ld.d    t2, a1, 16                     << 
145         ld.d    t3, a1, 24                     << 
146         ld.d    t4, a1, 32                     << 
147         ld.d    t5, a1, 40                     << 
148         ld.d    t6, a1, 48                     << 
149         ld.d    t7, a1, 56                     << 
150         addi.d  a1, a1, 64                     << 
151         st.d    t0, a5, 0                      << 
152         st.d    t1, a5, 8                      << 
153         st.d    t2, a5, 16                     << 
154         st.d    t3, a5, 24                     << 
155         st.d    t4, a5, 32                     << 
156         st.d    t5, a5, 40                     << 
157         st.d    t6, a5, 48                     << 
158         st.d    t7, a5, 56                     << 
159         addi.d  a5, a5, 64                     << 
160         bltu    a1, a4, .Lloop64               << 
161                                                << 
162         /* copy the remaining bytes */         << 
163 .Llt64:                                        << 
164         addi.d  a4, a3, -32                    << 
165         bgeu    a1, a4, .Llt32                 << 
166         ld.d    t0, a1, 0                      << 
167         ld.d    t1, a1, 8                      << 
168         ld.d    t2, a1, 16                     << 
169         ld.d    t3, a1, 24                     << 
170         addi.d  a1, a1, 32                     << 
171         st.d    t0, a5, 0                      << 
172         st.d    t1, a5, 8                      << 
173         st.d    t2, a5, 16                     << 
174         st.d    t3, a5, 24                     << 
175         addi.d  a5, a5, 32                     << 
176                                                << 
177 .Llt32:                                        << 
178         addi.d  a4, a3, -16                    << 
179         bgeu    a1, a4, .Llt16                 << 
180         ld.d    t0, a1, 0                      << 
181         ld.d    t1, a1, 8                      << 
182         addi.d  a1, a1, 16                     << 
183         st.d    t0, a5, 0                      << 
184         st.d    t1, a5, 8                      << 
185         addi.d  a5, a5, 16                     << 
186                                                << 
187 .Llt16:                                        << 
188         addi.d  a4, a3, -8                     << 
189         bgeu    a1, a4, .Llt8                  << 
190         ld.d    t0, a1, 0                      << 
191         st.d    t0, a5, 0                      << 
192                                                << 
193 .Llt8:                                         << 
194         st.d    a6, a0, 0                      << 
195         st.d    a7, a2, -8                     << 
196                                                << 
197         /* return */                           << 
198         jr      ra                             << 
199 SYM_FUNC_END(__memcpy_fast)                    << 
200 _ASM_NOKPROBE(__memcpy_fast)                   << 
201                                                   127 
202 STACK_FRAME_NON_STANDARD __memcpy_small        !! 128         sub             %o0, %o1, %o4
                                                   >> 129         mov             %o0, %g7
                                                   >> 130 9:
                                                   >> 131         andcc           %o4, 3, %o5
                                                   >> 132 0:
                                                   >> 133         bne             86f
                                                   >> 134          cmp            %o2, 15
                                                   >> 135 
                                                   >> 136         bleu            90f
                                                   >> 137          andcc          %o1, 3, %g0
                                                   >> 138 
                                                   >> 139         bne             78b
                                                   >> 140 3:
                                                   >> 141          andcc          %o1, 4, %g0
                                                   >> 142 
                                                   >> 143         be              2f
                                                   >> 144          mov            %o2, %g1
                                                   >> 145 
                                                   >> 146         ld              [%o1], %o4
                                                   >> 147         sub             %g1, 4, %g1
                                                   >> 148         st              %o4, [%o0]
                                                   >> 149         add             %o1, 4, %o1
                                                   >> 150         add             %o0, 4, %o0
                                                   >> 151 2:
                                                   >> 152         andcc           %g1, 0xffffff80, %g0
                                                   >> 153         be              3f
                                                   >> 154          andcc          %o0, 4, %g0
                                                   >> 155 
                                                   >> 156         be              82f + 4
                                                   >> 157 5:
                                                   >> 158         MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 159         MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 160         MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 161         MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 162         sub             %g1, 128, %g1
                                                   >> 163         add             %o1, 128, %o1
                                                   >> 164         cmp             %g1, 128
                                                   >> 165         bge             5b
                                                   >> 166          add            %o0, 128, %o0
                                                   >> 167 3:
                                                   >> 168         andcc           %g1, 0x70, %g4
                                                   >> 169         be              80f
                                                   >> 170          andcc          %g1, 8, %g0
                                                   >> 171 
                                                   >> 172         sethi           %hi(80f), %o5
                                                   >> 173         srl             %g4, 1, %o4
                                                   >> 174         add             %g4, %o4, %o4
                                                   >> 175         add             %o1, %g4, %o1
                                                   >> 176         sub             %o5, %o4, %o5
                                                   >> 177         jmpl            %o5 + %lo(80f), %g0
                                                   >> 178          add            %o0, %g4, %o0
                                                   >> 179 
                                                   >> 180 79:     /* memcpy_table */
                                                   >> 181 
                                                   >> 182         MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
                                                   >> 183         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
                                                   >> 184         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
                                                   >> 185         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
                                                   >> 186         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
                                                   >> 187         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
                                                   >> 188         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
                                                   >> 189 
                                                   >> 190 80:     /* memcpy_table_end */
                                                   >> 191         be              81f
                                                   >> 192          andcc          %g1, 4, %g0
                                                   >> 193 
                                                   >> 194         ldd             [%o1], %g2
                                                   >> 195         add             %o0, 8, %o0
                                                   >> 196         st              %g2, [%o0 - 0x08]
                                                   >> 197         add             %o1, 8, %o1
                                                   >> 198         st              %g3, [%o0 - 0x04]
                                                   >> 199 
                                                   >> 200 81:     /* memcpy_last7 */
                                                   >> 201 
                                                   >> 202         be              1f
                                                   >> 203          andcc          %g1, 2, %g0
                                                   >> 204 
                                                   >> 205         ld              [%o1], %g2
                                                   >> 206         add             %o1, 4, %o1
                                                   >> 207         st              %g2, [%o0]
                                                   >> 208         add             %o0, 4, %o0
                                                   >> 209 1:
                                                   >> 210         be              1f
                                                   >> 211          andcc          %g1, 1, %g0
                                                   >> 212 
                                                   >> 213         lduh            [%o1], %g2
                                                   >> 214         add             %o1, 2, %o1
                                                   >> 215         sth             %g2, [%o0]
                                                   >> 216         add             %o0, 2, %o0
                                                   >> 217 1:
                                                   >> 218         be              1f
                                                   >> 219          nop
                                                   >> 220 
                                                   >> 221         ldub            [%o1], %g2
                                                   >> 222         stb             %g2, [%o0]
                                                   >> 223 1:
                                                   >> 224         retl
                                                   >> 225          mov            %g7, %o0
                                                   >> 226 
                                                   >> 227 82:     /* ldd_std: bulk copy loop, 128 bytes per pass; src = %o1, dst = %o0, remaining count = %g1. The four MOVE_BIGALIGNCHUNK invocations cover offsets 0x00-0x7f; the macro is defined outside this view (presumably 0x20 bytes each via ldd/std - confirm). */
                                                   >> 228         MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 229         MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 230         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 231         MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
                                                   >> 232         subcc           %g1, 128, %g1   /* account for the 128 bytes just handled */
                                                   >> 233         add             %o1, 128, %o1   /* advance src */
                                                   >> 234         cmp             %g1, 128        /* room for another full 128-byte pass? */
                                                   >> 235         bge             82b             /* yes: go round again */
                                                   >> 236          add            %o0, 128, %o0   /* delay slot: advance dst; runs whether or not the branch is taken */
                                                   >> 237 
                                                   >> 238         andcc           %g1, 0x70, %g4  /* %g4 = count & 0x70: a multiple of 16 in the range 0..0x70 */
                                                   >> 239         be              84f             /* zero: nothing for the table to do */
                                                   >> 240          andcc          %g1, 8, %g0     /* delay slot: set flags for the 8-byte test performed at 84 */
                                                   >> 241 
                                                   >> 242         sethi           %hi(84f), %o5   /* start building the computed jump target 84f - %g4 */
                                                   >> 243         add             %o1, %g4, %o1   /* pre-advance src by %g4 bytes */
                                                   >> 244         sub             %o5, %g4, %o5   /* back up %g4 bytes of code from 84 */
                                                   >> 245         jmpl            %o5 + %lo(84f), %g0     /* jump into the table below; return address discarded into %g0 */
                                                   >> 246          add            %o0, %g4, %o0   /* delay slot: pre-advance dst by %g4 bytes */
                                                   >> 247 
                                                   >> 248 83:     /* amemcpy_table: unrolled tail entered part-way through by the jmpl above. NOTE(review): the jump arithmetic (one code byte backed up per data byte) implies each MOVE_LASTALIGNCHUNK occupies 16 bytes of code and moves 16 bytes at a negative offset from the pre-advanced pointers - the macro is not visible here, confirm. */
                                                   >> 249 
                                                   >> 250         MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
                                                   >> 251         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
                                                   >> 252         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
                                                   >> 253         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
                                                   >> 254         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
                                                   >> 255         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
                                                   >> 256         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
                                                   >> 257 
                                                   >> 258 84:     /* amemcpy_table_end: copy the last 0..15 bytes in 8/4/2/1-byte pieces selected by bits 3..0 of %g1. Each `be` tests the flags set by the andcc in the previous branch's delay slot. */
                                                   >> 259         be              85f             /* bit 3 of %g1 clear: no 8-byte piece (flags presumably still those of the earlier `andcc %g1, 8, %g0`; assumes the table macros leave the condition codes alone) */
                                                   >> 260          andcc          %g1, 4, %g0     /* delay slot: flags for the 4-byte test at 85 */
                                                   >> 261 
                                                   >> 262         ldd             [%o1], %g2      /* doubleword load into the %g2/%g3 pair */
                                                   >> 263         add             %o0, 8, %o0
                                                   >> 264         std             %g2, [%o0 - 0x08]       /* doubleword store at the pre-increment dst address */
                                                   >> 265         add             %o1, 8, %o1
                                                   >> 266 85:     /* amemcpy_last7: remaining word, halfword and byte */
                                                   >> 267         be              1f              /* bit 2 clear: no word to copy */
                                                   >> 268          andcc          %g1, 2, %g0     /* delay slot: flags for the halfword test */
                                                   >> 269 
                                                   >> 270         ld              [%o1], %g2      /* copy one word */
                                                   >> 271         add             %o1, 4, %o1
                                                   >> 272         st              %g2, [%o0]
                                                   >> 273         add             %o0, 4, %o0
                                                   >> 274 1:
                                                   >> 275         be              1f              /* bit 1 clear: no halfword to copy */
                                                   >> 276          andcc          %g1, 1, %g0     /* delay slot: flags for the final byte test */
                                                   >> 277 
                                                   >> 278         lduh            [%o1], %g2      /* copy one halfword */
                                                   >> 279         add             %o1, 2, %o1
                                                   >> 280         sth             %g2, [%o0]
                                                   >> 281         add             %o0, 2, %o0
                                                   >> 282 1:
                                                   >> 283         be              1f              /* bit 0 clear: nothing left */
                                                   >> 284          nop
                                                   >> 285 
                                                   >> 286         ldub            [%o1], %g2      /* copy the last byte */
                                                   >> 287         stb             %g2, [%o0]
                                                   >> 288 1:
                                                   >> 289         retl
                                                   >> 290          mov            %g7, %o0        /* delay slot: return value = %g7 (set outside this view; presumably the original dst - confirm) */
                                                   >> 291 
                                                   >> 292 86:     /* non_aligned: reached from outside this view with %o0/%o1/%o2 used as dst/src/byte count. Aligns dst to a word, then copies whole words built by shifting and OR-ing pairs of aligned source words. */
                                                   >> 293         cmp             %o2, 6
                                                   >> 294         bleu            88f             /* 6 bytes or fewer: take the short path at 88 */
                                                   >> 295          nop
                                                   >> 296 
                                                   >> 297         save            %sp, -96, %sp   /* new register window: the code below addresses the incoming %o0-%o2 as %i0-%i2 */
                                                   >> 298         andcc           %i0, 3, %g0     /* dst already word-aligned? */
                                                   >> 299         be              61f             /* yes: skip the alignment copies */
                                                   >> 300          andcc          %i0, 1, %g0     /* delay slot: is dst odd? */
                                                   >> 301         be              60f             /* dst even, so dst & 3 == 2: copy two bytes at 60 */
                                                   >> 302          andcc          %i0, 2, %g0     /* delay slot: flags consumed by the bne below */
                                                   >> 303 
                                                   >> 304         ldub            [%i1], %g5      /* dst odd: copy one byte */
                                                   >> 305         add             %i1, 1, %i1
                                                   >> 306         stb             %g5, [%i0]
                                                   >> 307         sub             %i2, 1, %i2
                                                   >> 308         bne             61f             /* bit 1 of the original dst was set (dst & 3 == 3): dst is word-aligned after this byte */
                                                   >> 309          add            %i0, 1, %i0     /* delay slot: advance dst */
                                                   >> 310 60:
                                                   >> 311         ldub            [%i1], %g3      /* copy two bytes to reach word alignment */
                                                   >> 312         add             %i1, 2, %i1
                                                   >> 313         stb             %g3, [%i0]
                                                   >> 314         sub             %i2, 2, %i2
                                                   >> 315         ldub            [%i1 - 1], %g3
                                                   >> 316         add             %i0, 2, %i0
                                                   >> 317         stb             %g3, [%i0 - 1]
                                                   >> 318 61:
                                                   >> 319         and             %i1, 3, %g2     /* %g2 = src offset within its word (0..3) */
                                                   >> 320         and             %i2, 0xc, %g3   /* %g3 = count & 0xc: selects the entry point into the unrolled loop */
                                                   >> 321         and             %i1, -4, %i1    /* round src down to a word boundary */
                                                   >> 322         cmp             %g3, 4
                                                   >> 323         sll             %g2, 3, %g4     /* %g4 = 8 * offset: left-shift amount in bits */
                                                   >> 324         mov             32, %g2
                                                   >> 325         be              4f              /* (count & 0xc) == 4 */
                                                   >> 326          sub            %g2, %g4, %l0   /* delay slot: %l0 = 32 - %g4, the complementary right-shift amount */
                                                   >> 327         
                                                   >> 328         blu             3f              /* (count & 0xc) == 0; still testing the cmp against 4 */
                                                   >> 329          cmp            %g3, 0x8        /* delay slot */
                                                   >> 330 
                                                   >> 331         be              2f              /* (count & 0xc) == 8 */
                                                   >> 332          srl            %i2, 2, %g3     /* delay slot: %g3 = count / 4 */
                                                   >> 333 
                                                   >> 334         ld              [%i1], %i3      /* remaining case, (count & 0xc) == 0xc: preload two words and enter the loop at 8 */
                                                   >> 335         add             %i0, -8, %i0    /* bias dst so the `[%i0 + 8]` store at 8 lands on the current dst */
                                                   >> 336         ld              [%i1 + 4], %i4
                                                   >> 337         b               8f
                                                   >> 338          add            %g3, 1, %g3     /* delay slot */
                                                   >> 339 2:
                                                   >> 340         ld              [%i1], %i4      /* preload two words and enter the loop at 9 */
                                                   >> 341         add             %i0, -12, %i0   /* bias dst for the `[%i0 + 12]` store at 9 */
                                                   >> 342         ld              [%i1 + 4], %i5
                                                   >> 343         add             %g3, 2, %g3
                                                   >> 344         b               9f
                                                   >> 345          add            %i1, -4, %i1    /* delay slot */
                                                   >> 346 3:
                                                   >> 347         ld              [%i1], %g1      /* preload two words and enter the loop at 7 */
                                                   >> 348         add             %i0, -4, %i0    /* bias dst for the `[%i0 + 4]` store at 7 */
                                                   >> 349         ld              [%i1 + 4], %i3
                                                   >> 350         srl             %i2, 2, %g3     /* %g3 = count / 4 */
                                                   >> 351         b               7f
                                                   >> 352          add            %i1, 4, %i1     /* delay slot */
                                                   >> 353 4:
                                                   >> 354         ld              [%i1], %i5
                                                   >> 355         cmp             %i2, 7
                                                   >> 356         ld              [%i1 + 4], %g1
                                                   >> 357         srl             %i2, 2, %g3     /* %g3 = count / 4 */
                                                   >> 358         bleu            10f             /* 7 bytes or fewer left: only the final word merge at 10 is needed */
                                                   >> 359          add            %i1, 8, %i1     /* delay slot */
                                                   >> 360 
                                                   >> 361         ld              [%i1], %i3
                                                   >> 362         add             %g3, -1, %g3
                                                   >> 363 5:     /* unrolled loop: each stage (5, 7, 8, 9) stores one word merged from two previously loaded source words; stages 7-9 also load the next source word */
                                                   >> 364         sll             %i5, %g4, %g2   /* earlier word shifted left by %g4 */
                                                   >> 365         srl             %g1, %l0, %g5   /* later word shifted right by %l0 */
                                                   >> 366         or              %g2, %g5, %g2
                                                   >> 367         st              %g2, [%i0]
                                                   >> 368 7:
                                                   >> 369         ld              [%i1 + 4], %i4
                                                   >> 370         sll             %g1, %g4, %g2
                                                   >> 371         srl             %i3, %l0, %g5
                                                   >> 372         or              %g2, %g5, %g2
                                                   >> 373         st              %g2, [%i0 + 4]
                                                   >> 374 8:
                                                   >> 375         ld              [%i1 + 8], %i5
                                                   >> 376         sll             %i3, %g4, %g2
                                                   >> 377         srl             %i4, %l0, %g5
                                                   >> 378         or              %g2, %g5, %g2
                                                   >> 379         st              %g2, [%i0 + 8]
                                                   >> 380 9:
                                                   >> 381         ld              [%i1 + 12], %g1
                                                   >> 382         sll             %i4, %g4, %g2
                                                   >> 383         srl             %i5, %l0, %g5
                                                   >> 384         addcc           %g3, -4, %g3    /* four words per pass; sets the flags tested by the bne,a below */
                                                   >> 385         or              %g2, %g5, %g2
                                                   >> 386         add             %i1, 16, %i1
                                                   >> 387         st              %g2, [%i0 + 12]
                                                   >> 388         add             %i0, 16, %i0
                                                   >> 389         bne,a           5b              /* word counter not yet zero: another pass */
                                                   >> 390          ld             [%i1], %i3      /* annulled delay slot: executed only when the branch is taken */
                                                   >> 391 10:    /* final merged word, then up to three tail bytes */
                                                   >> 392         sll             %i5, %g4, %g2
                                                   >> 393         srl             %g1, %l0, %g5
                                                   >> 394         srl             %l0, 3, %g3     /* %g3 = (32 - shift) / 8, a byte count */
                                                   >> 395         or              %g2, %g5, %g2
                                                   >> 396         sub             %i1, %g3, %i1   /* step src back by %g3 bytes; NOTE(review): presumably restores the byte-exact src position for the tail copy - confirm */
                                                   >> 397         andcc           %i2, 2, %g0     /* bit 1 of count: two tail bytes to copy? */
                                                   >> 398         st              %g2, [%i0]
                                                   >> 399         be              1f
                                                   >> 400          andcc          %i2, 1, %g0     /* delay slot: flags for the single-byte test below */
                                                   >> 401 
                                                   >> 402         ldub            [%i1], %g2      /* copy two tail bytes; the word just stored occupies [%i0 .. %i0 + 3], hence the +4 offsets */
                                                   >> 403         add             %i1, 2, %i1
                                                   >> 404         stb             %g2, [%i0 + 4]
                                                   >> 405         add             %i0, 2, %i0
                                                   >> 406         ldub            [%i1 - 1], %g2
                                                   >> 407         stb             %g2, [%i0 + 3]
                                                   >> 408 1:
                                                   >> 409         be              1f              /* bit 0 of count clear: no odd byte left */
                                                   >> 410          nop
                                                   >> 411         ldub            [%i1], %g2      /* copy the last byte */
                                                   >> 412         stb             %g2, [%i0 + 4]
                                                   >> 413 1:
                                                   >> 414         ret
                                                   >> 415          restore        %g7, %g0, %o0   /* delay slot: pop the register window and place %g7 + %g0 in the caller's %o0 (%g7 is set outside this view) */
                                                   >> 416 
                                                   >> 417 88:     /* short_end: copy a short buffer (count in %o2) through a computed jump into the MOVE_SHORTCHUNK table, then one odd byte if needed */
                                                   >> 418 
                                                   >> 419         and             %o2, 0xe, %o3   /* %o3 = count & 0xe: even byte total for the table, at most 14 */
                                                   >> 420 20:
                                                   >> 421         sethi           %hi(89f), %o5   /* start building the computed jump target 89f - 8 * %o3 */
                                                   >> 422         sll             %o3, 3, %o4     /* %o4 = 8 * %o3: code bytes to back up from 89 */
                                                   >> 423         add             %o0, %o3, %o0   /* pre-advance dst */
                                                   >> 424         sub             %o5, %o4, %o5
                                                   >> 425         add             %o1, %o3, %o1   /* pre-advance src */
                                                   >> 426         jmpl            %o5 + %lo(89f), %g0     /* jump into the table below; return address discarded into %g0 */
                                                   >> 427          andcc          %o2, 1, %g0     /* delay slot: flags for the odd-byte test at 89 */
                                                   >> 428 
                                                   >> 429         MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)   /* NOTE(review): the x8 scaling above implies each MOVE_SHORTCHUNK is 16 bytes of code moving 2 bytes - macro not visible here, confirm */
                                                   >> 430         MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
                                                   >> 431         MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
                                                   >> 432         MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
                                                   >> 433         MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
                                                   >> 434         MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
                                                   >> 435         MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
                                                   >> 436 
                                                   >> 437 89:     /* short_table_end: handle the odd trailing byte and return */
                                                   >> 438 
                                                   >> 439         be              1f              /* bit 0 of count clear: done (assumes the table macros leave the condition codes alone) */
                                                   >> 440          nop
                                                   >> 441 
                                                   >> 442         ldub            [%o1], %g2      /* copy the last byte */
                                                   >> 443         stb             %g2, [%o0]
                                                   >> 444 1:
                                                   >> 445         retl
                                                   >> 446          mov            %g7, %o0        /* delay slot: return value = %g7 (set outside this view; presumably the original dst - confirm) */
                                                   >> 447 
                                                   >> 448 90:     /* short_aligned_end: reached from outside this view; copies an optional 8-byte piece with word loads/stores, then hands the rest to the code at label 81 */
                                                   >> 449         bne             88b             /* condition codes come from code before this view; if "not equal", fall back to the byte-pair copy at 88 */
                                                   >> 450          andcc          %o2, 8, %g0     /* delay slot: test bit 3 of the count */
                                                   >> 451 
                                                   >> 452         be              1f              /* bit 3 clear: no 8-byte piece */
                                                   >> 453          andcc          %o2, 4, %g0     /* delay slot: test bit 2 of the count; no load/store/add below changes the flags, so this is what the code at 81 sees on entry */
                                                   >> 454 
                                                   >> 455         ld              [%o1 + 0x00], %g2       /* copy 8 bytes as two words */
                                                   >> 456         ld              [%o1 + 0x04], %g3
                                                   >> 457         add             %o1, 8, %o1
                                                   >> 458         st              %g2, [%o0 + 0x00]
                                                   >> 459         st              %g3, [%o0 + 0x04]
                                                   >> 460         add             %o0, 8, %o0
                                                   >> 461 1:
                                                   >> 462         b               81b             /* label 81 lies before this view */
                                                   >> 463          mov            %o2, %g1        /* delay slot: pass the count in %g1; presumably what the code at 81 tests - confirm */
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php