~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/sparc/lib/NG4memcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/sparc/lib/NG4memcpy.S (Architecture alpha) and /arch/sparc/lib/NG4memcpy.S (Architecture sparc)


  1 /* SPDX-License-Identifier: GPL-2.0 */              1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /* NG4memcpy.S: Niagara-4 optimized memcpy.         2 /* NG4memcpy.S: Niagara-4 optimized memcpy.
  3  *                                                  3  *
  4  * Copyright (C) 2012 David S. Miller (davem@d      4  * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
  5  */                                                 5  */
  6                                                     6 
  7 #ifdef __KERNEL__                                   7 #ifdef __KERNEL__
  8 #include <linux/linkage.h>                          8 #include <linux/linkage.h>
  9 #include <asm/visasm.h>                             9 #include <asm/visasm.h>
 10 #include <asm/asi.h>                               10 #include <asm/asi.h>
 11 #define GLOBAL_SPARE    %g7                        11 #define GLOBAL_SPARE    %g7
 12 #else                                              12 #else
 13 #define ASI_BLK_INIT_QUAD_LDD_P 0xe2               13 #define ASI_BLK_INIT_QUAD_LDD_P 0xe2
 14 #define FPRS_FEF  0x04                             14 #define FPRS_FEF  0x04
 15                                                    15 
 16 /* On T4 it is very expensive to access ASRs l     16 /* On T4 it is very expensive to access ASRs like %fprs and
 17  * %asi, avoiding a read or a write can save ~     17  * %asi, avoiding a read or a write can save ~50 cycles.
 18  */                                                18  */
 19 #define FPU_ENTER                       \          19 #define FPU_ENTER                       \
 20         rd      %fprs, %o5;             \          20         rd      %fprs, %o5;             \
 21         andcc   %o5, FPRS_FEF, %g0;     \          21         andcc   %o5, FPRS_FEF, %g0;     \
 22         be,a,pn %icc, 999f;             \          22         be,a,pn %icc, 999f;             \
 23          wr     %g0, FPRS_FEF, %fprs;   \          23          wr     %g0, FPRS_FEF, %fprs;   \
 24         999:                                       24         999:
 25                                                    25 
 26 #ifdef MEMCPY_DEBUG                                26 #ifdef MEMCPY_DEBUG
 27 #define VISEntryHalf FPU_ENTER; \                  27 #define VISEntryHalf FPU_ENTER; \
 28                      clr %g1; clr %g2; clr %g3     28                      clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
 29 #define VISExitHalf and %o5, FPRS_FEF, %o5; wr     29 #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
 30 #else                                              30 #else
 31 #define VISEntryHalf FPU_ENTER                     31 #define VISEntryHalf FPU_ENTER
 32 #define VISExitHalf and %o5, FPRS_FEF, %o5; wr     32 #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
 33 #endif                                             33 #endif
 34                                                    34 
 35 #define GLOBAL_SPARE    %g5                        35 #define GLOBAL_SPARE    %g5
 36 #endif                                             36 #endif
 37                                                    37 
 38 #ifndef STORE_ASI                                  38 #ifndef STORE_ASI
 39 #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA            39 #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
 40 #define STORE_ASI       ASI_BLK_INIT_QUAD_LDD_     40 #define STORE_ASI       ASI_BLK_INIT_QUAD_LDD_P
 41 #else                                              41 #else
 42 #define STORE_ASI       0x80            /* ASI     42 #define STORE_ASI       0x80            /* ASI_P */
 43 #endif                                             43 #endif
 44 #endif                                             44 #endif
 45                                                    45 
 46 #if !defined(EX_LD) && !defined(EX_ST)             46 #if !defined(EX_LD) && !defined(EX_ST)
 47 #define NON_USER_COPY                              47 #define NON_USER_COPY
 48 #endif                                             48 #endif
 49                                                    49 
 50 #ifndef EX_LD                                      50 #ifndef EX_LD
 51 #define EX_LD(x,y)      x                          51 #define EX_LD(x,y)      x
 52 #endif                                             52 #endif
 53 #ifndef EX_LD_FP                                   53 #ifndef EX_LD_FP
 54 #define EX_LD_FP(x,y)   x                          54 #define EX_LD_FP(x,y)   x
 55 #endif                                             55 #endif
 56                                                    56 
 57 #ifndef EX_ST                                      57 #ifndef EX_ST
 58 #define EX_ST(x,y)      x                          58 #define EX_ST(x,y)      x
 59 #endif                                             59 #endif
 60 #ifndef EX_ST_FP                                   60 #ifndef EX_ST_FP
 61 #define EX_ST_FP(x,y)   x                          61 #define EX_ST_FP(x,y)   x
 62 #endif                                             62 #endif
 63                                                    63 
 64                                                    64 
 65 #ifndef LOAD                                       65 #ifndef LOAD
 66 #define LOAD(type,addr,dest)    type [addr], d     66 #define LOAD(type,addr,dest)    type [addr], dest
 67 #endif                                             67 #endif
 68                                                    68 
 69 #ifndef STORE                                      69 #ifndef STORE
 70 #ifndef MEMCPY_DEBUG                               70 #ifndef MEMCPY_DEBUG
 71 #define STORE(type,src,addr)    type src, [add     71 #define STORE(type,src,addr)    type src, [addr]
 72 #else                                              72 #else
 73 #define STORE(type,src,addr)    type##a src, [     73 #define STORE(type,src,addr)    type##a src, [addr] %asi
 74 #endif                                             74 #endif
 75 #endif                                             75 #endif
 76                                                    76 
 77 #ifndef STORE_INIT                                 77 #ifndef STORE_INIT
 78 #define STORE_INIT(src,addr)    stxa src, [add     78 #define STORE_INIT(src,addr)    stxa src, [addr] STORE_ASI
 79 #endif                                             79 #endif
 80                                                    80 
 81 #ifndef FUNC_NAME                                  81 #ifndef FUNC_NAME
 82 #define FUNC_NAME       NG4memcpy                  82 #define FUNC_NAME       NG4memcpy
 83 #endif                                             83 #endif
 84 #ifndef PREAMBLE                                   84 #ifndef PREAMBLE
 85 #define PREAMBLE                                   85 #define PREAMBLE
 86 #endif                                             86 #endif
 87                                                    87 
 88 #ifndef XCC                                        88 #ifndef XCC
 89 #define XCC xcc                                    89 #define XCC xcc
 90 #endif                                             90 #endif
 91                                                    91 
 92         .register       %g2,#scratch               92         .register       %g2,#scratch
 93         .register       %g3,#scratch               93         .register       %g3,#scratch
 94                                                    94 
 95         .text                                      95         .text
 96 #ifndef EX_RETVAL                                  96 #ifndef EX_RETVAL
 97 #define EX_RETVAL(x)    x                          97 #define EX_RETVAL(x)    x
 98 #endif                                             98 #endif
 99         .align          64                         99         .align          64
100                                                   100 
101         .globl  FUNC_NAME                         101         .globl  FUNC_NAME
102         .type   FUNC_NAME,#function               102         .type   FUNC_NAME,#function
103 FUNC_NAME:      /* %o0=dst, %o1=src, %o2=len *    103 FUNC_NAME:      /* %o0=dst, %o1=src, %o2=len */
104 #ifdef MEMCPY_DEBUG                               104 #ifdef MEMCPY_DEBUG
105         wr              %g0, 0x80, %asi           105         wr              %g0, 0x80, %asi
106 #endif                                            106 #endif
107         srlx            %o2, 31, %g2              107         srlx            %o2, 31, %g2
108         cmp             %g2, 0                    108         cmp             %g2, 0
109         tne             %XCC, 5                   109         tne             %XCC, 5
110         PREAMBLE                                  110         PREAMBLE
111         mov             %o0, %o3                  111         mov             %o0, %o3
112         brz,pn          %o2, .Lexit               112         brz,pn          %o2, .Lexit
113          cmp            %o2, 3                    113          cmp            %o2, 3
114         ble,pn          %icc, .Ltiny              114         ble,pn          %icc, .Ltiny
115          cmp            %o2, 19                   115          cmp            %o2, 19
116         ble,pn          %icc, .Lsmall             116         ble,pn          %icc, .Lsmall
117          or             %o0, %o1, %g2             117          or             %o0, %o1, %g2
118         cmp             %o2, 128                  118         cmp             %o2, 128
119         bl,pn           %icc, .Lmedium            119         bl,pn           %icc, .Lmedium
120          nop                                      120          nop
121                                                   121 
122 .Llarge:/* len >= 0x80 */                         122 .Llarge:/* len >= 0x80 */
123         /* First get dest 8 byte aligned.  */     123         /* First get dest 8 byte aligned.  */
124         sub             %g0, %o0, %g1             124         sub             %g0, %o0, %g1
125         and             %g1, 0x7, %g1             125         and             %g1, 0x7, %g1
126         brz,pt          %g1, 51f                  126         brz,pt          %g1, 51f
127          sub            %o2, %g1, %o2             127          sub            %o2, %g1, %o2
128                                                   128 
129                                                   129 
130 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g2), mem    130 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
131         add             %o1, 1, %o1               131         add             %o1, 1, %o1
132         subcc           %g1, 1, %g1               132         subcc           %g1, 1, %g1
133         add             %o0, 1, %o0               133         add             %o0, 1, %o0
134         bne,pt          %icc, 1b                  134         bne,pt          %icc, 1b
135          EX_ST(STORE(stb, %g2, %o0 - 0x01), me    135          EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
136                                                   136 
137 51:     LOAD(prefetch, %o1 + 0x040, #n_reads_s    137 51:     LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
138         LOAD(prefetch, %o1 + 0x080, #n_reads_s    138         LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
139         LOAD(prefetch, %o1 + 0x0c0, #n_reads_s    139         LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
140         LOAD(prefetch, %o1 + 0x100, #n_reads_s    140         LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
141         LOAD(prefetch, %o1 + 0x140, #n_reads_s    141         LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
142         LOAD(prefetch, %o1 + 0x180, #n_reads_s    142         LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
143         LOAD(prefetch, %o1 + 0x1c0, #n_reads_s    143         LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
144         LOAD(prefetch, %o1 + 0x200, #n_reads_s    144         LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
145                                                   145 
146         /* Check if we can use the straight fu    146         /* Check if we can use the straight fully aligned
147          * loop, or we require the alignaddr/f    147          * loop, or we require the alignaddr/faligndata variant.
148          */                                       148          */
149         andcc           %o1, 0x7, %o5             149         andcc           %o1, 0x7, %o5
150         bne,pn          %icc, .Llarge_src_unal    150         bne,pn          %icc, .Llarge_src_unaligned
151          sub            %g0, %o0, %g1             151          sub            %g0, %o0, %g1
152                                                   152 
153         /* Legitimize the use of initializing     153         /* Legitimize the use of initializing stores by getting dest
154          * to be 64-byte aligned.                 154          * to be 64-byte aligned.
155          */                                       155          */
156         and             %g1, 0x3f, %g1            156         and             %g1, 0x3f, %g1
157         brz,pt          %g1, .Llarge_aligned      157         brz,pt          %g1, .Llarge_aligned
158          sub            %o2, %g1, %o2             158          sub            %o2, %g1, %o2
159                                                   159 
160 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memc    160 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
161         add             %o1, 8, %o1               161         add             %o1, 8, %o1
162         subcc           %g1, 8, %g1               162         subcc           %g1, 8, %g1
163         add             %o0, 8, %o0               163         add             %o0, 8, %o0
164         bne,pt          %icc, 1b                  164         bne,pt          %icc, 1b
165          EX_ST(STORE(stx, %g2, %o0 - 0x08), me    165          EX_ST(STORE(stx, %g2, %o0 - 0x08), memcpy_retl_o2_plus_g1_plus_8)
166                                                   166 
167 .Llarge_aligned:                                  167 .Llarge_aligned:
168         /* len >= 0x80 && src 8-byte aligned &    168         /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
169         andn            %o2, 0x3f, %o4            169         andn            %o2, 0x3f, %o4
170         sub             %o2, %o4, %o2             170         sub             %o2, %o4, %o2
171                                                   171 
172 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memc    172 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o4)
173         add             %o1, 0x40, %o1            173         add             %o1, 0x40, %o1
174         EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memc    174         EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memcpy_retl_o2_plus_o4)
175         subcc           %o4, 0x40, %o4            175         subcc           %o4, 0x40, %o4
176         EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memc    176         EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memcpy_retl_o2_plus_o4_plus_64)
177         EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPA    177         EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_64)
178         EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memc    178         EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memcpy_retl_o2_plus_o4_plus_64)
179         EX_ST(STORE_INIT(%g1, %o0), memcpy_ret    179         EX_ST(STORE_INIT(%g1, %o0), memcpy_retl_o2_plus_o4_plus_64)
180         add             %o0, 0x08, %o0            180         add             %o0, 0x08, %o0
181         EX_ST(STORE_INIT(%g2, %o0), memcpy_ret    181         EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_56)
182         add             %o0, 0x08, %o0            182         add             %o0, 0x08, %o0
183         EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memc    183         EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memcpy_retl_o2_plus_o4_plus_48)
184         EX_ST(STORE_INIT(%g3, %o0), memcpy_ret    184         EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_48)
185         add             %o0, 0x08, %o0            185         add             %o0, 0x08, %o0
186         EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memc    186         EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memcpy_retl_o2_plus_o4_plus_40)
187         EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), m    187         EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_40)
188         add             %o0, 0x08, %o0            188         add             %o0, 0x08, %o0
189         EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPA    189         EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_32)
190         EX_ST(STORE_INIT(%o5, %o0), memcpy_ret    190         EX_ST(STORE_INIT(%o5, %o0), memcpy_retl_o2_plus_o4_plus_32)
191         add             %o0, 0x08, %o0            191         add             %o0, 0x08, %o0
192         EX_ST(STORE_INIT(%g2, %o0), memcpy_ret    192         EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_24)
193         add             %o0, 0x08, %o0            193         add             %o0, 0x08, %o0
194         EX_ST(STORE_INIT(%g3, %o0), memcpy_ret    194         EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_16)
195         add             %o0, 0x08, %o0            195         add             %o0, 0x08, %o0
196         EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), m    196         EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_8)
197         add             %o0, 0x08, %o0            197         add             %o0, 0x08, %o0
198         bne,pt          %icc, 1b                  198         bne,pt          %icc, 1b
199          LOAD(prefetch, %o1 + 0x200, #n_reads_    199          LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
200                                                   200 
201         membar          #StoreLoad | #StoreSto    201         membar          #StoreLoad | #StoreStore
202                                                   202 
203         brz,pn          %o2, .Lexit               203         brz,pn          %o2, .Lexit
204          cmp            %o2, 19                   204          cmp            %o2, 19
205         ble,pn          %icc, .Lsmall_unaligne    205         ble,pn          %icc, .Lsmall_unaligned
206          nop                                      206          nop
207         ba,a,pt         %icc, .Lmedium_noprefe    207         ba,a,pt         %icc, .Lmedium_noprefetch
208                                                   208 
209 .Lexit: retl                                      209 .Lexit: retl
210          mov            EX_RETVAL(%o3), %o0       210          mov            EX_RETVAL(%o3), %o0
211                                                   211 
212 .Llarge_src_unaligned:                            212 .Llarge_src_unaligned:
213 #ifdef NON_USER_COPY                              213 #ifdef NON_USER_COPY
214         VISEntryHalfFast(.Lmedium_vis_entry_fa    214         VISEntryHalfFast(.Lmedium_vis_entry_fail)
215 #else                                             215 #else
216         VISEntryHalf                              216         VISEntryHalf
217 #endif                                            217 #endif
218         andn            %o2, 0x3f, %o4            218         andn            %o2, 0x3f, %o4
219         sub             %o2, %o4, %o2             219         sub             %o2, %o4, %o2
220         alignaddr       %o1, %g0, %g1             220         alignaddr       %o1, %g0, %g1
221         add             %o1, %o4, %o1             221         add             %o1, %o4, %o1
222         EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), m    222         EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), memcpy_retl_o2_plus_o4)
223 1:      EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), m    223 1:      EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), memcpy_retl_o2_plus_o4)
224         subcc           %o4, 0x40, %o4            224         subcc           %o4, 0x40, %o4
225         EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), m    225         EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), memcpy_retl_o2_plus_o4_plus_64)
226         EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), m    226         EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), memcpy_retl_o2_plus_o4_plus_64)
227         EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), m    227         EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), memcpy_retl_o2_plus_o4_plus_64)
228         EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10),     228         EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), memcpy_retl_o2_plus_o4_plus_64)
229         EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12),     229         EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), memcpy_retl_o2_plus_o4_plus_64)
230         EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14),     230         EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), memcpy_retl_o2_plus_o4_plus_64)
231         faligndata      %f0, %f2, %f16            231         faligndata      %f0, %f2, %f16
232         EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), m    232         EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), memcpy_retl_o2_plus_o4_plus_64)
233         faligndata      %f2, %f4, %f18            233         faligndata      %f2, %f4, %f18
234         add             %g1, 0x40, %g1            234         add             %g1, 0x40, %g1
235         faligndata      %f4, %f6, %f20            235         faligndata      %f4, %f6, %f20
236         faligndata      %f6, %f8, %f22            236         faligndata      %f6, %f8, %f22
237         faligndata      %f8, %f10, %f24           237         faligndata      %f8, %f10, %f24
238         faligndata      %f10, %f12, %f26          238         faligndata      %f10, %f12, %f26
239         faligndata      %f12, %f14, %f28          239         faligndata      %f12, %f14, %f28
240         faligndata      %f14, %f0, %f30           240         faligndata      %f14, %f0, %f30
241         EX_ST_FP(STORE(std, %f16, %o0 + 0x00),    241         EX_ST_FP(STORE(std, %f16, %o0 + 0x00), memcpy_retl_o2_plus_o4_plus_64)
242         EX_ST_FP(STORE(std, %f18, %o0 + 0x08),    242         EX_ST_FP(STORE(std, %f18, %o0 + 0x08), memcpy_retl_o2_plus_o4_plus_56)
243         EX_ST_FP(STORE(std, %f20, %o0 + 0x10),    243         EX_ST_FP(STORE(std, %f20, %o0 + 0x10), memcpy_retl_o2_plus_o4_plus_48)
244         EX_ST_FP(STORE(std, %f22, %o0 + 0x18),    244         EX_ST_FP(STORE(std, %f22, %o0 + 0x18), memcpy_retl_o2_plus_o4_plus_40)
245         EX_ST_FP(STORE(std, %f24, %o0 + 0x20),    245         EX_ST_FP(STORE(std, %f24, %o0 + 0x20), memcpy_retl_o2_plus_o4_plus_32)
246         EX_ST_FP(STORE(std, %f26, %o0 + 0x28),    246         EX_ST_FP(STORE(std, %f26, %o0 + 0x28), memcpy_retl_o2_plus_o4_plus_24)
247         EX_ST_FP(STORE(std, %f28, %o0 + 0x30),    247         EX_ST_FP(STORE(std, %f28, %o0 + 0x30), memcpy_retl_o2_plus_o4_plus_16)
248         EX_ST_FP(STORE(std, %f30, %o0 + 0x38),    248         EX_ST_FP(STORE(std, %f30, %o0 + 0x38), memcpy_retl_o2_plus_o4_plus_8)
249         add             %o0, 0x40, %o0            249         add             %o0, 0x40, %o0
250         bne,pt          %icc, 1b                  250         bne,pt          %icc, 1b
251          LOAD(prefetch, %g1 + 0x200, #n_reads_    251          LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
252 #ifdef NON_USER_COPY                              252 #ifdef NON_USER_COPY
253         VISExitHalfFast                           253         VISExitHalfFast
254 #else                                             254 #else
255         VISExitHalf                               255         VISExitHalf
256 #endif                                            256 #endif
257         brz,pn          %o2, .Lexit               257         brz,pn          %o2, .Lexit
258          cmp            %o2, 19                   258          cmp            %o2, 19
259         ble,pn          %icc, .Lsmall_unaligne    259         ble,pn          %icc, .Lsmall_unaligned
260          nop                                      260          nop
261         ba,a,pt         %icc, .Lmedium_unalign    261         ba,a,pt         %icc, .Lmedium_unaligned
262                                                   262 
263 #ifdef NON_USER_COPY                              263 #ifdef NON_USER_COPY
264 .Lmedium_vis_entry_fail:                          264 .Lmedium_vis_entry_fail:
265          or             %o0, %o1, %g2             265          or             %o0, %o1, %g2
266 #endif                                            266 #endif
267 .Lmedium:                                         267 .Lmedium:
268         LOAD(prefetch, %o1 + 0x40, #n_reads_st    268         LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
269         andcc           %g2, 0x7, %g0             269         andcc           %g2, 0x7, %g0
270         bne,pn          %icc, .Lmedium_unalign    270         bne,pn          %icc, .Lmedium_unaligned
271          nop                                      271          nop
272 .Lmedium_noprefetch:                              272 .Lmedium_noprefetch:    /* 32-bytes-per-pass copy using 8-byte ldx/stx */
273         andncc          %o2, 0x20 - 1, %o5        273         andncc          %o2, 0x20 - 1, %o5      /* %o5 = len & ~31: bytes handled by this loop */
274         be,pn           %icc, 2f                  274         be,pn           %icc, 2f                /* fewer than 32 bytes: skip the loop */
275          sub            %o2, %o5, %o2             275          sub            %o2, %o5, %o2           /* delay slot: %o2 = tail left for the later stages */
276 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memc    276 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
277         EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memc    277         EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5)
278         EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPA    278         EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), memcpy_retl_o2_plus_o5)
279         EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memc    279         EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5)
280         add             %o1, 0x20, %o1            280         add             %o1, 0x20, %o1
281         subcc           %o5, 0x20, %o5            281         subcc           %o5, 0x20, %o5          /* %o5 no longer counts this chunk; fixups below add back what is unwritten */
282         EX_ST(STORE(stx, %g1, %o0 + 0x00), mem    282         EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32)
283         EX_ST(STORE(stx, %g2, %o0 + 0x08), mem    283         EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24)
284         EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0    284         EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_16) /* +0x00 and +0x08 already stored: 16 bytes of the chunk remain */
285         EX_ST(STORE(stx, %o4, %o0 + 0x18), mem    285         EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8)
286         bne,pt          %icc, 1b                  286         bne,pt          %icc, 1b                /* loop while %o5 != 0 (flags from the subcc above) */
287          add            %o0, 0x20, %o0            287          add            %o0, 0x20, %o0          /* delay slot: advance dst past the stored chunk */
288 2:      andcc           %o2, 0x18, %o5            288 2:      andcc           %o2, 0x18, %o5
289         be,pt           %icc, 3f                  289         be,pt           %icc, 3f
290          sub            %o2, %o5, %o2             290          sub            %o2, %o5, %o2
291                                                   291 
292 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memc    292 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
293         add             %o1, 0x08, %o1            293         add             %o1, 0x08, %o1
294         add             %o0, 0x08, %o0            294         add             %o0, 0x08, %o0
295         subcc           %o5, 0x08, %o5            295         subcc           %o5, 0x08, %o5
296         bne,pt          %icc, 1b                  296         bne,pt          %icc, 1b
297          EX_ST(STORE(stx, %g1, %o0 - 0x08), me    297          EX_ST(STORE(stx, %g1, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
298 3:      brz,pt          %o2, .Lexit               298 3:      brz,pt          %o2, .Lexit
299          cmp            %o2, 0x04                 299          cmp            %o2, 0x04
300         bl,pn           %icc, .Ltiny              300         bl,pn           %icc, .Ltiny
301          nop                                      301          nop
302         EX_LD(LOAD(lduw, %o1 + 0x00, %g1), mem    302         EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2)
303         add             %o1, 0x04, %o1            303         add             %o1, 0x04, %o1
304         add             %o0, 0x04, %o0            304         add             %o0, 0x04, %o0
305         subcc           %o2, 0x04, %o2            305         subcc           %o2, 0x04, %o2
306         bne,pn          %icc, .Ltiny              306         bne,pn          %icc, .Ltiny
307          EX_ST(STORE(stw, %g1, %o0 - 0x04), me    307          EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_4)
308         ba,a,pt         %icc, .Lexit              308         ba,a,pt         %icc, .Lexit
309 .Lmedium_unaligned:                               309 .Lmedium_unaligned:
310         /* First get dest 8 byte aligned.  */     310         /* First get dest 8 byte aligned.  */
311         sub             %g0, %o0, %g1             311         sub             %g0, %o0, %g1
312         and             %g1, 0x7, %g1             312         and             %g1, 0x7, %g1
313         brz,pt          %g1, 2f                   313         brz,pt          %g1, 2f
314          sub            %o2, %g1, %o2             314          sub            %o2, %g1, %o2
315                                                   315 
316 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g2), mem    316 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
317         add             %o1, 1, %o1               317         add             %o1, 1, %o1
318         subcc           %g1, 1, %g1               318         subcc           %g1, 1, %g1
319         add             %o0, 1, %o0               319         add             %o0, 1, %o0
320         bne,pt          %icc, 1b                  320         bne,pt          %icc, 1b
321          EX_ST(STORE(stb, %g2, %o0 - 0x01), me    321          EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
322 2:                                                322 2:
323         and             %o1, 0x7, %g1             323         and             %o1, 0x7, %g1
324         brz,pn          %g1, .Lmedium_noprefet    324         brz,pn          %g1, .Lmedium_noprefetch
325          sll            %g1, 3, %g1               325          sll            %g1, 3, %g1
326         mov             64, %g2                   326         mov             64, %g2
327         sub             %g2, %g1, %g2             327         sub             %g2, %g1, %g2
328         andn            %o1, 0x7, %o1             328         andn            %o1, 0x7, %o1
329         EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memc    329         EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2)
330         sllx            %o4, %g1, %o4             330         sllx            %o4, %g1, %o4
331         andn            %o2, 0x08 - 1, %o5        331         andn            %o2, 0x08 - 1, %o5
332         sub             %o2, %o5, %o2             332         sub             %o2, %o5, %o2
333 1:      EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memc    333 1:      EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5)
334         add             %o1, 0x08, %o1            334         add             %o1, 0x08, %o1
335         subcc           %o5, 0x08, %o5            335         subcc           %o5, 0x08, %o5
336         srlx            %g3, %g2, GLOBAL_SPARE    336         srlx            %g3, %g2, GLOBAL_SPARE
337         or              GLOBAL_SPARE, %o4, GLO    337         or              GLOBAL_SPARE, %o4, GLOBAL_SPARE
338         EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0    338         EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8)
339         add             %o0, 0x08, %o0            339         add             %o0, 0x08, %o0
340         bne,pt          %icc, 1b                  340         bne,pt          %icc, 1b
341          sllx           %g3, %g1, %o4             341          sllx           %g3, %g1, %o4
342         srl             %g1, 3, %g1               342         srl             %g1, 3, %g1
343         add             %o1, %g1, %o1             343         add             %o1, %g1, %o1
344         brz,pn          %o2, .Lexit               344         brz,pn          %o2, .Lexit
345          nop                                      345          nop
346         ba,pt           %icc, .Lsmall_unaligne    346         ba,pt           %icc, .Lsmall_unaligned
347                                                   347 
348 .Ltiny:                                           348 .Ltiny:
349         EX_LD(LOAD(ldub, %o1 + 0x00, %g1), mem    349         EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
350         subcc           %o2, 1, %o2               350         subcc           %o2, 1, %o2
351         be,pn           %icc, .Lexit              351         be,pn           %icc, .Lexit
352          EX_ST(STORE(stb, %g1, %o0 + 0x00), me    352          EX_ST(STORE(stb, %g1, %o0 + 0x00), memcpy_retl_o2_plus_1)
353         EX_LD(LOAD(ldub, %o1 + 0x01, %g1), mem    353         EX_LD(LOAD(ldub, %o1 + 0x01, %g1), memcpy_retl_o2)
354         subcc           %o2, 1, %o2               354         subcc           %o2, 1, %o2
355         be,pn           %icc, .Lexit              355         be,pn           %icc, .Lexit
356          EX_ST(STORE(stb, %g1, %o0 + 0x01), me    356          EX_ST(STORE(stb, %g1, %o0 + 0x01), memcpy_retl_o2_plus_1)
357         EX_LD(LOAD(ldub, %o1 + 0x02, %g1), mem    357         EX_LD(LOAD(ldub, %o1 + 0x02, %g1), memcpy_retl_o2)
358         ba,pt           %icc, .Lexit              358         ba,pt           %icc, .Lexit
359          EX_ST(STORE(stb, %g1, %o0 + 0x02), me    359          EX_ST(STORE(stb, %g1, %o0 + 0x02), memcpy_retl_o2)
360                                                   360 
361 .Lsmall:                                          361 .Lsmall:
362         andcc           %g2, 0x3, %g0             362         andcc           %g2, 0x3, %g0
363         bne,pn          %icc, .Lsmall_unaligne    363         bne,pn          %icc, .Lsmall_unaligned
364          andn           %o2, 0x4 - 1, %o5         364          andn           %o2, 0x4 - 1, %o5
365         sub             %o2, %o5, %o2             365         sub             %o2, %o5, %o2
366 1:                                                366 1:
367         EX_LD(LOAD(lduw, %o1 + 0x00, %g1), mem    367         EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
368         add             %o1, 0x04, %o1            368         add             %o1, 0x04, %o1
369         subcc           %o5, 0x04, %o5            369         subcc           %o5, 0x04, %o5
370         add             %o0, 0x04, %o0            370         add             %o0, 0x04, %o0
371         bne,pt          %icc, 1b                  371         bne,pt          %icc, 1b
372          EX_ST(STORE(stw, %g1, %o0 - 0x04), me    372          EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
373         brz,pt          %o2, .Lexit               373         brz,pt          %o2, .Lexit
374          nop                                      374          nop
375         ba,a,pt         %icc, .Ltiny              375         ba,a,pt         %icc, .Ltiny
376                                                   376 
377 .Lsmall_unaligned:                                377 .Lsmall_unaligned:
378 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g1), mem    378 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
379         add             %o1, 1, %o1               379         add             %o1, 1, %o1
380         add             %o0, 1, %o0               380         add             %o0, 1, %o0
381         subcc           %o2, 1, %o2               381         subcc           %o2, 1, %o2
382         bne,pt          %icc, 1b                  382         bne,pt          %icc, 1b
383          EX_ST(STORE(stb, %g1, %o0 - 0x01), me    383          EX_ST(STORE(stb, %g1, %o0 - 0x01), memcpy_retl_o2_plus_1)
384         ba,a,pt         %icc, .Lexit              384         ba,a,pt         %icc, .Lexit
385          nop                                      385          nop
386         .size           FUNC_NAME, .-FUNC_NAME    386         .size           FUNC_NAME, .-FUNC_NAME
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php