~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/sparc/lib/NG4memcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/sparc/lib/NG4memcpy.S (Architecture i386) and /arch/sparc64/lib/NG4memcpy.S (Architecture sparc64)


  1 /* SPDX-License-Identifier: GPL-2.0 */            
  2 /* NG4memcpy.S: Niagara-4 optimized memcpy.       
  3  *                                                
  4  * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
  5  */                                               
  6                                                   
  7 #ifdef __KERNEL__                                 
  8 #include <linux/linkage.h>                        
  9 #include <asm/visasm.h>                           
 10 #include <asm/asi.h>                              
 11 #define GLOBAL_SPARE    %g7                       
 12 #else                                             
 13 #define ASI_BLK_INIT_QUAD_LDD_P 0xe2              
 14 #define FPRS_FEF  0x04                            
 15                                                   
 16 /* On T4 it is very expensive to access ASRs like %fprs and
 17  * %asi, avoiding a read or a write can save ~50 cycles.
 18  */                                               
 19 #define FPU_ENTER                       \         
 20         rd      %fprs, %o5;             \         
 21         andcc   %o5, FPRS_FEF, %g0;     \         
 22         be,a,pn %icc, 999f;             \         
 23          wr     %g0, FPRS_FEF, %fprs;   \         
 24         999:                                      
 25                                                   
 26 #ifdef MEMCPY_DEBUG                               
 27 #define VISEntryHalf FPU_ENTER; \                 
 28                      clr %g1; clr %g2; clr %g3    
 29 #define VISExitHalf and %o5, FPRS_FEF, %o5; wr    
 30 #else                                             
 31 #define VISEntryHalf FPU_ENTER                    
 32 #define VISExitHalf and %o5, FPRS_FEF, %o5; wr    
 33 #endif                                            
 34                                                   
 35 #define GLOBAL_SPARE    %g5                       
 36 #endif                                            
 37                                                   
 38 #ifndef STORE_ASI                                 
 39 #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA           
 40 #define STORE_ASI       ASI_BLK_INIT_QUAD_LDD_    
 41 #else                                             
 42 #define STORE_ASI       0x80            /* ASI_P */
 43 #endif                                            
 44 #endif                                            
 45                                                   
 46 #if !defined(EX_LD) && !defined(EX_ST)            
 47 #define NON_USER_COPY                             
 48 #endif                                            
 49                                                   
 50 #ifndef EX_LD                                     
 51 #define EX_LD(x,y)      x                         
 52 #endif                                            
 53 #ifndef EX_LD_FP                                  
 54 #define EX_LD_FP(x,y)   x                         
 55 #endif                                            
 56                                                   
 57 #ifndef EX_ST                                     
 58 #define EX_ST(x,y)      x                         
 59 #endif                                            
 60 #ifndef EX_ST_FP                                  
 61 #define EX_ST_FP(x,y)   x                         
 62 #endif                                            
 63                                                   
 64                                                   
 65 #ifndef LOAD                                      
 66 #define LOAD(type,addr,dest)    type [addr], d    
 67 #endif                                            
 68                                                   
 69 #ifndef STORE                                     
 70 #ifndef MEMCPY_DEBUG                              
 71 #define STORE(type,src,addr)    type src, [add    
 72 #else                                             
 73 #define STORE(type,src,addr)    type##a src, [    
 74 #endif                                            
 75 #endif                                            
 76                                                   
 77 #ifndef STORE_INIT                                
 78 #define STORE_INIT(src,addr)    stxa src, [add    
 79 #endif                                            
 80                                                   
 81 #ifndef FUNC_NAME                                 
 82 #define FUNC_NAME       NG4memcpy                 
 83 #endif                                            
 84 #ifndef PREAMBLE                                  
 85 #define PREAMBLE                                  
 86 #endif                                            
 87                                                   
 88 #ifndef XCC                                       
 89 #define XCC xcc                                   
 90 #endif                                            
 91                                                   
 92         .register       %g2,#scratch              
 93         .register       %g3,#scratch              
 94                                                   
 95         .text                                     
 96 #ifndef EX_RETVAL                                 
 97 #define EX_RETVAL(x)    x                         
 98 #endif                                            
 99         .align          64                        
100                                                   
101         .globl  FUNC_NAME                         
102         .type   FUNC_NAME,#function               
103 FUNC_NAME:      /* %o0=dst, %o1=src, %o2=len */
104 #ifdef MEMCPY_DEBUG                               
105         wr              %g0, 0x80, %asi           
106 #endif                                            
107         srlx            %o2, 31, %g2              
108         cmp             %g2, 0                    
109         tne             %XCC, 5                   
110         PREAMBLE                                  
111         mov             %o0, %o3                  
112         brz,pn          %o2, .Lexit               
113          cmp            %o2, 3                    
114         ble,pn          %icc, .Ltiny              
115          cmp            %o2, 19                   
116         ble,pn          %icc, .Lsmall             
117          or             %o0, %o1, %g2             
118         cmp             %o2, 128                  
119         bl,pn           %icc, .Lmedium            
120          nop                                      
121                                                   
122 .Llarge:/* len >= 0x80 */                         
123         /* First get dest 8 byte aligned.  */     
124         sub             %g0, %o0, %g1             
125         and             %g1, 0x7, %g1             
126         brz,pt          %g1, 51f                  
127          sub            %o2, %g1, %o2             
128                                                   
129                                                   
130 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g2), mem    
131         add             %o1, 1, %o1               
132         subcc           %g1, 1, %g1               
133         add             %o0, 1, %o0               
134         bne,pt          %icc, 1b                  
135          EX_ST(STORE(stb, %g2, %o0 - 0x01), me    
136                                                   
137 51:     LOAD(prefetch, %o1 + 0x040, #n_reads_s    
138         LOAD(prefetch, %o1 + 0x080, #n_reads_s    
139         LOAD(prefetch, %o1 + 0x0c0, #n_reads_s    
140         LOAD(prefetch, %o1 + 0x100, #n_reads_s    
141         LOAD(prefetch, %o1 + 0x140, #n_reads_s    
142         LOAD(prefetch, %o1 + 0x180, #n_reads_s    
143         LOAD(prefetch, %o1 + 0x1c0, #n_reads_s    
144         LOAD(prefetch, %o1 + 0x200, #n_reads_s    
145                                                   
146         /* Check if we can use the straight fully aligned
147          * loop, or we require the alignaddr/faligndata variant.
148          */                                       
149         andcc           %o1, 0x7, %o5             
150         bne,pn          %icc, .Llarge_src_unal    
151          sub            %g0, %o0, %g1             
152                                                   
153         /* Legitimize the use of initializing stores by getting dest
154          * to be 64-byte aligned.                 
155          */                                       
156         and             %g1, 0x3f, %g1            
157         brz,pt          %g1, .Llarge_aligned      
158          sub            %o2, %g1, %o2             
159                                                   
160 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memc    
161         add             %o1, 8, %o1               
162         subcc           %g1, 8, %g1               
163         add             %o0, 8, %o0               
164         bne,pt          %icc, 1b                  
165          EX_ST(STORE(stx, %g2, %o0 - 0x08), me    
166                                                   
167 .Llarge_aligned:                                  
168         /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
169         andn            %o2, 0x3f, %o4            
170         sub             %o2, %o4, %o2             
171                                                   
172 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memc    
173         add             %o1, 0x40, %o1            
174         EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memc    
175         subcc           %o4, 0x40, %o4            
176         EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memc    
177         EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPA    
178         EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memc    
179         EX_ST(STORE_INIT(%g1, %o0), memcpy_ret    
180         add             %o0, 0x08, %o0            
181         EX_ST(STORE_INIT(%g2, %o0), memcpy_ret    
182         add             %o0, 0x08, %o0            
183         EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memc    
184         EX_ST(STORE_INIT(%g3, %o0), memcpy_ret    
185         add             %o0, 0x08, %o0            
186         EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memc    
187         EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), m    
188         add             %o0, 0x08, %o0            
189         EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPA    
190         EX_ST(STORE_INIT(%o5, %o0), memcpy_ret    
191         add             %o0, 0x08, %o0            
192         EX_ST(STORE_INIT(%g2, %o0), memcpy_ret    
193         add             %o0, 0x08, %o0            
194         EX_ST(STORE_INIT(%g3, %o0), memcpy_ret    
195         add             %o0, 0x08, %o0            
196         EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), m    
197         add             %o0, 0x08, %o0            
198         bne,pt          %icc, 1b                  
199          LOAD(prefetch, %o1 + 0x200, #n_reads_    
200                                                   
201         membar          #StoreLoad | #StoreSto    
202                                                   
203         brz,pn          %o2, .Lexit               
204          cmp            %o2, 19                   
205         ble,pn          %icc, .Lsmall_unaligne    
206          nop                                      
207         ba,a,pt         %icc, .Lmedium_noprefe    
208                                                   
209 .Lexit: retl                                      
210          mov            EX_RETVAL(%o3), %o0       
211                                                   
212 .Llarge_src_unaligned:                            
213 #ifdef NON_USER_COPY                              
214         VISEntryHalfFast(.Lmedium_vis_entry_fa    
215 #else                                             
216         VISEntryHalf                              
217 #endif                                            
218         andn            %o2, 0x3f, %o4            
219         sub             %o2, %o4, %o2             
220         alignaddr       %o1, %g0, %g1             
221         add             %o1, %o4, %o1             
222         EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), m    
223 1:      EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), m    
224         subcc           %o4, 0x40, %o4            
225         EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), m    
226         EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), m    
227         EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), m    
228         EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10),     
229         EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12),     
230         EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14),     
231         faligndata      %f0, %f2, %f16            
232         EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), m    
233         faligndata      %f2, %f4, %f18            
234         add             %g1, 0x40, %g1            
235         faligndata      %f4, %f6, %f20            
236         faligndata      %f6, %f8, %f22            
237         faligndata      %f8, %f10, %f24           
238         faligndata      %f10, %f12, %f26          
239         faligndata      %f12, %f14, %f28          
240         faligndata      %f14, %f0, %f30           
241         EX_ST_FP(STORE(std, %f16, %o0 + 0x00),    
242         EX_ST_FP(STORE(std, %f18, %o0 + 0x08),    
243         EX_ST_FP(STORE(std, %f20, %o0 + 0x10),    
244         EX_ST_FP(STORE(std, %f22, %o0 + 0x18),    
245         EX_ST_FP(STORE(std, %f24, %o0 + 0x20),    
246         EX_ST_FP(STORE(std, %f26, %o0 + 0x28),    
247         EX_ST_FP(STORE(std, %f28, %o0 + 0x30),    
248         EX_ST_FP(STORE(std, %f30, %o0 + 0x38),    
249         add             %o0, 0x40, %o0            
250         bne,pt          %icc, 1b                  
251          LOAD(prefetch, %g1 + 0x200, #n_reads_    
252 #ifdef NON_USER_COPY                              
253         VISExitHalfFast                           
254 #else                                             
255         VISExitHalf                               
256 #endif                                            
257         brz,pn          %o2, .Lexit               
258          cmp            %o2, 19                   
259         ble,pn          %icc, .Lsmall_unaligne    
260          nop                                      
261         ba,a,pt         %icc, .Lmedium_unalign    
262                                                   
263 #ifdef NON_USER_COPY                              
264 .Lmedium_vis_entry_fail:                          
265          or             %o0, %o1, %g2             
266 #endif                                            
267 .Lmedium:                                         
268         LOAD(prefetch, %o1 + 0x40, #n_reads_st    
269         andcc           %g2, 0x7, %g0             
270         bne,pn          %icc, .Lmedium_unalign    
271          nop                                      
272 .Lmedium_noprefetch:                              
273         andncc          %o2, 0x20 - 1, %o5        
274         be,pn           %icc, 2f                  
275          sub            %o2, %o5, %o2             
276 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memc    
277         EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memc    
278         EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPA    
279         EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memc    
280         add             %o1, 0x20, %o1            
281         subcc           %o5, 0x20, %o5            
282         EX_ST(STORE(stx, %g1, %o0 + 0x00), mem    
283         EX_ST(STORE(stx, %g2, %o0 + 0x08), mem    
284         EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0    
285         EX_ST(STORE(stx, %o4, %o0 + 0x18), mem    
286         bne,pt          %icc, 1b                  
287          add            %o0, 0x20, %o0            
288 2:      andcc           %o2, 0x18, %o5            
289         be,pt           %icc, 3f                  
290          sub            %o2, %o5, %o2             
291                                                   
292 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memc    
293         add             %o1, 0x08, %o1            
294         add             %o0, 0x08, %o0            
295         subcc           %o5, 0x08, %o5            
296         bne,pt          %icc, 1b                  
297          EX_ST(STORE(stx, %g1, %o0 - 0x08), me    
298 3:      brz,pt          %o2, .Lexit               
299          cmp            %o2, 0x04                 
300         bl,pn           %icc, .Ltiny              
301          nop                                      
302         EX_LD(LOAD(lduw, %o1 + 0x00, %g1), mem    
303         add             %o1, 0x04, %o1            
304         add             %o0, 0x04, %o0            
305         subcc           %o2, 0x04, %o2            
306         bne,pn          %icc, .Ltiny              
307          EX_ST(STORE(stw, %g1, %o0 - 0x04), me    
308         ba,a,pt         %icc, .Lexit              
309 .Lmedium_unaligned:                               
310         /* First get dest 8 byte aligned.  */     
311         sub             %g0, %o0, %g1             
312         and             %g1, 0x7, %g1             
313         brz,pt          %g1, 2f                   
314          sub            %o2, %g1, %o2             
315                                                   
316 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g2), mem    
317         add             %o1, 1, %o1               
318         subcc           %g1, 1, %g1               
319         add             %o0, 1, %o0               
320         bne,pt          %icc, 1b                  
321          EX_ST(STORE(stb, %g2, %o0 - 0x01), me    
322 2:                                                
323         and             %o1, 0x7, %g1             
324         brz,pn          %g1, .Lmedium_noprefet    
325          sll            %g1, 3, %g1               
326         mov             64, %g2                   
327         sub             %g2, %g1, %g2             
328         andn            %o1, 0x7, %o1             
329         EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memc    
330         sllx            %o4, %g1, %o4             
331         andn            %o2, 0x08 - 1, %o5        
332         sub             %o2, %o5, %o2             
333 1:      EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memc    
334         add             %o1, 0x08, %o1            
335         subcc           %o5, 0x08, %o5            
336         srlx            %g3, %g2, GLOBAL_SPARE    
337         or              GLOBAL_SPARE, %o4, GLO    
338         EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0    
339         add             %o0, 0x08, %o0            
340         bne,pt          %icc, 1b                  
341          sllx           %g3, %g1, %o4             
342         srl             %g1, 3, %g1               
343         add             %o1, %g1, %o1             
344         brz,pn          %o2, .Lexit               
345          nop                                      
346         ba,pt           %icc, .Lsmall_unaligne    
347                                                   
348 .Ltiny:                                           
349         EX_LD(LOAD(ldub, %o1 + 0x00, %g1), mem    
350         subcc           %o2, 1, %o2               
351         be,pn           %icc, .Lexit              
352          EX_ST(STORE(stb, %g1, %o0 + 0x00), me    
353         EX_LD(LOAD(ldub, %o1 + 0x01, %g1), mem    
354         subcc           %o2, 1, %o2               
355         be,pn           %icc, .Lexit              
356          EX_ST(STORE(stb, %g1, %o0 + 0x01), me    
357         EX_LD(LOAD(ldub, %o1 + 0x02, %g1), mem    
358         ba,pt           %icc, .Lexit              
359          EX_ST(STORE(stb, %g1, %o0 + 0x02), me    
360                                                   
361 .Lsmall:                                          
362         andcc           %g2, 0x3, %g0             
363         bne,pn          %icc, .Lsmall_unaligne    
364          andn           %o2, 0x4 - 1, %o5         
365         sub             %o2, %o5, %o2             
366 1:                                                
367         EX_LD(LOAD(lduw, %o1 + 0x00, %g1), mem    
368         add             %o1, 0x04, %o1            
369         subcc           %o5, 0x04, %o5            
370         add             %o0, 0x04, %o0            
371         bne,pt          %icc, 1b                  
372          EX_ST(STORE(stw, %g1, %o0 - 0x04), me    
373         brz,pt          %o2, .Lexit               
374          nop                                      
375         ba,a,pt         %icc, .Ltiny              
376                                                   
377 .Lsmall_unaligned:                                
378 1:      EX_LD(LOAD(ldub, %o1 + 0x00, %g1), mem    
379         add             %o1, 1, %o1               
380         add             %o0, 1, %o0               
381         subcc           %o2, 1, %o2               
382         bne,pt          %icc, 1b                  
383          EX_ST(STORE(stb, %g1, %o0 - 0x01), me    
384         ba,a,pt         %icc, .Lexit              
385          nop                                      
386         .size           FUNC_NAME, .-FUNC_NAME    
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php