~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/sparc/lib/U1memcpy.S

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/sparc/lib/U1memcpy.S (Version linux-6.11.5) and /arch/sparc/lib/U1memcpy.S (Version linux-2.6.0)


  1 /* SPDX-License-Identifier: GPL-2.0 */            
  2 /* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimiz    
  3  *                                                
  4  * Copyright (C) 1997, 2004 David S. Miller (d    
  5  * Copyright (C) 1996, 1997, 1998, 1999 Jakub     
  6  */                                               
  7                                                   
  8 #ifdef __KERNEL__                                 
    /* Kernel build: pull in the kernel headers and use %g7 as the spare
     * global register.  VISEntry/VISExit and ASI_BLK_P are not defined in
     * this branch; presumably they come from <asm/visasm.h> and
     * <asm/asi.h> -- confirm.
     */
  9 #include <linux/export.h>                         
 10 #include <linux/linkage.h>                        
 11 #include <asm/visasm.h>                           
 12 #include <asm/asi.h>                              
 13 #define GLOBAL_SPARE    g7                        
 14 #else                                             
    /* Non-kernel build: use %g5 as the spare global register and define
     * ASI_BLK_P, FPRS_FEF and the VISEntry/VISExit sequences locally.
     * The VISEntry/VISExit definitions are cut off on the right in this
     * listing; only their leading instructions are visible.
     */
 15 #define GLOBAL_SPARE    g5                        
 16 #define ASI_BLK_P 0xf0                            
 17 #define FPRS_FEF  0x04                            
 18 #ifdef MEMCPY_DEBUG                               
    /* Debug flavour: VISEntry has a second line that also clears
     * %g1, %g2 and %g3 (the rest of that line is not visible here).
     */
 19 #define VISEntry rd %fprs, %o5; wr %g0, FPRS_F    
 20                  clr %g1; clr %g2; clr %g3; su    
 21 #define VISExit and %o5, FPRS_FEF, %o5; wr %o5    
 22 #else                                             
 23 #define VISEntry rd %fprs, %o5; wr %g0, FPRS_F    
 24 #define VISExit and %o5, FPRS_FEF, %o5; wr %o5    
 25 #endif                                            
 26 #endif                                            
 27                                                   
    /* Hook macros.  Every macro in this group is defined only when it has
     * not been defined before this point, so the values below are the
     * defaults.
     */
    /* Access wrappers: by default each expands to its first argument (the
     * instruction) and discards the second argument.
     */
 28 #ifndef EX_LD                                     
 29 #define EX_LD(x,y)      x                         
 30 #endif                                            
 31 #ifndef EX_LD_FP                                  
 32 #define EX_LD_FP(x,y)   x                         
 33 #endif                                            
 34                                                   
 35 #ifndef EX_ST                                     
 36 #define EX_ST(x,y)      x                         
 37 #endif                                            
 38 #ifndef EX_ST_FP                                  
 39 #define EX_ST_FP(x,y)   x                         
 40 #endif                                            
 41                                                   
    /* Load/store templates.  LOAD and STORE take the instruction mnemonic
     * as their first argument; LOAD_BLK and STORE_BLK start with ldda and
     * stda respectively.  The tail of each expansion is cut off on the
     * right in this listing.
     */
 42 #ifndef LOAD                                      
 43 #define LOAD(type,addr,dest)    type [addr], d    
 44 #endif                                            
 45                                                   
 46 #ifndef LOAD_BLK                                  
 47 #define LOAD_BLK(addr,dest)     ldda [addr] AS    
 48 #endif                                            
 49                                                   
 50 #ifndef STORE                                     
 51 #define STORE(type,src,addr)    type src, [add    
 52 #endif                                            
 53                                                   
 54 #ifndef STORE_BLK                                 
 55 #define STORE_BLK(src,addr)     stda src, [add    
 56 #endif                                            
 57                                                   
    /* Name of the global routine emitted by this file; memcpy by default. */
 58 #ifndef FUNC_NAME                                 
 59 #define FUNC_NAME       memcpy                    
 60 #endif                                            
 61                                                   
    /* Expanded near the top of FUNC_NAME; empty by default. */
 62 #ifndef PREAMBLE                                  
 63 #define PREAMBLE                                  
 64 #endif                                            
 65                                                   
    /* Condition-code register named in the %XCC branch operands; xcc by
     * default.
     */
 66 #ifndef XCC                                       
 67 #define XCC xcc                                   
 68 #endif                                            
 69                                                   
    /* NOTE: every multi-line macro in this group is cut off on the right
     * in this listing, so the line-continuation part of each line and
     * some trailing operands are not visible.
     */
    /* FREG_FROB: eight faligndata operations, each combining two adjacent
     * register arguments; the results go to %f48, %f50, ... %f62 in order.
     * The parameter list is cut off after f7, but the body also uses f8
     * and f9.
     */
 70 #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7,     
 71         faligndata              %f1, %f2, %f48    
 72         faligndata              %f2, %f3, %f50    
 73         faligndata              %f3, %f4, %f52    
 74         faligndata              %f4, %f5, %f54    
 75         faligndata              %f5, %f6, %f56    
 76         faligndata              %f6, %f7, %f58    
 77         faligndata              %f7, %f8, %f60    
 78         faligndata              %f8, %f9, %f62    
 79                                                   
    /* MAIN_LOOP_CHUNK: block-load from %src into %fdest, block-store
     * %fsrc to %dest, add 0x40 to %src, run a subcc on %GLOBAL_SPARE
     * (operands cut off) and branch to jmptgt when the result is zero.
     * The add on %dest sits in the branch delay slot.
     */
 80 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc    
 81         EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs    
 82         EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_g    
 83         add                     %src, 0x40, %s    
 84         subcc                   %GLOBAL_SPARE,    
 85         be,pn                   %xcc, jmptgt;     
 86          add                    %dest, 0x40, %    
 87                                                   
    /* LOOP_CHUNK1/2/3: MAIN_LOOP_CHUNK loading into f0, f16 and f32
     * respectively, always storing from f48.
     */
 88 #define LOOP_CHUNK1(src, dest, branch_dest)       
 89         MAIN_LOOP_CHUNK(src, dest, f0,  f48, b    
 90 #define LOOP_CHUNK2(src, dest, branch_dest)       
 91         MAIN_LOOP_CHUNK(src, dest, f16, f48, b    
 92 #define LOOP_CHUNK3(src, dest, branch_dest)       
 93         MAIN_LOOP_CHUNK(src, dest, f32, f48, b    
 94                                                   
    /* DO_SYNC is a membar #Sync.  STORE_SYNC block-stores %fsrc to %dest,
     * adds 0x40 to %dest (destination operand cut off) and then issues
     * DO_SYNC.
     */
 95 #define DO_SYNC                 membar  #Sync;    
 96 #define STORE_SYNC(dest, fsrc)                    
 97         EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_g    
 98         add                     %dest, 0x40, %    
 99         DO_SYNC                                   
100                                                   
    /* STORE_JUMP: same block store and 0x40 add as STORE_SYNC, followed by
     * an unconditional branch to target with a nop in the delay slot.
     */
101 #define STORE_JUMP(dest, fsrc, target)            
102         EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_g    
103         add                     %dest, 0x40, %    
104         ba,pt                   %xcc, target;     
105          nop;                                     
106                                                   
    /* FINISH_VISCHUNK: subtract 8 from %g3 and branch to local label 95
     * (forward) if the result is negative; the faligndata into %f48
     * executes in the delay slot.  Otherwise store %f48 with std to %dest
     * and add 8 to %dest (destination operand cut off).  Note that f0 and
     * f1 here are macro parameters, so %f0/%f1 in the body name whatever
     * registers the caller passes, not the literal registers.
     */
107 #define FINISH_VISCHUNK(dest, f0, f1)             
108         subcc                   %g3, 8, %g3;      
109         bl,pn                   %xcc, 95f;        
110          faligndata             %f0, %f1, %f48    
111         EX_ST_FP(STORE(std, %f48, %dest), U1_g    
112         add                     %dest, 8, %des    
113                                                   
    /* UNEVEN_VISCHUNK_LAST: subtract 8 from %g3 and branch to 95 (forward)
     * if negative; the delay slot runs fsrc2 from the first register
     * argument to the second.  The dest parameter is not used in the
     * visible part of the body.
     */
114 #define UNEVEN_VISCHUNK_LAST(dest, f0, f1)        
115         subcc                   %g3, 8, %g3;      
116         bl,pn                   %xcc, 95f;        
117          fsrc2                  %f0, %f1;         
118                                                   
    /* UNEVEN_VISCHUNK: UNEVEN_VISCHUNK_LAST followed by an annulled
     * unconditional branch to local label 93 (forward).
     */
119 #define UNEVEN_VISCHUNK(dest, f0, f1)             
120         UNEVEN_VISCHUNK_LAST(dest, f0, f1)        
121         ba,a,pt                 %xcc, 93f;        
122                                                   
123         .register       %g2,#scratch              
124         .register       %g3,#scratch              
125                                                   
126         .text                                     
127 #ifndef EX_RETVAL                                 
    /* Everything up to the matching #endif is assembled only when
     * EX_RETVAL was not defined before this point: the identity default
     * for EX_RETVAL plus a set of small return-value stubs.
     *
     * Each stub computes a value from %o2 and the register or constant
     * encoded in its name, then returns with retl; the instruction after
     * retl executes in its delay slot.  Names with these prefixes appear
     * as the second argument of the EX_LD / EX_ST wrapper invocations in
     * this file.  Presumably the stubs are fault-fixup targets that report
     * how many bytes were left uncopied -- confirm against the files that
     * override those wrappers.
     *
     * VISExitHalf is not defined anywhere in this listing (presumably it
     * comes from <asm/visasm.h> -- confirm).  Several operands below are
     * cut off on the right; those are called out per stub.
     */
128 #define EX_RETVAL(x)    x                         
    /* %o0 = %g1 + 1 + %g2 + %o2, after VISExitHalf. */
129 ENTRY(U1_g1_1_fp)                                 
130         VISExitHalf                               
131         add             %g1, 1, %g1               
132         add             %g1, %g2, %g1             
133         retl                                      
134          add            %g1, %o2, %o0             
135 ENDPROC(U1_g1_1_fp)                               
    /* %o0 = %g2 + %o2, after VISExitHalf. */
136 ENTRY(U1_g2_0_fp)                                 
137         VISExitHalf                               
138         retl                                      
139          add            %g2, %o2, %o0             
140 ENDPROC(U1_g2_0_fp)                               
    /* %o0 = %g2 + 8 + %o2, after VISExitHalf. */
141 ENTRY(U1_g2_8_fp)                                 
142         VISExitHalf                               
143         add             %g2, 8, %g2               
144         retl                                      
145          add            %g2, %o2, %o0             
146 ENDPROC(U1_g2_8_fp)                               
    /* Adds %GLOBAL_SPARE and %g3 (destination cut off in this listing,
     * presumably %o0), then %o0 = %o0 + %o2.
     */
147 ENTRY(U1_gs_0_fp)                                 
148         VISExitHalf                               
149         add             %GLOBAL_SPARE, %g3, %o    
150         retl                                      
151          add            %o0, %o2, %o0             
152 ENDPROC(U1_gs_0_fp)                               
    /* Same shape as U1_gs_0_fp with 0x80 first added to %GLOBAL_SPARE
     * (destinations of both adds are cut off in this listing).
     */
153 ENTRY(U1_gs_80_fp)                                
154         VISExitHalf                               
155         add             %GLOBAL_SPARE, 0x80, %    
156         add             %GLOBAL_SPARE, %g3, %o    
157         retl                                      
158          add            %o0, %o2, %o0             
159 ENDPROC(U1_gs_80_fp)                              
    /* Same shape as U1_gs_0_fp with 0x40 first added to %GLOBAL_SPARE
     * (destinations of both adds are cut off in this listing).
     */
160 ENTRY(U1_gs_40_fp)                                
161         VISExitHalf                               
162         add             %GLOBAL_SPARE, 0x40, %    
163         add             %GLOBAL_SPARE, %g3, %o    
164         retl                                      
165          add            %o0, %o2, %o0             
166 ENDPROC(U1_gs_40_fp)                              
    /* %o0 = %g3 + %o2, after VISExitHalf. */
167 ENTRY(U1_g3_0_fp)                                 
168         VISExitHalf                               
169         retl                                      
170          add            %g3, %o2, %o0             
171 ENDPROC(U1_g3_0_fp)                               
    /* %o0 = %g3 + 8 + %o2, after VISExitHalf. */
172 ENTRY(U1_g3_8_fp)                                 
173         VISExitHalf                               
174         add             %g3, 8, %g3               
175         retl                                      
176          add            %g3, %o2, %o0             
177 ENDPROC(U1_g3_8_fp)                               
    /* %o0 = %o2, after VISExitHalf. */
178 ENTRY(U1_o2_0_fp)                                 
179         VISExitHalf                               
180         retl                                      
181          mov            %o2, %o0                  
182 ENDPROC(U1_o2_0_fp)                               
    /* %o0 = %o2 + 1, after VISExitHalf. */
183 ENTRY(U1_o2_1_fp)                                 
184         VISExitHalf                               
185         retl                                      
186          add            %o2, 1, %o0               
187 ENDPROC(U1_o2_1_fp)                               
    /* The three U1_gs_* stubs below run VISExitHalf even though their
     * names carry no _fp suffix.
     */
    /* Adds %GLOBAL_SPARE and %o2 (destination cut off, presumably %o0). */
188 ENTRY(U1_gs_0)                                    
189         VISExitHalf                               
190         retl                                      
191          add            %GLOBAL_SPARE, %o2, %o    
192 ENDPROC(U1_gs_0)                                  
    /* Adds %o2 to %GLOBAL_SPARE, then adds 0x8 in the delay slot
     * (destinations of both adds are cut off in this listing).
     */
193 ENTRY(U1_gs_8)                                    
194         VISExitHalf                               
195         add             %GLOBAL_SPARE, %o2, %G    
196         retl                                      
197          add            %GLOBAL_SPARE, 0x8, %o    
198 ENDPROC(U1_gs_8)                                  
    /* Adds %o2 to %GLOBAL_SPARE, then adds 0x10 in the delay slot
     * (destinations of both adds are cut off in this listing).
     */
199 ENTRY(U1_gs_10)                                   
200         VISExitHalf                               
201         add             %GLOBAL_SPARE, %o2, %G    
202         retl                                      
203          add            %GLOBAL_SPARE, 0x10, %    
204 ENDPROC(U1_gs_10)                                 
    /* The remaining stubs do not invoke VISExitHalf. */
    /* %o0 = %o2. */
205 ENTRY(U1_o2_0)                                    
206         retl                                      
207          mov            %o2, %o0                  
208 ENDPROC(U1_o2_0)                                  
    /* %o0 = %o2 + 8. */
209 ENTRY(U1_o2_8)                                    
210         retl                                      
211          add            %o2, 8, %o0               
212 ENDPROC(U1_o2_8)                                  
    /* %o0 = %o2 + 4. */
213 ENTRY(U1_o2_4)                                    
214         retl                                      
215          add            %o2, 4, %o0               
216 ENDPROC(U1_o2_4)                                  
    /* %o0 = %o2 + 1. */
217 ENTRY(U1_o2_1)                                    
218         retl                                      
219          add            %o2, 1, %o0               
220 ENDPROC(U1_o2_1)                                  
    /* %o0 = %g1 + %o2. */
221 ENTRY(U1_g1_0)                                    
222         retl                                      
223          add            %g1, %o2, %o0             
224 ENDPROC(U1_g1_0)                                  
    /* %o0 = %g1 + 1 + %o2. */
225 ENTRY(U1_g1_1)                                    
226         add             %g1, 1, %g1               
227         retl                                      
228          add            %g1, %o2, %o0             
229 ENDPROC(U1_g1_1)                                  
    /* Masks %o2 down to its low three bits, then adds it to %GLOBAL_SPARE
     * (destination cut off, presumably %o0).
     */
230 ENTRY(U1_gs_0_o2_adj)                             
231         and             %o2, 7, %o2               
232         retl                                      
233          add            %GLOBAL_SPARE, %o2, %o    
234 ENDPROC(U1_gs_0_o2_adj)                           
    /* Masks %o2 down to its low three bits, adds 8 to %GLOBAL_SPARE, then
     * adds the masked %o2 (destinations of both adds are cut off).
     */
235 ENTRY(U1_gs_8_o2_adj)                             
236         and             %o2, 7, %o2               
237         add             %GLOBAL_SPARE, 8, %GLO    
238         retl                                      
239          add            %GLOBAL_SPARE, %o2, %o    
240 ENDPROC(U1_gs_8_o2_adj)                           
241 #endif                                            
242                                                   
243         .align          64                        
244                                                   
245         .globl          FUNC_NAME                 
246         .type           FUNC_NAME,#function       
247 FUNC_NAME:              /* %o0=dst, %o1=src, %    
248         srlx            %o2, 31, %g2              
249         cmp             %g2, 0                    
250         tne             %xcc, 5                   
251         PREAMBLE                                  
252         mov             %o0, %o4                  
253         cmp             %o2, 0                    
254         be,pn           %XCC, 85f                 
255          or             %o0, %o1, %o3             
256         cmp             %o2, 16                   
257         blu,a,pn        %XCC, 80f                 
258          or             %o3, %o2, %o3             
259                                                   
260         cmp             %o2, (5 * 64)             
261         blu,pt          %XCC, 70f                 
262          andcc          %o3, 0x7, %g0             
263                                                   
264         /* Clobbers o5/g1/g2/g3/g7/icc/xcc.  *    
265         VISEntry                                  
266                                                   
267         /* Is 'dst' already aligned on an 64-b    
268         andcc           %o0, 0x3f, %g2            
269         be,pt           %XCC, 2f                  
270                                                   
271         /* Compute abs((dst & 0x3f) - 0x40) in    
272          * of bytes to copy to make 'dst' 64-b    
273          * subtract this from 'len'.              
274          */                                       
275          sub            %o0, %o1, %GLOBAL_SPAR    
276         sub             %g2, 0x40, %g2            
277         sub             %g0, %g2, %g2             
278         sub             %o2, %g2, %o2             
279         andcc           %g2, 0x7, %g1             
280         be,pt           %icc, 2f                  
281          and            %g2, 0x38, %g2            
282                                                   
283 1:      subcc           %g1, 0x1, %g1             
284         EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3),     
285         EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL    
286         bgu,pt          %XCC, 1b                  
287          add            %o1, 0x1, %o1             
288                                                   
289         add             %o1, %GLOBAL_SPARE, %o    
290                                                   
291 2:      cmp             %g2, 0x0                  
292         and             %o1, 0x7, %g1             
293         be,pt           %icc, 3f                  
294          alignaddr      %o1, %g0, %o1             
295                                                   
296         EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_    
297 1:      EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1    
298         add             %o1, 0x8, %o1             
299         subcc           %g2, 0x8, %g2             
300         faligndata      %f4, %f6, %f0             
301         EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8    
302         be,pn           %icc, 3f                  
303          add            %o0, 0x8, %o0             
304                                                   
305         EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1    
306         add             %o1, 0x8, %o1             
307         subcc           %g2, 0x8, %g2             
308         faligndata      %f6, %f4, %f0             
309         EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8    
310         bne,pt          %icc, 1b                  
311          add            %o0, 0x8, %o0             
312                                                   
313         /* Destination is 64-byte aligned.  */    
314 3:                                                
315         membar            #LoadStore | #StoreS    
316                                                   
317         subcc           %o2, 0x40, %GLOBAL_SPA    
318         add             %o1, %g1, %g1             
319         andncc          %GLOBAL_SPARE, (0x40 -    
320         srl             %g1, 3, %g2               
321         sub             %o2, %GLOBAL_SPARE, %g    
322         andn            %o1, (0x40 - 1), %o1      
323         and             %g2, 7, %g2               
324         andncc          %g3, 0x7, %g3             
325         fsrc2           %f0, %f2                  
326         sub             %g3, 0x8, %g3             
327         sub             %o2, %GLOBAL_SPARE, %o    
328                                                   
329         add             %g1, %GLOBAL_SPARE, %g    
330         subcc           %o2, %g3, %o2             
331                                                   
332         EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_f    
333         add             %o1, 0x40, %o1            
334         add             %g1, %g3, %g1             
335         EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_    
336         add             %o1, 0x40, %o1            
337         sub             %GLOBAL_SPARE, 0x80, %    
338         EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80    
339         add             %o1, 0x40, %o1            
340                                                   
341         /* There are 8 instances of the unroll    
342          * one for each possible alignment of     
343          * source buffer.  Each loop instance     
344          * bytes.                                 
345          */                                       
346         sll             %g2, 3, %o3               
347         sub             %o3, %g2, %o3             
348         sllx            %o3, 4, %o3               
349         add             %o3, %g2, %o3             
350         sllx            %o3, 2, %g2               
351 1:      rd              %pc, %o3                  
352         add             %o3, %lo(1f - 1b), %o3    
353         jmpl            %o3 + %g2, %g0            
354          nop                                      
355                                                   
356         .align          64                        
357 1:      FREG_FROB(f0, f2, f4, f6, f8, f10,f12,    
358         LOOP_CHUNK1(o1, o0, 1f)                   
359         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,    
360         LOOP_CHUNK2(o1, o0, 2f)                   
361         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,    
362         LOOP_CHUNK3(o1, o0, 3f)                   
363         ba,pt           %xcc, 1b+4                
364          faligndata     %f0, %f2, %f48            
365 1:      FREG_FROB(f16,f18,f20,f22,f24,f26,f28,    
366         STORE_SYNC(o0, f48)                       
367         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,    
368         STORE_JUMP(o0, f48, 40f)                  
369 2:      FREG_FROB(f32,f34,f36,f38,f40,f42,f44,    
370         STORE_SYNC(o0, f48)                       
371         FREG_FROB(f0, f2, f4, f6, f8, f10,f12,    
372         STORE_JUMP(o0, f48, 48f)                  
373 3:      FREG_FROB(f0, f2, f4, f6, f8, f10,f12,    
374         STORE_SYNC(o0, f48)                       
375         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,    
376         STORE_JUMP(o0, f48, 56f)                  
377                                                   
378 1:      FREG_FROB(f2, f4, f6, f8, f10,f12,f14,    
379         LOOP_CHUNK1(o1, o0, 1f)                   
380         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,    
381         LOOP_CHUNK2(o1, o0, 2f)                   
382         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,    
383         LOOP_CHUNK3(o1, o0, 3f)                   
384         ba,pt           %xcc, 1b+4                
385          faligndata     %f2, %f4, %f48            
386 1:      FREG_FROB(f18,f20,f22,f24,f26,f28,f30,    
387         STORE_SYNC(o0, f48)                       
388         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,    
389         STORE_JUMP(o0, f48, 41f)                  
390 2:      FREG_FROB(f34,f36,f38,f40,f42,f44,f46,    
391         STORE_SYNC(o0, f48)                       
392         FREG_FROB(f2, f4, f6, f8, f10,f12,f14,    
393         STORE_JUMP(o0, f48, 49f)                  
394 3:      FREG_FROB(f2, f4, f6, f8, f10,f12,f14,    
395         STORE_SYNC(o0, f48)                       
396         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,    
397         STORE_JUMP(o0, f48, 57f)                  
398                                                   
399 1:      FREG_FROB(f4, f6, f8, f10,f12,f14,f16,    
400         LOOP_CHUNK1(o1, o0, 1f)                   
401         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,    
402         LOOP_CHUNK2(o1, o0, 2f)                   
403         FREG_FROB(f36,f38,f40,f42,f44,f46,f0,     
404         LOOP_CHUNK3(o1, o0, 3f)                   
405         ba,pt           %xcc, 1b+4                
406          faligndata     %f4, %f6, %f48            
407 1:      FREG_FROB(f20,f22,f24,f26,f28,f30,f32,    
408         STORE_SYNC(o0, f48)                       
409         FREG_FROB(f36,f38,f40,f42,f44,f46,f0,     
410         STORE_JUMP(o0, f48, 42f)                  
411 2:      FREG_FROB(f36,f38,f40,f42,f44,f46,f0,     
412         STORE_SYNC(o0, f48)                       
413         FREG_FROB(f4, f6, f8, f10,f12,f14,f16,    
414         STORE_JUMP(o0, f48, 50f)                  
415 3:      FREG_FROB(f4, f6, f8, f10,f12,f14,f16,    
416         STORE_SYNC(o0, f48)                       
417         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,    
418         STORE_JUMP(o0, f48, 58f)                  
419                                                   
420 1:      FREG_FROB(f6, f8, f10,f12,f14,f16,f18,    
421         LOOP_CHUNK1(o1, o0, 1f)                   
422         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,    
423         LOOP_CHUNK2(o1, o0, 2f)                   
424         FREG_FROB(f38,f40,f42,f44,f46,f0, f2,     
425         LOOP_CHUNK3(o1, o0, 3f)                   
426         ba,pt           %xcc, 1b+4                
427          faligndata     %f6, %f8, %f48            
428 1:      FREG_FROB(f22,f24,f26,f28,f30,f32,f34,    
429         STORE_SYNC(o0, f48)                       
430         FREG_FROB(f38,f40,f42,f44,f46,f0, f2,     
431         STORE_JUMP(o0, f48, 43f)                  
432 2:      FREG_FROB(f38,f40,f42,f44,f46,f0, f2,     
433         STORE_SYNC(o0, f48)                       
434         FREG_FROB(f6, f8, f10,f12,f14,f16,f18,    
435         STORE_JUMP(o0, f48, 51f)                  
436 3:      FREG_FROB(f6, f8, f10,f12,f14,f16,f18,    
437         STORE_SYNC(o0, f48)                       
438         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,    
439         STORE_JUMP(o0, f48, 59f)                  
440                                                   
441 1:      FREG_FROB(f8, f10,f12,f14,f16,f18,f20,    
442         LOOP_CHUNK1(o1, o0, 1f)                   
443         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,    
444         LOOP_CHUNK2(o1, o0, 2f)                   
445         FREG_FROB(f40,f42,f44,f46,f0, f2, f4,     
446         LOOP_CHUNK3(o1, o0, 3f)                   
447         ba,pt           %xcc, 1b+4                
448          faligndata     %f8, %f10, %f48           
449 1:      FREG_FROB(f24,f26,f28,f30,f32,f34,f36,    
450         STORE_SYNC(o0, f48)                       
451         FREG_FROB(f40,f42,f44,f46,f0, f2, f4,     
452         STORE_JUMP(o0, f48, 44f)                  
453 2:      FREG_FROB(f40,f42,f44,f46,f0, f2, f4,     
454         STORE_SYNC(o0, f48)                       
455         FREG_FROB(f8, f10,f12,f14,f16,f18,f20,    
456         STORE_JUMP(o0, f48, 52f)                  
457 3:      FREG_FROB(f8, f10,f12,f14,f16,f18,f20,    
458         STORE_SYNC(o0, f48)                       
459         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,    
460         STORE_JUMP(o0, f48, 60f)                  
461                                                   
462 1:      FREG_FROB(f10,f12,f14,f16,f18,f20,f22,    
463         LOOP_CHUNK1(o1, o0, 1f)                   
464         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,    
465         LOOP_CHUNK2(o1, o0, 2f)                   
466         FREG_FROB(f42,f44,f46,f0, f2, f4, f6,     
467         LOOP_CHUNK3(o1, o0, 3f)                   
468         ba,pt           %xcc, 1b+4                
469          faligndata     %f10, %f12, %f48          
470 1:      FREG_FROB(f26,f28,f30,f32,f34,f36,f38,    
471         STORE_SYNC(o0, f48)                       
472         FREG_FROB(f42,f44,f46,f0, f2, f4, f6,     
473         STORE_JUMP(o0, f48, 45f)                  
474 2:      FREG_FROB(f42,f44,f46,f0, f2, f4, f6,     
475         STORE_SYNC(o0, f48)                       
476         FREG_FROB(f10,f12,f14,f16,f18,f20,f22,    
477         STORE_JUMP(o0, f48, 53f)                  
478 3:      FREG_FROB(f10,f12,f14,f16,f18,f20,f22,    
479         STORE_SYNC(o0, f48)                       
480         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,    
481         STORE_JUMP(o0, f48, 61f)                  
482                                                   
483 1:      FREG_FROB(f12,f14,f16,f18,f20,f22,f24,    
484         LOOP_CHUNK1(o1, o0, 1f)                   
485         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,    
486         LOOP_CHUNK2(o1, o0, 2f)                   
487         FREG_FROB(f44,f46,f0, f2, f4, f6, f8,     
488         LOOP_CHUNK3(o1, o0, 3f)                   
489         ba,pt           %xcc, 1b+4                
490          faligndata     %f12, %f14, %f48          
491 1:      FREG_FROB(f28,f30,f32,f34,f36,f38,f40,    
492         STORE_SYNC(o0, f48)                       
493         FREG_FROB(f44,f46,f0, f2, f4, f6, f8,     
494         STORE_JUMP(o0, f48, 46f)                  
495 2:      FREG_FROB(f44,f46,f0, f2, f4, f6, f8,     
496         STORE_SYNC(o0, f48)                       
497         FREG_FROB(f12,f14,f16,f18,f20,f22,f24,    
498         STORE_JUMP(o0, f48, 54f)                  
499 3:      FREG_FROB(f12,f14,f16,f18,f20,f22,f24,    
500         STORE_SYNC(o0, f48)                       
501         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,    
502         STORE_JUMP(o0, f48, 62f)                  
503                                                   
504 1:      FREG_FROB(f14,f16,f18,f20,f22,f24,f26,    
505         LOOP_CHUNK1(o1, o0, 1f)                   
506         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,    
507         LOOP_CHUNK2(o1, o0, 2f)                   
508         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,    
509         LOOP_CHUNK3(o1, o0, 3f)                   
510         ba,pt           %xcc, 1b+4                
511          faligndata     %f14, %f16, %f48          
512 1:      FREG_FROB(f30,f32,f34,f36,f38,f40,f42,    
513         STORE_SYNC(o0, f48)                       
514         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,    
515         STORE_JUMP(o0, f48, 47f)                  
516 2:      FREG_FROB(f46,f0, f2, f4, f6, f8, f10,    
517         STORE_SYNC(o0, f48)                       
518         FREG_FROB(f14,f16,f18,f20,f22,f24,f26,    
519         STORE_JUMP(o0, f48, 55f)                  
520 3:      FREG_FROB(f14,f16,f18,f20,f22,f24,f26,    
521         STORE_SYNC(o0, f48)                       
522         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,    
523         STORE_JUMP(o0, f48, 63f)                  
524                                                   
525 40:     FINISH_VISCHUNK(o0, f0,  f2)              
526 41:     FINISH_VISCHUNK(o0, f2,  f4)              
527 42:     FINISH_VISCHUNK(o0, f4,  f6)              
528 43:     FINISH_VISCHUNK(o0, f6,  f8)              
529 44:     FINISH_VISCHUNK(o0, f8,  f10)             
530 45:     FINISH_VISCHUNK(o0, f10, f12)             
531 46:     FINISH_VISCHUNK(o0, f12, f14)             
532 47:     UNEVEN_VISCHUNK(o0, f14, f0)              
533 48:     FINISH_VISCHUNK(o0, f16, f18)             
534 49:     FINISH_VISCHUNK(o0, f18, f20)             
535 50:     FINISH_VISCHUNK(o0, f20, f22)             
536 51:     FINISH_VISCHUNK(o0, f22, f24)             
537 52:     FINISH_VISCHUNK(o0, f24, f26)             
538 53:     FINISH_VISCHUNK(o0, f26, f28)             
539 54:     FINISH_VISCHUNK(o0, f28, f30)             
540 55:     UNEVEN_VISCHUNK(o0, f30, f0)              
541 56:     FINISH_VISCHUNK(o0, f32, f34)             
542 57:     FINISH_VISCHUNK(o0, f34, f36)             
543 58:     FINISH_VISCHUNK(o0, f36, f38)             
544 59:     FINISH_VISCHUNK(o0, f38, f40)             
545 60:     FINISH_VISCHUNK(o0, f40, f42)             
546 61:     FINISH_VISCHUNK(o0, f42, f44)             
547 62:     FINISH_VISCHUNK(o0, f44, f46)             
548 63:     UNEVEN_VISCHUNK_LAST(o0, f46, f0)         
549                                                   
550 93:     EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_    
551         add             %o1, 8, %o1               
552         subcc           %g3, 8, %g3               
553         faligndata      %f0, %f2, %f8             
554         EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8    
555         bl,pn           %xcc, 95f                 
556          add            %o0, 8, %o0               
557         EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_    
558         add             %o1, 8, %o1               
559         subcc           %g3, 8, %g3               
560         faligndata      %f2, %f0, %f8             
561         EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8    
562         bge,pt          %xcc, 93b                 
563          add            %o0, 8, %o0               
564                                                   
565 95:     brz,pt          %o2, 2f                   
566          mov            %g1, %o1                  
567                                                   
568 1:      EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0    
569         add             %o1, 1, %o1               
570         subcc           %o2, 1, %o2               
571         EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1    
572         bne,pt          %xcc, 1b                  
573          add            %o0, 1, %o0               
574                                                   
575 2:      membar          #StoreLoad | #StoreSto    
576         VISExit                                   
577         retl                                      
578          mov            EX_RETVAL(%o4), %o0       
579                                                   
580         .align          64                        
581 70:     /* 16 < len <= (5 * 64) */                
582         bne,pn          %XCC, 75f                 
583          sub            %o0, %o1, %o3             
584                                                   
585 72:     andn            %o2, 0xf, %GLOBAL_SPAR    
586         and             %o2, 0xf, %o2             
587 1:      EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_g    
588         EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_g    
589         subcc           %GLOBAL_SPARE, 0x10, %    
590         EX_ST(STORE(stx, %o5, %o1 + %o3), U1_g    
591         add             %o1, 0x8, %o1             
592         EX_ST(STORE(stx, %g1, %o1 + %o3), U1_g    
593         bgu,pt          %XCC, 1b                  
594          add            %o1, 0x8, %o1             
595 73:     andcc           %o2, 0x8, %g0             
596         be,pt           %XCC, 1f                  
597          nop                                      
598         EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)       
599         sub             %o2, 0x8, %o2             
600         EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o    
601         add             %o1, 0x8, %o1             
602 1:      andcc           %o2, 0x4, %g0             
603         be,pt           %XCC, 1f                  
604          nop                                      
605         EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)      
606         sub             %o2, 0x4, %o2             
607         EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o    
608         add             %o1, 0x4, %o1             
609 1:      cmp             %o2, 0                    
610         be,pt           %XCC, 85f                 
611          nop                                      
612         ba,pt           %xcc, 90f                 
613          nop                                      
614                                                   
615 75:     andcc           %o0, 0x7, %g1             
616         sub             %g1, 0x8, %g1             
617         be,pn           %icc, 2f                  
618          sub            %g0, %g1, %g1             
619         sub             %o2, %g1, %o2             
620                                                   
621 1:      EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)      
622         subcc           %g1, 1, %g1               
623         EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g    
624         bgu,pt          %icc, 1b                  
625          add            %o1, 1, %o1               
626                                                   
627 2:      add             %o1, %o3, %o0             
628         andcc           %o1, 0x7, %g1             
629         bne,pt          %icc, 8f                  
630          sll            %g1, 3, %g1               
631                                                   
632         cmp             %o2, 16                   
633         bgeu,pt         %icc, 72b                 
634          nop                                      
635         ba,a,pt         %xcc, 73b                 
636                                                   
637 8:      mov             64, %o3                   
638         andn            %o1, 0x7, %o1             
639         EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)       
640         sub             %o3, %g1, %o3             
641         andn            %o2, 0x7, %GLOBAL_SPAR    
642         sllx            %g2, %g1, %g2             
643 1:      EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs    
644         subcc           %GLOBAL_SPARE, 0x8, %G    
645         add             %o1, 0x8, %o1             
646         srlx            %g3, %o3, %o5             
647         or              %o5, %g2, %o5             
648         EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2    
649         add             %o0, 0x8, %o0             
650         bgu,pt          %icc, 1b                  
651          sllx           %g3, %g1, %g2             
652                                                   
653         srl             %g1, 3, %g1               
654         andcc           %o2, 0x7, %o2             
655         be,pn           %icc, 85f                 
656          add            %o1, %g1, %o1             
657         ba,pt           %xcc, 90f                 
658          sub            %o0, %o1, %o3             
659                                                   
660         .align          64                        
661 80:     /* 0 < len <= 16 */                       
662         andcc           %o3, 0x3, %g0             
663         bne,pn          %XCC, 90f                 
664          sub            %o0, %o1, %o3             
665                                                   
666 1:      EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)      
667         subcc           %o2, 4, %o2               
668         EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o    
669         bgu,pt          %XCC, 1b                  
670          add            %o1, 4, %o1               
671                                                   
672 85:     retl                                      
673          mov            EX_RETVAL(%o4), %o0       
674                                                   
675         .align          32                        
676 90:     EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)      
677         subcc           %o2, 1, %o2               
678         EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o    
679         bgu,pt          %XCC, 90b                 
680          add            %o1, 1, %o1               
681         retl                                      
682          mov            EX_RETVAL(%o4), %o0       
683                                                   
684         .size           FUNC_NAME, .-FUNC_NAME    
685 EXPORT_SYMBOL(FUNC_NAME)                          
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php