~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/memcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm64/lib/memcpy.S (Architecture alpha) and /arch/i386/lib/memcpy.S (Architecture i386)


  1 /* SPDX-License-Identifier: GPL-2.0-only */       
  2 /*                                                
  3  * Copyright (c) 2012-2021, Arm Limited.          
  4  *                                                
  5  * Adapted from the original at:                  
  6  * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/memcpy.S
  7  */                                               
  8                                                   
  9 #include <linux/linkage.h>                        
 10 #include <asm/assembler.h>                        
 11                                                   
 12 /* Assumptions:                                   
 13  *                                                
 14  * ARMv8-a, AArch64, unaligned accesses.          
 15  *                                                
 16  */                                               
 17                                                   
 18 #define L(label) .L ## label                      /* pastes ".L" in front of the name: assembler-local label */
 19
 20 #define dstin   x0                                /* destination as passed in; no instruction below writes it */
 21 #define src     x1                                /* source pointer */
 22 #define count   x2                                /* number of bytes to copy */
 23 #define dst     x3                                /* 16-byte aligned destination cursor of the forward long copy */
 24 #define srcend  x4                                /* src + count */
 25 #define dstend  x5                                /* dstin + count */
 26 #define A_l     x6                                /* A..H are data registers; _l/_h are the two registers of one ldp/stp pair */
 27 #define A_lw    w6                                /* 32-bit view of A_l, used for the 4-byte and 1-byte copies */
 28 #define A_h     x7
 29 #define B_l     x8
 30 #define B_lw    w8                                /* 32-bit view of B_l */
 31 #define B_h     x9
 32 #define C_l     x10
 33 #define C_lw    w10                               /* 32-bit view of C_l */
 34 #define C_h     x11
 35 #define D_l     x12
 36 #define D_h     x13
 37 #define E_l     x14                               /* same register as tmp1 */
 38 #define E_h     x15
 39 #define F_l     x16
 40 #define F_h     x17
 41 #define G_l     count                             /* G and H reuse count/dst/src/srcend: loading them overwrites those values */
 42 #define G_h     dst
 43 #define H_l     src
 44 #define H_h     srcend
 45 #define tmp1    x14                               /* scratch; same register as E_l, so it is dead once E_l is loaded */
 46                                                   
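 47 /* This implementation handles overlaps and supports both memcpy and memmove
 48    from a single entry point.  It uses unaligned accesses and branchless
 49    sequences to keep the code small, simple and improve performance.
 50
 51    Copies are split into 3 main cases: small copies of up to 32 bytes, medium
 52    copies of up to 128 bytes, and large copies.  The overhead of the overlap
 53    check is negligible since it is only required for large copies.
 54
 55    Large copies use a software pipelined loop processing 64 bytes per iteration.
 56    The destination pointer is 16-byte aligned to minimize unaligned accesses.
 57    The loop tail is handled by always copying 64 bytes from the end.
 58 */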
 59                                                   
 60 SYM_FUNC_START(__pi_memcpy)                       /* in: x0 = dstin, x1 = src, x2 = count; x0 is left untouched */
 61         add     srcend, src, count                /* srcend = one past the last source byte */
 62         add     dstend, dstin, count              /* dstend = one past the last destination byte */
 63         cmp     count, 128
 64         b.hi    L(copy_long)                      /* count > 128 (unsigned) */
 65         cmp     count, 32
 66         b.hi    L(copy32_128)                     /* 33..128 bytes */
 67
 68         /* Small copies: 0..32 bytes.  */
 69         cmp     count, 16
 70         b.lo    L(copy16)                         /* count < 16 */
 71         ldp     A_l, A_h, [src]                   /* 16..32 bytes: first 16 and last 16, which may overlap */
 72         ldp     D_l, D_h, [srcend, -16]
 73         stp     A_l, A_h, [dstin]                 /* both loads are done before the first store */
 74         stp     D_l, D_h, [dstend, -16]
 75         ret
 76
 77         /* Copy 8-15 bytes.  */
 78 L(copy16):
 79         tbz     count, 3, L(copy8)                /* count < 16 here, so bit 3 clear means count < 8 */
 80         ldr     A_l, [src]                        /* first 8 and last 8 bytes, which may overlap */
 81         ldr     A_h, [srcend, -8]
 82         str     A_l, [dstin]
 83         str     A_h, [dstend, -8]
 84         ret
 85
 86         .p2align 3
 87         /* Copy 4-7 bytes.  */
 88 L(copy8):
 89         tbz     count, 2, L(copy4)                /* count < 8 here, so bit 2 clear means count < 4 */
 90         ldr     A_lw, [src]                       /* first 4 and last 4 bytes, which may overlap */
 91         ldr     B_lw, [srcend, -4]
 92         str     A_lw, [dstin]
 93         str     B_lw, [dstend, -4]
 94         ret
 95
 96         /* Copy 0..3 bytes using a branchless sequence.  */
 97 L(copy4):
 98         cbz     count, L(copy0)                   /* nothing to copy */
 99         lsr     tmp1, count, 1                    /* tmp1 = count / 2: 0, 1, 1 for count 1, 2, 3 */
100         ldrb    A_lw, [src]                       /* first byte */
101         ldrb    C_lw, [srcend, -1]                /* last byte */
102         ldrb    B_lw, [src, tmp1]                 /* middle byte (same as first or last when count < 3) */
103         strb    A_lw, [dstin]
104         strb    B_lw, [dstin, tmp1]
105         strb    C_lw, [dstend, -1]
106 L(copy0):
107         ret
108
109         .p2align 4
110         /* Medium copies: 33..128 bytes.  */
111 L(copy32_128):
112         ldp     A_l, A_h, [src]                   /* A, B = first 32 bytes */
113         ldp     B_l, B_h, [src, 16]
114         ldp     C_l, C_h, [srcend, -32]           /* C, D = last 32 bytes */
115         ldp     D_l, D_h, [srcend, -16]
116         cmp     count, 64
117         b.hi    L(copy128)                        /* count > 64 needs more than first 32 + last 32 */
118         stp     A_l, A_h, [dstin]
119         stp     B_l, B_h, [dstin, 16]
120         stp     C_l, C_h, [dstend, -32]
121         stp     D_l, D_h, [dstend, -16]
122         ret
123
124         .p2align 4
125         /* Copy 65..128 bytes.  */
126 L(copy128):
127         ldp     E_l, E_h, [src, 32]               /* E, F = source bytes 32..63 */
128         ldp     F_l, F_h, [src, 48]
129         cmp     count, 96
130         b.ls    L(copy96)                         /* count <= 96: first 64 + last 32 cover everything */
131         ldp     G_l, G_h, [srcend, -64]           /* overwrites count and dst (aliases of G) */
132         ldp     H_l, H_h, [srcend, -48]           /* overwrites src and srcend (aliases of H) */
133         stp     G_l, G_h, [dstend, -64]           /* every load has been issued before this first store */
134         stp     H_l, H_h, [dstend, -48]
135 L(copy96):
136         stp     A_l, A_h, [dstin]
137         stp     B_l, B_h, [dstin, 16]
138         stp     E_l, E_h, [dstin, 32]
139         stp     F_l, F_h, [dstin, 48]
140         stp     C_l, C_h, [dstend, -32]
141         stp     D_l, D_h, [dstend, -16]
142         ret
143
144         .p2align 4
145         /* Copy more than 128 bytes.  */
146 L(copy_long):
147         /* Use backwards copy if there is an overlap.  */
148         sub     tmp1, dstin, src
149         cbz     tmp1, L(copy0)                    /* dstin == src: nothing to do */
150         cmp     tmp1, count
151         b.lo    L(copy_long_backwards)            /* unsigned (dstin - src) < count: dstin lies inside the source range */
152
153         /* Copy 16 bytes and then align dst to 16-byte alignment.  */
154
155         ldp     D_l, D_h, [src]
156         and     tmp1, dstin, 15                   /* tmp1 = misalignment of dstin */
157         bic     dst, dstin, 15                    /* dst = dstin rounded down to 16 bytes */
158         sub     src, src, tmp1                    /* move src back by the same amount as dst */
159         add     count, count, tmp1      /* Count is now 16 too large.  */
160         ldp     A_l, A_h, [src, 16]
161         stp     D_l, D_h, [dstin]                 /* unaligned store of the first 16 bytes */
162         ldp     B_l, B_h, [src, 32]
163         ldp     C_l, C_h, [src, 48]
164         ldp     D_l, D_h, [src, 64]!              /* pre-index writeback: src += 64 */
165         subs    count, count, 128 + 16  /* Test and readjust count.  */
166         b.ls    L(copy64_from_end)
167
168 L(loop64):                                        /* A..D hold 64 loaded bytes: store them while loading the next 64 */
169         stp     A_l, A_h, [dst, 16]
170         ldp     A_l, A_h, [src, 16]
171         stp     B_l, B_h, [dst, 32]
172         ldp     B_l, B_h, [src, 32]
173         stp     C_l, C_h, [dst, 48]
174         ldp     C_l, C_h, [src, 48]
175         stp     D_l, D_h, [dst, 64]!              /* pre-index writeback: dst += 64 */
176         ldp     D_l, D_h, [src, 64]!              /* pre-index writeback: src += 64 */
177         subs    count, count, 64
178         b.hi    L(loop64)                         /* repeat while the subtraction did not borrow and left a non-zero count */
179
180         /* Write the last iteration and copy 64 bytes from the end.  */
181 L(copy64_from_end):
182         ldp     E_l, E_h, [srcend, -64]           /* E_l is the tmp1 register; tmp1 is not read after this point */
183         stp     A_l, A_h, [dst, 16]
184         ldp     A_l, A_h, [srcend, -48]
185         stp     B_l, B_h, [dst, 32]
186         ldp     B_l, B_h, [srcend, -32]
187         stp     C_l, C_h, [dst, 48]
188         ldp     C_l, C_h, [srcend, -16]
189         stp     D_l, D_h, [dst, 64]
190         stp     E_l, E_h, [dstend, -64]           /* final 64 bytes, addressed from the end */
191         stp     A_l, A_h, [dstend, -48]
192         stp     B_l, B_h, [dstend, -32]
193         stp     C_l, C_h, [dstend, -16]
194         ret
195
196         .p2align 4
197
198         /* Large backwards copy for overlapping copies.
199            Copy 16 bytes and then align dst to 16-byte alignment.  */
200 L(copy_long_backwards):
201         ldp     D_l, D_h, [srcend, -16]
202         and     tmp1, dstend, 15                  /* tmp1 = misalignment of dstend */
203         sub     srcend, srcend, tmp1              /* move srcend back by the same amount as dstend (below) */
204         sub     count, count, tmp1
205         ldp     A_l, A_h, [srcend, -16]
206         stp     D_l, D_h, [dstend, -16]           /* unaligned store of the last 16 bytes */
207         ldp     B_l, B_h, [srcend, -32]
208         ldp     C_l, C_h, [srcend, -48]
209         ldp     D_l, D_h, [srcend, -64]!          /* pre-index writeback: srcend -= 64 */
210         sub     dstend, dstend, tmp1              /* dstend is now 16-byte aligned */
211         subs    count, count, 128
212         b.ls    L(copy64_from_start)
213
214 L(loop64_backwards):                              /* mirror of loop64, walking down from the end */
215         stp     A_l, A_h, [dstend, -16]
216         ldp     A_l, A_h, [srcend, -16]
217         stp     B_l, B_h, [dstend, -32]
218         ldp     B_l, B_h, [srcend, -32]
219         stp     C_l, C_h, [dstend, -48]
220         ldp     C_l, C_h, [srcend, -48]
221         stp     D_l, D_h, [dstend, -64]!          /* pre-index writeback: dstend -= 64 */
222         ldp     D_l, D_h, [srcend, -64]!          /* pre-index writeback: srcend -= 64 */
223         subs    count, count, 64
224         b.hi    L(loop64_backwards)
225
226         /* Write the last iteration and copy 64 bytes from the start.  */
227 L(copy64_from_start):
228         ldp     G_l, G_h, [src, 48]               /* overwrites count and dst (aliases of G); src itself is still intact */
229         stp     A_l, A_h, [dstend, -16]
230         ldp     A_l, A_h, [src, 32]
231         stp     B_l, B_h, [dstend, -32]
232         ldp     B_l, B_h, [src, 16]
233         stp     C_l, C_h, [dstend, -48]
234         ldp     C_l, C_h, [src]
235         stp     D_l, D_h, [dstend, -64]
236         stp     G_l, G_h, [dstin, 48]             /* first 64 bytes, addressed from the start */
237         stp     A_l, A_h, [dstin, 32]
238         stp     B_l, B_h, [dstin, 16]
239         stp     C_l, C_h, [dstin]
240         ret
241 SYM_FUNC_END(__pi_memcpy)
242                                                   
243 SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)             /* __memcpy names the routine above */
244 EXPORT_SYMBOL(__memcpy)
245 SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)             /* NOTE(review): presumably weak so another definition of memcpy can take precedence - confirm in linkage.h */
246 EXPORT_SYMBOL(memcpy)
247
248 SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)         /* memmove uses the same entry point: __pi_memcpy checks for overlap on large copies and loads before storing on smaller ones */
249
250 SYM_FUNC_ALIAS(__memmove, __pi_memmove)
251 EXPORT_SYMBOL(__memmove)
252 SYM_FUNC_ALIAS_WEAK(memmove, __memmove)           /* weak alias, same pattern as memcpy */
253 EXPORT_SYMBOL(memmove)
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php