~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/riscv/lib/memmove.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/riscv/lib/memmove.S (Architecture sparc) and /arch/i386/lib/memmove.S (Architecture i386)


  1 /* SPDX-License-Identifier: GPL-2.0-only */       
  2 /*                                                
  3  * Copyright (C) 2022 Michael T. Kloos <michael    
  4  */                                               
  5                                                   
  6 #include <linux/linkage.h>                        
  7 #include <asm/asm.h>                              
  8                                                   
  9 SYM_FUNC_START(__memmove)
 10         /*
 11          * Returns
 12          *   a0 - dest
 13          *
 14          * Parameters
 15          *   a0 - Inclusive first byte of dest
 16          *   a1 - Inclusive first byte of src
 17          *   a2 - Length of copy n
 18          *
 19          * Because the return matches the parameter register a0,
 20          * we will not clobber or modify that register.
 21          *
 22          * Note: This currently only works on little-endian.
 23          * To port to big-endian, reverse the direction of shifts
 24          * in the 2 misaligned fixup copy loops.
 25          */
 26         /* Copies n bytes; each copy path picks forward or reverse from the order of src and dest. */
 27         /* Return if nothing to do */
 28         beq a0, a1, .Lreturn_from_memmove /* dest == src */
 29         beqz a2, .Lreturn_from_memmove /* n == 0 */
 30
 31         /*
 32          * Register Uses
 33          *      Forward Copy: a1 - Index counter of src
 34          *      Reverse Copy: a4 - Index counter of src
 35          *      Forward Copy: t3 - Index counter of dest
 36          *      Reverse Copy: t4 - Index counter of dest
 37          *   Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
 38          *   Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest
 39          *   Both Copy Modes: t0 - Link / Temporary for load-store
 40          *   Both Copy Modes: t1 - Temporary for load-store
 41          *   Both Copy Modes: t2 - Temporary for load-store
 42          *   Both Copy Modes: a5 - dest to src alignment offset
 43          *   Both Copy Modes: a6 - Shift amount
 44          *   Both Copy Modes: a7 - Inverse Shift amount
 45          *   Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
 46          */
 47
 48         /*
 49          * Solve for some register values now.
 50          * Byte copy does not need t5 or t6.
 51          */
 52         mv   t3, a0 /* t3 = dest; a0 itself is kept for the return value */
 53         add  t4, a0, a2 /* t4 = dest + n (one past the last dest byte) */
 54         add  a4, a1, a2 /* a4 = src + n (one past the last src byte) */
 55
 56         /*
 57          * Byte copy if copying less than (2 * SZREG) bytes. This can
 58          * cause problems with the bulk copy implementation and is
 59          * small enough not to bother.
 60          */
 61         andi t0, a2, -(2 * SZREG) /* t0 == 0 when n < 2 * SZREG */
 62         beqz t0, .Lbyte_copy
 63
 64         /*
 65          * Now solve for t5 and t6.
 66          */
 67         andi t5, t3, -SZREG /* t5 = dest rounded down to an SZREG boundary */
 68         andi t6, t4, -SZREG /* t6 = (dest + n) rounded down to an SZREG boundary */
 69         /*
 70          * If dest(Register t3) rounded down to the nearest naturally
 71          * aligned SZREG address, does not equal dest, then add SZREG
 72          * to find the low-bound of SZREG alignment in the dest memory
 73          * region.  Note that this could overshoot the dest memory
 74          * region if n is less than SZREG.  This is one reason why
 75          * we always byte copy if n is less than SZREG.
 76          * Otherwise, dest is already naturally aligned to SZREG.
 77          */
 78         beq  t5, t3, 1f
 79                 addi t5, t5, SZREG
 80         1:
 81
 82         /*
 83          * If the dest and src are co-aligned to SZREG, then there is
 84          * no need for the full rigmarole of a full misaligned fixup copy.
 85          * Instead, do a simpler co-aligned copy.
 86          */
 87         xor  t0, a0, a1 /* Bits in which dest and src differ */
 88         andi t1, t0, (SZREG - 1) /* Keep only the offset-within-SZREG bits */
 89         beqz t1, .Lcoaligned_copy
 90         /* Fall through to misaligned fixup copy */
 91
 92 .Lmisaligned_fixup_copy: /* src and dest are not co-aligned to SZREG */
 93         bltu a1, a0, .Lmisaligned_fixup_copy_reverse /* src below dest: copy from the end */
 94
 95 .Lmisaligned_fixup_copy_forward:
 96         jal  t0, .Lbyte_copy_until_aligned_forward /* Byte copy until dest (t3) reaches t5 */
 97
 98         andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
 99         slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
100         sub  a5, a1, t3 /* Find the difference between src and dest */
101         andi a1, a1, -SZREG /* Align the src pointer */
102         addi a2, t6, SZREG /* The other breakpoint for the unrolled loop */
103
104         /*
105          * Compute The Inverse Shift
106          * a7 = XLEN - a6 = XLEN + -a6
107          * 2s complement negation to find the negative: -a6 = ~a6 + 1
108          * Add that to XLEN.  XLEN = SZREG * 8.
109          */
110         not  a7, a6
111         addi a7, a7, (SZREG * 8 + 1)
112
113         /*
114          * Fix Misalignment Copy Loop - Forward
115          * load_val0 = load_ptr[0];
116          * do {
117          *      load_val1 = load_ptr[1];
118          *      store_ptr += 2;
119          *      store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7});
120          *
121          *      if (store_ptr == {a2})
122          *              break;
123          *
124          *      load_val0 = load_ptr[2];
125          *      load_ptr += 2;
126          *      store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7});
127          *
128          * } while (store_ptr != store_ptr_end);
129          * store_ptr = store_ptr_end;
130          */
131
132         REG_L t0, (0 * SZREG)(a1) /* load_val0 = load_ptr[0] */
133         1:
134         REG_L t1, (1 * SZREG)(a1) /* load_val1 = load_ptr[1] */
135         addi  t3, t3, (2 * SZREG) /* store_ptr += 2 */
136         srl   t0, t0, a6
137         sll   t2, t1, a7
138         or    t2, t0, t2
139         REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3) /* store_ptr[0 - 2] */
140
141         beq   t3, a2, 2f /* t3 == t6 + SZREG: the first store was the last aligned word */
142
143         REG_L t0, (2 * SZREG)(a1) /* load_val0 = load_ptr[2] */
144         addi  a1, a1, (2 * SZREG) /* load_ptr += 2 */
145         srl   t1, t1, a6
146         sll   t2, t0, a7
147         or    t2, t1, t2
148         REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3) /* store_ptr[1 - 2] */
149
150         bne   t3, t6, 1b
151         2:
152         mv    t3, t6 /* Fix the dest pointer in case the loop was broken */
153
154         add  a1, t3, a5 /* Restore the src pointer */
155         j .Lbyte_copy_forward /* Copy any remaining bytes */
156                                                   
157 .Lmisaligned_fixup_copy_reverse: /* Same fixup copy, walking from the end of the buffers down */
158         jal  t0, .Lbyte_copy_until_aligned_reverse /* Byte copy until dest end (t4) reaches t6 */
159
160         andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
161         slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
162         sub  a5, a4, t4 /* Find the difference between src and dest */
163         andi a4, a4, -SZREG /* Align the src pointer */
164         addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop */
165
166         /*
167          * Compute The Inverse Shift
168          * a7 = XLEN - a6 = XLEN + -a6
169          * 2s complement negation to find the negative: -a6 = ~a6 + 1
170          * Add that to XLEN.  XLEN = SZREG * 8.
171          */
172         not  a7, a6
173         addi a7, a7, (SZREG * 8 + 1)
174
175         /*
176          * Fix Misalignment Copy Loop - Reverse
177          * load_val1 = load_ptr[0];
178          * do {
179          *      load_val0 = load_ptr[-1];
180          *      store_ptr -= 2;
181          *      store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7});
182          *
183          *      if (store_ptr == {a2})
184          *              break;
185          *
186          *      load_val1 = load_ptr[-2];
187          *      load_ptr -= 2;
188          *      store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7});
189          *
190          * } while (store_ptr != store_ptr_end);
191          * store_ptr = store_ptr_end;
192          */
193
194         REG_L t1, ( 0 * SZREG)(a4) /* load_val1 = load_ptr[0] */
195         1:
196         REG_L t0, (-1 * SZREG)(a4) /* load_val0 = load_ptr[-1] */
197         addi  t4, t4, (-2 * SZREG) /* store_ptr -= 2 */
198         sll   t1, t1, a7
199         srl   t2, t0, a6
200         or    t2, t1, t2
201         REG_S t2, ( 1 * SZREG)(t4) /* store_ptr[1] */
202
203         beq   t4, a2, 2f /* t4 == t5 - SZREG: the first store was the last aligned word */
204
205         REG_L t1, (-2 * SZREG)(a4) /* load_val1 = load_ptr[-2] */
206         addi  a4, a4, (-2 * SZREG) /* load_ptr -= 2 */
207         sll   t0, t0, a7
208         srl   t2, t1, a6
209         or    t2, t0, t2
210         REG_S t2, ( 0 * SZREG)(t4) /* store_ptr[0] */
211
212         bne   t4, t5, 1b
213         2:
214         mv    t4, t5 /* Fix the dest pointer in case the loop was broken */
215
216         add  a4, t4, a5 /* Restore the src pointer */
217         j .Lbyte_copy_reverse /* Copy any remaining bytes */
218                                                   
219 /*
220  * Simple copy loops for SZREG co-aligned memory locations.
221  * These also make calls to do byte copies for any unaligned
222  * data at their terminations.
223  */
224 .Lcoaligned_copy:
225         bltu a1, a0, .Lcoaligned_copy_reverse /* src below dest: copy from the end */
226
227 .Lcoaligned_copy_forward:
228         jal t0, .Lbyte_copy_until_aligned_forward /* Byte copy until dest (t3) reaches t5 */
229
230         1: /* Copy one SZREG-sized word per pass until dest reaches t6 */
231         REG_L t1, ( 0 * SZREG)(a1)
232         addi  a1, a1, SZREG
233         addi  t3, t3, SZREG
234         REG_S t1, (-1 * SZREG)(t3) /* t3 was already advanced, so store one word back */
235         bne   t3, t6, 1b
236
237         j .Lbyte_copy_forward /* Copy any remaining bytes */
238                                                   
239 .Lcoaligned_copy_reverse: /* Co-aligned copy walking from the end of the buffers down */
240         jal t0, .Lbyte_copy_until_aligned_reverse /* Byte copy until dest end (t4) reaches t6 */
241
242         1: /* Copy one SZREG-sized word per pass until dest reaches t5 */
243         REG_L t1, (-1 * SZREG)(a4)
244         addi  a4, a4, -SZREG
245         addi  t4, t4, -SZREG
246         REG_S t1, ( 0 * SZREG)(t4) /* t4 was already moved down, so store at offset 0 */
247         bne   t4, t5, 1b
248
249         j .Lbyte_copy_reverse /* Copy any remaining bytes */
250                                                   
251 /*
252  * These are basically sub-functions within the function.  They
253  * are used to byte copy until the dest pointer is in alignment.
254  * At which point, a bulk copy method can be used by the
255  * calling code.  These work on the same registers as the bulk
256  * copy loops.  Therefore, the register values can be picked
257  * up from where they were left and we avoid code duplication
258  * without any overhead except the call in and return jumps.
259  */
260 .Lbyte_copy_until_aligned_forward: /* In: t0 = return address, a1 = src, t3 = dest, t5 = stop address */
261         beq  t3, t5, 2f /* dest already aligned: nothing to copy */
262         1:
263         lb   t1,  0(a1)
264         addi a1, a1, 1
265         addi t3, t3, 1
266         sb   t1, -1(t3) /* t3 was already advanced, so store one byte back */
267         bne  t3, t5, 1b
268         2:
269         jalr zero, 0x0(t0) /* Return to multibyte copy loop */
270                                                   
271 .Lbyte_copy_until_aligned_reverse: /* In: t0 = return address, a4 = src end, t4 = dest end, t6 = stop address */
272         beq  t4, t6, 2f /* dest end already aligned: nothing to copy */
273         1:
274         lb   t1, -1(a4)
275         addi a4, a4, -1
276         addi t4, t4, -1
277         sb   t1,  0(t4) /* t4 was already moved down, so store at offset 0 */
278         bne  t4, t6, 1b
279         2:
280         jalr zero, 0x0(t0) /* Return to multibyte copy loop */
281                                                   
282 /*
283  * Simple byte copy loops.
284  * These will byte copy until they reach the end of data to copy.
285  * At that point, they will call to return from memmove.
286  */
287 .Lbyte_copy:
288         bltu a1, a0, .Lbyte_copy_reverse /* src below dest: copy from the end */
289
290 .Lbyte_copy_forward: /* Copy bytes from a1 to t3 until t3 reaches t4 */
291         beq  t3, t4, 2f /* Nothing left to copy */
292         1:
293         lb   t1,  0(a1)
294         addi a1, a1, 1
295         addi t3, t3, 1
296         sb   t1, -1(t3) /* t3 was already advanced, so store one byte back */
297         bne  t3, t4, 1b
298         2:
299         ret
300                                                   
301 .Lbyte_copy_reverse: /* Copy bytes downward from a4 to t4 until t4 reaches t3 */
302         beq  t4, t3, 2f /* Nothing left to copy */
303         1:
304         lb   t1, -1(a4)
305         addi a4, a4, -1
306         addi t4, t4, -1
307         sb   t1,  0(t4) /* t4 was already moved down, so store at offset 0 */
308         bne  t4, t3, 1b
309         2: /* Falls through into the shared return below */
310
311 .Lreturn_from_memmove: /* Also the target of the early-out branches at function entry */
312         ret
313
314 SYM_FUNC_END(__memmove)
315 SYM_FUNC_ALIAS_WEAK(memmove, __memmove) /* NOTE(review): presumably exports memmove as a weak alias of __memmove; macro is defined outside this file - confirm */
316 SYM_FUNC_ALIAS(__pi_memmove, __memmove) /* NOTE(review): presumably an additional alias name for __memmove - confirm against linux/linkage.h */
317 SYM_FUNC_ALIAS(__pi___memmove, __memmove) /* NOTE(review): presumably an additional alias name for __memmove - confirm against linux/linkage.h */
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php