TOMOYO Linux Cross Reference
Linux/arch/microblaze/lib/fastcopy.S (linux-6.12-rc7)

/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2008 Jim Law - Iris LP  All rights reserved.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file COPYING in the main directory of this
 * archive for more details.
 *
 * Written by Jim Law <jlaw@irispower.com>
 *
 * intended to replace:
 *      memcpy in memcpy.c and
 *      memmove in memmove.c
 * ... in arch/microblaze/lib
 *
 *
 * assly_fastcopy.S
 *
 * Attempt at quicker memcpy and memmove for MicroBlaze
 *      Input : Operand1 in Reg r5 - destination address
 *              Operand2 in Reg r6 - source address
 *              Operand3 in Reg r7 - number of bytes to transfer
 *      Output: Result in Reg r3 - starting destination address
 *
 *
 * Explanation:
 *      Perform a (possibly unaligned) copy of a block of memory
 *      between mem locations with size of xfer spec'd in bytes
 */
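
/*
 * Rough C equivalent of the ascending path (illustrative sketch only,
 * using C99 types from <stdint.h>; the 32-byte block unrolling is
 * folded into one word loop and only the source-aligned bulk case is
 * shown - the unaligned cases are sketched at a_block_unaligned below):
 *
 *      void *memcpy(void *v_dst, const void *v_src, size_t c)
 *      {
 *              char *d = v_dst;
 *              const char *s = v_src;
 *
 *              if (c >= 4) {
 *                      while ((uintptr_t)d & 3) {      // head: align dest
 *                              *d++ = *s++;
 *                              c--;
 *                      }
 *                      if (((uintptr_t)s & 3) == 0)    // bulk: whole words
 *                              for (; c >= 4; c -= 4, d += 4, s += 4)
 *                                      *(uint32_t *)d = *(const uint32_t *)s;
 *              }
 *              while (c--)                     // tail: 0..3 odd bytes
 *                      *d++ = *s++;
 *              return v_dst;
 *      }
 */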

#include <linux/linkage.h>
        .text
        .globl  memcpy
        .type  memcpy, @function
        .ent    memcpy

memcpy:
fast_memcpy_ascending:
        /* move d to return register as value of function */
        addi    r3, r5, 0

        addi    r4, r0, 4       /* n = 4 */
        cmpu    r4, r4, r7      /* n = c - n (unsigned) */
        blti    r4, a_xfer_end  /* if n < 0, less than 4 bytes to transfer */

        /* transfer first 0~3 bytes to get aligned destination */
        andi    r4, r5, 3               /* n = d & 3 */
        /* if zero, destination already aligned */
        beqi    r4, a_dalign_done
        /* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offsets) */
        rsubi   r4, r4, 4
        rsub    r7, r4, r7              /* c = c - n, adjust c */

a_xfer_first_loop:
        /* if no bytes left to transfer, transfer the bulk */
        beqi    r4, a_dalign_done
        lbui    r11, r6, 0              /* h = *s */
        sbi     r11, r5, 0              /* *d = h */
        addi    r6, r6, 1               /* s++ */
        addi    r5, r5, 1               /* d++ */
        brid    a_xfer_first_loop       /* loop */
        addi    r4, r4, -1              /* n-- (in delay slot) */

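/*
 * Note: brid and bneid are delayed branches, so the instruction
 * immediately after them (here the n-- above) executes before the
 * branch takes effect. Every loop below ends the same way, with the
 * pointer or counter update sitting in the branch delay slot.
 */
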
a_dalign_done:
        addi    r4, r0, 32              /* n = 32 */
        cmpu    r4, r4, r7              /* n = c - n (unsigned) */
        /* if n < 0, less than one block to transfer */
        blti    r4, a_block_done

a_block_xfer:
        andi    r4, r7, 0xffffffe0      /* n = c & ~31 */
        rsub    r7, r4, r7              /* c = c - n */

        andi    r9, r6, 3               /* t1 = s & 3 */
        /* if temp != 0, unaligned transfers needed */
        bnei    r9, a_block_unaligned

a_block_aligned:
        lwi     r9, r6, 0               /* t1 = *(s + 0) */
        lwi     r10, r6, 4              /* t2 = *(s + 4) */
        lwi     r11, r6, 8              /* t3 = *(s + 8) */
        lwi     r12, r6, 12             /* t4 = *(s + 12) */
        swi     r9, r5, 0               /* *(d + 0) = t1 */
        swi     r10, r5, 4              /* *(d + 4) = t2 */
        swi     r11, r5, 8              /* *(d + 8) = t3 */
        swi     r12, r5, 12             /* *(d + 12) = t4 */
        lwi     r9, r6, 16              /* t1 = *(s + 16) */
        lwi     r10, r6, 20             /* t2 = *(s + 20) */
        lwi     r11, r6, 24             /* t3 = *(s + 24) */
        lwi     r12, r6, 28             /* t4 = *(s + 28) */
        swi     r9, r5, 16              /* *(d + 16) = t1 */
        swi     r10, r5, 20             /* *(d + 20) = t2 */
        swi     r11, r5, 24             /* *(d + 24) = t3 */
        swi     r12, r5, 28             /* *(d + 28) = t4 */
        addi    r6, r6, 32              /* s = s + 32 */
        addi    r4, r4, -32             /* n = n - 32 */
        bneid   r4, a_block_aligned     /* while (n) loop */
        addi    r5, r5, 32              /* d = d + 32 */
        bri     a_block_done

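/*
 * Rough C equivalent of a_block_aligned above (sketch; n, s, d as in
 * the register comments, i a scratch index):
 *
 *      for (; n; n -= 32, s += 32, d += 32)
 *              for (i = 0; i < 8; i++)
 *                      ((uint32_t *)d)[i] = ((const uint32_t *)s)[i];
 *
 * Grouping four loads before four stores is presumably meant to keep
 * the load/store unit streaming rather than alternating directions.
 */
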
a_block_unaligned:
        andi    r8, r6, 0xfffffffc      /* as = s & ~3 */
        add     r6, r6, r4              /* s = s + n */
        lwi     r11, r8, 0              /* h = *(as + 0) */

        addi    r9, r9, -1
        beqi    r9, a_block_u1          /* t1 was 1 => 1 byte offset */
        addi    r9, r9, -1
        beqi    r9, a_block_u2          /* t1 was 2 => 2 byte offset */

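/*
 * Shift-and-merge technique for an unaligned source, as a rough C
 * sketch of the 3-byte-offset case handled by a_block_u3 (MicroBlaze
 * here is big-endian, so "<<" discards bytes already consumed and
 * ">>" makes room for the carried ones). The block loops below unroll
 * this eight times per iteration:
 *
 *      as = (const uint32_t *)((uintptr_t)s & ~3);  // aligned cursor
 *      h  = *as << 24;         // keep word 0's single useful byte
 *      for (; n; n -= 4, d += 4) {
 *              v = *++as;                      // next aligned word
 *              *(uint32_t *)d = h | (v >> 8);  // splice 1 + 3 bytes
 *              h = v << 24;                    // carry byte onward
 *      }
 *
 * The 1- and 2-byte offset cases are identical with shift pairs
 * 8/24 (a_block_u1) and 16/16 (a_block_u2).
 */
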
a_block_u3:
        bslli   r11, r11, 24    /* h = h << 24 */
a_bu3_loop:
        lwi     r12, r8, 4      /* v = *(as + 4) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 0       /* *(d + 0) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        lwi     r12, r8, 8      /* v = *(as + 8) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 4       /* *(d + 4) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        lwi     r12, r8, 12     /* v = *(as + 12) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 8       /* *(d + 8) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        lwi     r12, r8, 16     /* v = *(as + 16) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 12      /* *(d + 12) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        lwi     r12, r8, 20     /* v = *(as + 20) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 16      /* *(d + 16) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        lwi     r12, r8, 24     /* v = *(as + 24) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 20      /* *(d + 20) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        lwi     r12, r8, 28     /* v = *(as + 28) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 24      /* *(d + 24) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        lwi     r12, r8, 32     /* v = *(as + 32) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 28      /* *(d + 28) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        addi    r8, r8, 32      /* as = as + 32 */
        addi    r4, r4, -32     /* n = n - 32 */
        bneid   r4, a_bu3_loop  /* while (n) loop */
        addi    r5, r5, 32      /* d = d + 32 */
        bri     a_block_done

a_block_u1:
        bslli   r11, r11, 8     /* h = h << 8 */
a_bu1_loop:
        lwi     r12, r8, 4      /* v = *(as + 4) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 0       /* *(d + 0) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        lwi     r12, r8, 8      /* v = *(as + 8) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 4       /* *(d + 4) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        lwi     r12, r8, 12     /* v = *(as + 12) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 8       /* *(d + 8) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        lwi     r12, r8, 16     /* v = *(as + 16) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 12      /* *(d + 12) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        lwi     r12, r8, 20     /* v = *(as + 20) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 16      /* *(d + 16) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        lwi     r12, r8, 24     /* v = *(as + 24) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 20      /* *(d + 20) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        lwi     r12, r8, 28     /* v = *(as + 28) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 24      /* *(d + 24) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        lwi     r12, r8, 32     /* v = *(as + 32) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 28      /* *(d + 28) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        addi    r8, r8, 32      /* as = as + 32 */
        addi    r4, r4, -32     /* n = n - 32 */
        bneid   r4, a_bu1_loop  /* while (n) loop */
        addi    r5, r5, 32      /* d = d + 32 */
        bri     a_block_done

a_block_u2:
        bslli   r11, r11, 16    /* h = h << 16 */
a_bu2_loop:
        lwi     r12, r8, 4      /* v = *(as + 4) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 0       /* *(d + 0) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        lwi     r12, r8, 8      /* v = *(as + 8) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 4       /* *(d + 4) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        lwi     r12, r8, 12     /* v = *(as + 12) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 8       /* *(d + 8) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        lwi     r12, r8, 16     /* v = *(as + 16) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 12      /* *(d + 12) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        lwi     r12, r8, 20     /* v = *(as + 20) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 16      /* *(d + 16) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        lwi     r12, r8, 24     /* v = *(as + 24) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 20      /* *(d + 20) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        lwi     r12, r8, 28     /* v = *(as + 28) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 24      /* *(d + 24) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        lwi     r12, r8, 32     /* v = *(as + 32) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 28      /* *(d + 28) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        addi    r8, r8, 32      /* as = as + 32 */
        addi    r4, r4, -32     /* n = n - 32 */
        bneid   r4, a_bu2_loop  /* while (n) loop */
        addi    r5, r5, 32      /* d = d + 32 */

a_block_done:
        addi    r4, r0, 4       /* n = 4 */
        cmpu    r4, r4, r7      /* n = c - n (unsigned) */
        blti    r4, a_xfer_end  /* if n < 0, less than 4 bytes to transfer */

a_word_xfer:
        andi    r4, r7, 0xfffffffc      /* n = c & ~3 */
        addi    r10, r0, 0              /* offset = 0 */

        andi    r9, r6, 3               /* t1 = s & 3 */
        /* if temp != 0, unaligned transfers needed */
        bnei    r9, a_word_unaligned

a_word_aligned:
        lw      r9, r6, r10             /* t1 = *(s + offset) */
        sw      r9, r5, r10             /* *(d + offset) = t1 */
        addi    r4, r4, -4              /* n-- */
        bneid   r4, a_word_aligned      /* loop */
        addi    r10, r10, 4             /* offset += 4 */

        bri     a_word_done

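/*
 * The ascending word loops step an offset register (r10) instead of
 * updating d, s and c each iteration; a_word_done below folds the
 * final offset back into all three at once.
 */
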
a_word_unaligned:
        andi    r8, r6, 0xfffffffc      /* as = s & ~3 */
        lwi     r11, r8, 0              /* h = *(as + 0) */
        addi    r8, r8, 4               /* as = as + 4 */

        addi    r9, r9, -1
        beqi    r9, a_word_u1           /* t1 was 1 => 1 byte offset */
        addi    r9, r9, -1
        beqi    r9, a_word_u2           /* t1 was 2 => 2 byte offset */

a_word_u3:
        bslli   r11, r11, 24    /* h = h << 24 */
a_wu3_loop:
        lw      r12, r8, r10    /* v = *(as + offset) */
        bsrli   r9, r12, 8      /* t1 = v >> 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        sw      r9, r5, r10     /* *(d + offset) = t1 */
        bslli   r11, r12, 24    /* h = v << 24 */
        addi    r4, r4, -4      /* n = n - 4 */
        bneid   r4, a_wu3_loop  /* while (n) loop */
        addi    r10, r10, 4     /* offset = offset + 4 */

        bri     a_word_done

a_word_u1:
        bslli   r11, r11, 8     /* h = h << 8 */
a_wu1_loop:
        lw      r12, r8, r10    /* v = *(as + offset) */
        bsrli   r9, r12, 24     /* t1 = v >> 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        sw      r9, r5, r10     /* *(d + offset) = t1 */
        bslli   r11, r12, 8     /* h = v << 8 */
        addi    r4, r4, -4      /* n = n - 4 */
        bneid   r4, a_wu1_loop  /* while (n) loop */
        addi    r10, r10, 4     /* offset = offset + 4 */

        bri     a_word_done

a_word_u2:
        bslli   r11, r11, 16    /* h = h << 16 */
a_wu2_loop:
        lw      r12, r8, r10    /* v = *(as + offset) */
        bsrli   r9, r12, 16     /* t1 = v >> 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        sw      r9, r5, r10     /* *(d + offset) = t1 */
        bslli   r11, r12, 16    /* h = v << 16 */
        addi    r4, r4, -4      /* n = n - 4 */
        bneid   r4, a_wu2_loop  /* while (n) loop */
        addi    r10, r10, 4     /* offset = offset + 4 */

a_word_done:
        add     r5, r5, r10     /* d = d + offset */
        add     r6, r6, r10     /* s = s + offset */
        rsub    r7, r10, r7     /* c = c - offset */

a_xfer_end:
a_xfer_end_loop:
        beqi    r7, a_done              /* while (c) */
        lbui    r9, r6, 0               /* t1 = *s */
        addi    r6, r6, 1               /* s++ */
        sbi     r9, r5, 0               /* *d = t1 */
        addi    r7, r7, -1              /* c-- */
        brid    a_xfer_end_loop         /* loop */
        addi    r5, r5, 1               /* d++ */

a_done:
        rtsd    r15, 8
        nop

.size  memcpy, . - memcpy
.end memcpy
/*---------------------------------------------------------------------------*/
        .globl  memmove
        .type  memmove, @function
        .ent    memmove

memmove:
        cmpu    r4, r5, r6      /* n = s - d */
        bgei    r4, fast_memcpy_ascending       /* s >= d: copying up is safe */

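/*
 * Direction choice in rough C (illustrative sketch only; the real
 * descending path below has the same block/word/byte phases as the
 * ascending copy, collapsed here into a plain byte loop):
 *
 *      void *memmove(void *v_dst, const void *v_src, size_t c)
 *      {
 *              char *d = v_dst;
 *              const char *s = v_src;
 *
 *              if ((uintptr_t)s >= (uintptr_t)d)
 *                      return memcpy(v_dst, v_src, c); // copy up: safe
 *              d += c;                 // source below dest: start past
 *              s += c;                 // the end and copy down
 *              while (c--)
 *                      *--d = *--s;
 *              return v_dst;
 *      }
 */
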
fast_memcpy_descending:
        /* move d to return register as value of function */
        addi    r3, r5, 0

        add     r5, r5, r7      /* d = d + c */
        add     r6, r6, r7      /* s = s + c */

        addi    r4, r0, 4       /* n = 4 */
        cmpu    r4, r4, r7      /* n = c - n (unsigned) */
        blti    r4, d_xfer_end  /* if n < 0, less than 4 bytes to transfer */

        /* transfer first 0~3 bytes to get aligned destination */
        andi    r4, r5, 3               /* n = d & 3 */
        /* if zero, destination already aligned */
        beqi    r4, d_dalign_done
        rsub    r7, r4, r7              /* c = c - n, adjust c */

d_xfer_first_loop:
        /* if no bytes left to transfer, transfer the bulk */
        beqi    r4, d_dalign_done
        addi    r6, r6, -1              /* s-- */
        addi    r5, r5, -1              /* d-- */
        lbui    r11, r6, 0              /* h = *s */
        sbi     r11, r5, 0              /* *d = h */
        brid    d_xfer_first_loop       /* loop */
        addi    r4, r4, -1              /* n-- (in delay slot) */

d_dalign_done:
        addi    r4, r0, 32      /* n = 32 */
        cmpu    r4, r4, r7      /* n = c - n (unsigned) */
        /* if n < 0, less than one block to transfer */
        blti    r4, d_block_done

d_block_xfer:
        andi    r4, r7, 0xffffffe0      /* n = c & ~31 */
        rsub    r7, r4, r7              /* c = c - n */

        andi    r9, r6, 3               /* t1 = s & 3 */
        /* if temp != 0, unaligned transfers needed */
        bnei    r9, d_block_unaligned

d_block_aligned:
        addi    r6, r6, -32             /* s = s - 32 */
        addi    r5, r5, -32             /* d = d - 32 */
        lwi     r9, r6, 28              /* t1 = *(s + 28) */
        lwi     r10, r6, 24             /* t2 = *(s + 24) */
        lwi     r11, r6, 20             /* t3 = *(s + 20) */
        lwi     r12, r6, 16             /* t4 = *(s + 16) */
        swi     r9, r5, 28              /* *(d + 28) = t1 */
        swi     r10, r5, 24             /* *(d + 24) = t2 */
        swi     r11, r5, 20             /* *(d + 20) = t3 */
        swi     r12, r5, 16             /* *(d + 16) = t4 */
        lwi     r9, r6, 12              /* t1 = *(s + 12) */
        lwi     r10, r6, 8              /* t2 = *(s + 8) */
        lwi     r11, r6, 4              /* t3 = *(s + 4) */
        lwi     r12, r6, 0              /* t4 = *(s + 0) */
        swi     r9, r5, 12              /* *(d + 12) = t1 */
        swi     r10, r5, 8              /* *(d + 8) = t2 */
        swi     r11, r5, 4              /* *(d + 4) = t3 */
        addi    r4, r4, -32             /* n = n - 32 */
        bneid   r4, d_block_aligned     /* while (n) loop */
        swi     r12, r5, 0              /* *(d + 0) = t4 (in delay slot) */
        bri     d_block_done

d_block_unaligned:
        andi    r8, r6, 0xfffffffc      /* as = s & ~3 */
        rsub    r6, r4, r6              /* s = s - n */
        lwi     r11, r8, 0              /* h = *(as + 0) */

        addi    r9, r9, -1
        beqi    r9, d_block_u1          /* t1 was 1 => 1 byte offset */
        addi    r9, r9, -1
        beqi    r9, d_block_u2          /* t1 was 2 => 2 byte offset */

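/*
 * The descending shift-merge mirrors the ascending loops with the
 * shift directions swapped: for the 3-byte offset (d_block_u3), h
 * keeps the low bytes of the previously consumed aligned word
 * (h = v >> 8) and each store splices h | (v << 24) while as and d
 * walk downwards.
 */
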
d_block_u3:
        bsrli   r11, r11, 8     /* h = h >> 8 */
d_bu3_loop:
        addi    r8, r8, -32     /* as = as - 32 */
        addi    r5, r5, -32     /* d = d - 32 */
        lwi     r12, r8, 28     /* v = *(as + 28) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 28      /* *(d + 28) = t1 */
        bsrli   r11, r12, 8     /* h = v >> 8 */
        lwi     r12, r8, 24     /* v = *(as + 24) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 24      /* *(d + 24) = t1 */
        bsrli   r11, r12, 8     /* h = v >> 8 */
        lwi     r12, r8, 20     /* v = *(as + 20) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 20      /* *(d + 20) = t1 */
        bsrli   r11, r12, 8     /* h = v >> 8 */
        lwi     r12, r8, 16     /* v = *(as + 16) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 16      /* *(d + 16) = t1 */
        bsrli   r11, r12, 8     /* h = v >> 8 */
        lwi     r12, r8, 12     /* v = *(as + 12) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 12      /* *(d + 12) = t1 */
        bsrli   r11, r12, 8     /* h = v >> 8 */
        lwi     r12, r8, 8      /* v = *(as + 8) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 8       /* *(d + 8) = t1 */
        bsrli   r11, r12, 8     /* h = v >> 8 */
        lwi     r12, r8, 4      /* v = *(as + 4) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 4       /* *(d + 4) = t1 */
        bsrli   r11, r12, 8     /* h = v >> 8 */
        lwi     r12, r8, 0      /* v = *(as + 0) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 0       /* *(d + 0) = t1 */
        addi    r4, r4, -32     /* n = n - 32 */
        bneid   r4, d_bu3_loop  /* while (n) loop */
        bsrli   r11, r12, 8     /* h = v >> 8 (in delay slot) */
        bri     d_block_done

d_block_u1:
        bsrli   r11, r11, 24    /* h = h >> 24 */
d_bu1_loop:
        addi    r8, r8, -32     /* as = as - 32 */
        addi    r5, r5, -32     /* d = d - 32 */
        lwi     r12, r8, 28     /* v = *(as + 28) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 28      /* *(d + 28) = t1 */
        bsrli   r11, r12, 24    /* h = v >> 24 */
        lwi     r12, r8, 24     /* v = *(as + 24) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 24      /* *(d + 24) = t1 */
        bsrli   r11, r12, 24    /* h = v >> 24 */
        lwi     r12, r8, 20     /* v = *(as + 20) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 20      /* *(d + 20) = t1 */
        bsrli   r11, r12, 24    /* h = v >> 24 */
        lwi     r12, r8, 16     /* v = *(as + 16) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 16      /* *(d + 16) = t1 */
        bsrli   r11, r12, 24    /* h = v >> 24 */
        lwi     r12, r8, 12     /* v = *(as + 12) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 12      /* *(d + 12) = t1 */
        bsrli   r11, r12, 24    /* h = v >> 24 */
        lwi     r12, r8, 8      /* v = *(as + 8) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 8       /* *(d + 8) = t1 */
        bsrli   r11, r12, 24    /* h = v >> 24 */
        lwi     r12, r8, 4      /* v = *(as + 4) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 4       /* *(d + 4) = t1 */
        bsrli   r11, r12, 24    /* h = v >> 24 */
        lwi     r12, r8, 0      /* v = *(as + 0) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 0       /* *(d + 0) = t1 */
        addi    r4, r4, -32     /* n = n - 32 */
        bneid   r4, d_bu1_loop  /* while (n) loop */
        bsrli   r11, r12, 24    /* h = v >> 24 (in delay slot) */
        bri     d_block_done

d_block_u2:
        bsrli   r11, r11, 16    /* h = h >> 16 */
d_bu2_loop:
        addi    r8, r8, -32     /* as = as - 32 */
        addi    r5, r5, -32     /* d = d - 32 */
        lwi     r12, r8, 28     /* v = *(as + 28) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 28      /* *(d + 28) = t1 */
        bsrli   r11, r12, 16    /* h = v >> 16 */
        lwi     r12, r8, 24     /* v = *(as + 24) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 24      /* *(d + 24) = t1 */
        bsrli   r11, r12, 16    /* h = v >> 16 */
        lwi     r12, r8, 20     /* v = *(as + 20) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 20      /* *(d + 20) = t1 */
        bsrli   r11, r12, 16    /* h = v >> 16 */
        lwi     r12, r8, 16     /* v = *(as + 16) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 16      /* *(d + 16) = t1 */
        bsrli   r11, r12, 16    /* h = v >> 16 */
        lwi     r12, r8, 12     /* v = *(as + 12) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 12      /* *(d + 12) = t1 */
        bsrli   r11, r12, 16    /* h = v >> 16 */
        lwi     r12, r8, 8      /* v = *(as + 8) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 8       /* *(d + 8) = t1 */
        bsrli   r11, r12, 16    /* h = v >> 16 */
        lwi     r12, r8, 4      /* v = *(as + 4) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 4       /* *(d + 4) = t1 */
        bsrli   r11, r12, 16    /* h = v >> 16 */
        lwi     r12, r8, 0      /* v = *(as + 0) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        swi     r9, r5, 0       /* *(d + 0) = t1 */
        addi    r4, r4, -32     /* n = n - 32 */
        bneid   r4, d_bu2_loop  /* while (n) loop */
        bsrli   r11, r12, 16    /* h = v >> 16 (in delay slot) */

d_block_done:
        addi    r4, r0, 4       /* n = 4 */
        cmpu    r4, r4, r7      /* n = c - n (unsigned) */
        blti    r4, d_xfer_end  /* if n < 0, less than 4 bytes to transfer */

d_word_xfer:
        andi    r4, r7, 0xfffffffc      /* n = c & ~3 */
        rsub    r5, r4, r5              /* d = d - n */
        rsub    r6, r4, r6              /* s = s - n */
        rsub    r7, r4, r7              /* c = c - n */

        andi    r9, r6, 3               /* t1 = s & 3 */
        /* if temp != 0, unaligned transfers needed */
        bnei    r9, d_word_unaligned

d_word_aligned:
        addi    r4, r4, -4              /* n-- */
        lw      r9, r6, r4              /* t1 = *(s + n) */
        bneid   r4, d_word_aligned      /* loop */
        sw      r9, r5, r4              /* *(d + n) = t1 (in delay slot) */

        bri     d_word_done

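/*
 * The descending word loops reuse the shrinking count n (r4) as the
 * load/store index; d, s and c were already stepped down by n at
 * d_word_xfer, so no separate offset register is needed.
 */
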
d_word_unaligned:
        andi    r8, r6, 0xfffffffc      /* as = s & ~3 */
        lw      r11, r8, r4             /* h = *(as + n) */

        addi    r9, r9, -1
        beqi    r9, d_word_u1           /* t1 was 1 => 1 byte offset */
        addi    r9, r9, -1
        beqi    r9, d_word_u2           /* t1 was 2 => 2 byte offset */

d_word_u3:
        bsrli   r11, r11, 8     /* h = h >> 8 */
d_wu3_loop:
        addi    r4, r4, -4      /* n = n - 4 */
        lw      r12, r8, r4     /* v = *(as + n) */
        bslli   r9, r12, 24     /* t1 = v << 24 */
        or      r9, r11, r9     /* t1 = h | t1 */
        sw      r9, r5, r4      /* *(d + n) = t1 */
        bneid   r4, d_wu3_loop  /* while (n) loop */
        bsrli   r11, r12, 8     /* h = v >> 8 (in delay slot) */

        bri     d_word_done

d_word_u1:
        bsrli   r11, r11, 24    /* h = h >> 24 */
d_wu1_loop:
        addi    r4, r4, -4      /* n = n - 4 */
        lw      r12, r8, r4     /* v = *(as + n) */
        bslli   r9, r12, 8      /* t1 = v << 8 */
        or      r9, r11, r9     /* t1 = h | t1 */
        sw      r9, r5, r4      /* *(d + n) = t1 */
        bneid   r4, d_wu1_loop  /* while (n) loop */
        bsrli   r11, r12, 24    /* h = v >> 24 (in delay slot) */

        bri     d_word_done

d_word_u2:
        bsrli   r11, r11, 16    /* h = h >> 16 */
d_wu2_loop:
        addi    r4, r4, -4      /* n = n - 4 */
        lw      r12, r8, r4     /* v = *(as + n) */
        bslli   r9, r12, 16     /* t1 = v << 16 */
        or      r9, r11, r9     /* t1 = h | t1 */
        sw      r9, r5, r4      /* *(d + n) = t1 */
        bneid   r4, d_wu2_loop  /* while (n) loop */
        bsrli   r11, r12, 16    /* h = v >> 16 (in delay slot) */

d_word_done:

d_xfer_end:
d_xfer_end_loop:
        beqi    r7, a_done              /* while (c); shares return with memcpy */
        addi    r6, r6, -1              /* s-- */
        lbui    r9, r6, 0               /* t1 = *s */
        addi    r5, r5, -1              /* d-- */
        sbi     r9, r5, 0               /* *d = t1 */
        brid    d_xfer_end_loop         /* loop */
        addi    r7, r7, -1              /* c-- (in delay slot) */

d_done:
        rtsd    r15, 8
        nop

.size  memmove, . - memmove
.end memmove
                                                      
