~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/sh/lib/memcpy-sh4.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/sh/lib/memcpy-sh4.S (Version linux-6.12-rc7) and /arch/alpha/lib/memcpy-sh4.S (Version linux-4.4.302)


  1 /* SPDX-License-Identifier: GPL-2.0 */            
  2 /*                                                
  3  * "memcpy" implementation of SuperH              
  4  *                                                
  5  * Copyright (C) 1999  Niibe Yutaka               
  6  * Copyright (c) 2002  STMicroelectronics Ltd     
  7  *   Modified from memcpy.S and micro-optimise    
  8  *   Stuart Menefy (stuart.menefy@st.com)         
  9  *                                                
 10  */                                               
 11 #include <linux/linkage.h>                        
 12                                                   
 13 /*                                                
 14  * void *memcpy(void *dst, const void *src, size_t n);
 15  *
 16  * It is assumed that there is no overlap between src and dst.
 17  * If there is an overlap, then the results are undefined.
 18  */                                               
 19                                                   
 20         !                                         
 21         !       GHIJ KLMN OPQR -->  ...G HIJK     
 22         !                                         
 23                                                   
 24         ! Size is 16 or greater, and may have     
 25                                                   
 26         .balign 32                                
 27 .Lcase1:                                          
    /*
     * Bulk copy for (src - dst) & 3 == 1.  Branched to from the dispatch
     * code in memcpy with r0 = long-word aligned end of the remaining
     * destination, r4 = dst and r5 = src - dst.
     *
     * The copy runs backwards.  r5 is first reduced by 1, which makes
     * r0 + r5 long-word aligned, and r7 is primed with the long word at
     * that address.  r5 is then reduced by 4 more so that the load in the
     * loop fetches the long word just below the one held in r7.  Each
     * iteration merges the two with shifts and stores one long word at
     * @-r0, continuing while r0 > r4 + 7 (tested before the store).
     */
 28         ! Read a long word and write a long wo    
 29         ! At the start of each iteration, r7 c    
 30         add     #-1,r5          !  79 EX          
 31         mov     r4,r2           !   5 MT (0 cy    
 32                                                   
 33         mov.l   @(r0,r5),r7     !  21 LS (2 cy    
 34         add     #-4,r5          !  50 EX          
 35                                                   
 36         add     #7,r2           !  79 EX          
 37         !                                         
 38 #ifdef CONFIG_CPU_LITTLE_ENDIAN                   
            /* little endian: r3 = (r7 << 24) | (r1 >> 8), then r7 = r1 */
 39         ! 6 cycles, 4 bytes per iteration         
 40 3:      mov.l   @(r0,r5),r1     !  21 LS (late    
 41         mov     r7, r3          !   5 MT (late    
 42                                                   
 43         cmp/hi  r2,r0           !  57 MT          
 44         shll16  r3              ! 103 EX          
 45                                                   
 46         mov     r1,r6           !   5 MT (late    
 47         shll8   r3              ! 102 EX          
 48                                                   
 49         shlr8   r6              ! 106 EX          
 50         mov     r1, r7          !   5 MT (late    
 51                                                   
 52         or      r6,r3           !  82 EX          
 53         bt/s    3b              ! 109 BR          
 54                                                   
 55          mov.l  r3,@-r0         !  30 LS          
 56 #else                                             
            /* big endian: r3 = (r7 >> 24) | (r1 << 8), then r7 = r1 */
 57 3:      mov.l   @(r0,r5),r1     !  21 LS (late    
 58         mov     r7,r3           !   5 MT (late    
 59                                                   
 60         cmp/hi  r2,r0           !  57 MT          
 61         shlr16  r3              ! 107 EX          
 62                                                   
 63         shlr8   r3              ! 106 EX          
 64         mov     r1,r6           !   5 MT (late    
 65                                                   
 66         shll8   r6              ! 102 EX          
 67         mov     r1,r7           !   5 MT (late    
 68                                                   
 69         or      r6,r3           !  82 EX          
 70         bt/s    3b              ! 109 BR          
 71                                                   
 72          mov.l  r3,@-r0         !  30 LS          
 73 #endif                                            
 74         ! Finally, copy a byte at once, if nec    
            /*
             * r5 is moved back to (src - dst - 1) and r2 to r4 + 1.  If r0
             * already equals r4 nothing is left; otherwise single bytes are
             * copied until r0 has been decremented down to r4.
             */
 75                                                   
 76         add     #4,r5           !  50 EX          
 77         cmp/eq  r4,r0           !  54 MT          
 78                                                   
 79         add     #-6,r2          !  50 EX          
 80         bt      9f              ! 109 BR          
 81                                                   
 82 8:      cmp/hi  r2,r0           !  57 MT          
 83         mov.b   @(r0,r5),r1     !  20 LS (late    
 84                                                   
 85         bt/s    8b              ! 109 BR          
 86                                                   
 87          mov.b  r1,@-r0         !  29 LS          
 88                                                   
 89 9:      rts                                       
 90          nop                                      
 91                                                   
 92                                                   
 93         !                                         
 94         !       GHIJ KLMN OPQR -->  .GHI JKLM     
 95         !                                         
 96                                                   
 97         ! Size is 16 or greater, and may have     
 98                                                   
 99         .balign 32                                
100 .Lcase3:                                          
    /*
     * Bulk copy for (src - dst) & 3 == 3.  Branched to from the dispatch
     * code in memcpy with r0 = long-word aligned end of the remaining
     * destination, r4 = dst and r5 = src - dst.
     *
     * Same scheme as .Lcase1 with different shift counts: r5 is reduced by
     * 3 so that r0 + r5 is long-word aligned, r7 is primed with the long
     * word found there, and r5 is reduced by a further 4 for the loads in
     * the loop.  One merged long word is stored at @-r0 per iteration
     * while r0 > r4 + 7 (tested before the store).
     */
101         ! Read a long word and write a long wo    
102         ! At the start of each iteration, r7 c    
103         add     #-3,r5          ! 79 EX           
104         mov     r4,r2           !  5 MT (0 cyc    
105                                                   
106         mov.l   @(r0,r5),r7     ! 21 LS (2 cyc    
107         add     #-4,r5          ! 50 EX           
108                                                   
109         add     #7,r2           !  79 EX          
110         !                                         
111 #ifdef CONFIG_CPU_LITTLE_ENDIAN                   
            /* little endian: r3 = (r7 << 8) | (r1 >> 24), then r7 = r1 */
112         ! 6 cycles, 4 bytes per iteration         
113 3:      mov.l   @(r0,r5),r1     !  21 LS (late    
114         mov     r7, r3          !   5 MT (late    
115                                                   
116         cmp/hi  r2,r0           !  57 MT          
117         shll8   r3              ! 102 EX          
118                                                   
119         mov     r1,r6           !   5 MT (late    
120         shlr16  r6              ! 107 EX          
121                                                   
122         shlr8   r6              ! 106 EX          
123         mov     r1, r7          !   5 MT (late    
124                                                   
125         or      r6,r3           !  82 EX          
126         bt/s    3b              ! 109 BR          
127                                                   
128          mov.l  r3,@-r0         !  30 LS          
129 #else                                             
            /*
             * big endian: r3 = (old r7 >> 8) | (newly loaded r7 << 24);
             * the new long word is loaded straight into r7 here.
             */
130 3:      mov     r7,r3           ! OPQR            
131         shlr8   r3              ! xOPQ            
132         mov.l   @(r0,r5),r7     ! KLMN            
133         mov     r7,r6                             
134         shll16  r6                                
135         shll8   r6              ! Nxxx            
136         or      r6,r3           ! NOPQ            
137         cmp/hi  r2,r0                             
138         bt/s    3b                                
139          mov.l  r3,@-r0                           
140 #endif                                            
141                                                   
142         ! Finally, copy a byte at once, if nec    
            /*
             * r5 is moved back to (src - dst - 1) and r2 to r4 + 1.  If r0
             * already equals r4 nothing is left; otherwise single bytes are
             * copied until r0 has been decremented down to r4.
             */
143                                                   
144         add     #6,r5           !  50 EX          
145         cmp/eq  r4,r0           !  54 MT          
146                                                   
147         add     #-6,r2          !  50 EX          
148         bt      9f              ! 109 BR          
149                                                   
150 8:      cmp/hi  r2,r0           !  57 MT          
151         mov.b   @(r0,r5),r1     !  20 LS (late    
152                                                   
153         bt/s    8b              ! 109 BR          
154                                                   
155          mov.b  r1,@-r0         !  29 LS          
156                                                   
157 9:      rts                                       
158          nop                                      
159                                                   
160 ENTRY(memcpy)                                     
    /*
     * Entry point.  As used below: r4 = dst, r5 = src, r6 = byte count.
     * NOTE(review): presumably the standard SH argument registers, with
     * the result (dst) expected in r0 - confirm against the ABI.
     *
     * The whole routine copies from the end of the buffers towards the
     * start.  r0 is set to dst + count and r5 is turned into the offset
     * (src - dst), so that @(r0,r5) addresses the source data belonging
     * to destination address r0 and every store is a pre-decrement @-r0.
     *
     * r0 = count | dst | src is built first so that one "tst #3, r0"
     * (in the delay slot of the zero-length test) detects the case where
     * all three are multiples of 4; that case goes to .Lcase00.  A zero
     * count branches to 99f.
     */
161                                                   
162         ! Calculate the invariants which will     
163         ! of the code:                            
164         !                                         
165         !      r4   -->  [ ...  ] DST             
166         !                [ ...  ]                 
167         !                  :                      
168         !      r0   -->  [ ...  ]       r0+r5     
169         !                                         
170         !                                         
171                                                   
172         ! Short circuit the common case of src    
173         ! and test for zero length move           
174                                                   
175         mov     r6, r0          !   5 MT (0 cy    
176         or      r4, r0          !  82 EX          
177                                                   
178         or      r5, r0          !  82 EX          
179         tst     r6, r6          !  86 MT          
180                                                   
181         bt/s    99f             ! 111 BR          
182          tst    #3, r0          !  87 MT          
183                                                   
184         mov     r4, r0          !   5 MT (0 cy    
185         add     r6, r0          !  49 EX          
186                                                   
187         mov     #16, r1         !   6 EX          
188         bt/s    .Lcase00        ! 111 BR          
189                                                   
190          sub    r4, r5          !  75 EX          
191                                                   
192         ! Arguments are not nicely long word a    
193         ! Check for small copies, and if so do    
194         !                                         
195         ! Deciding on an exact value of 'small    
196         ! using the optimised routines become     
197         ! cycle counts for different sizes usi
198         !       size    byte-at-time    long      
199         !       16      42              39-40     
200         !       24      58              43-44     
201         !       36      82              49-50     
202         ! However the penalty for getting it '    
203         ! aligned data (and this is more commo    
204                                                   
    /*
     * Small-copy path of memcpy.  Here r0 = dst + count, r5 = src - dst,
     * r6 = count and r1 = 16.
     *
     * cmp/gt sets T when 16 > count (signed); counts of 16 or more leave
     * for 6f.  r5 is biased by -1 so that @(r0,r5) reads the source byte
     * for the pre-decrement store that follows, and r3 = r5 - 1 addresses
     * the byte below it.
     *
     * "shlr r6" halves the count and leaves the odd bit in T.  An odd
     * count stores one byte first (and is finished if the halved count is
     * zero); an even count enters the loop at 4: with the first byte
     * already loaded in r1.  The loop copies two bytes per iteration,
     * counted down by "dt r6".
     *
     * 98: returns once all bytes are stored; r0 has then been decremented
     *     count times from dst + count.
     * 99: return for a zero count (branched to from the entry code);
     *     copies r4 into r0 in the delay slot.
     */
205         cmp/gt  r6,r1           !  56 MT          
206                                                   
207         add     #-1,r5          !  50 EX          
208         bf/s    6f              ! 108 BR          
209                                                   
210          mov    r5, r3          !   5 MT (late    
211         shlr    r6              ! 104 EX          
212                                                   
213         mov.b   @(r0,r5),r1     !  20 LS (late    
214         bf/s    4f              ! 111 BR          
215                                                   
216          add    #-1,r3          !  50 EX          
217         tst     r6, r6          !  86 MT          
218                                                   
219         bt/s    98f             ! 110 BR          
220          mov.b  r1,@-r0         !  29 LS          
221                                                   
222         ! 4 cycles, 2 bytes per iteration         
223 3:      mov.b   @(r0,r5),r1     !  20 LS (late    
224                                                   
225 4:      mov.b   @(r0,r3),r2     !  20 LS (late    
226         dt      r6              !  67 EX          
227                                                   
228         mov.b   r1,@-r0         !  29 LS          
229         bf/s    3b              ! 111 BR          
230                                                   
231          mov.b  r2,@-r0         !  29 LS          
232 98:                                               
233         rts                                       
234          nop                                      
235                                                   
236 99:     rts                                       
237          mov    r4, r0                            
238                                                   
239         ! Size is not small, so its worthwhile    
240         ! First align destination to a long wo    
241         !                                         
242         ! r5 = normal value -1                    
    /*
     * 6: if the destination end pointer r0 is not a multiple of 4, copy
     *    (r0 & 3) single bytes (loop 1:), decrementing the count in r6
     *    for each, so that r0 becomes long-word aligned.
     *
     * 2: remove the -1 bias from r5, then dispatch on the low two bits of
     *    r5 = (src - dst) and, when bit 0 is clear, on whether the
     *    remaining count r6 is at least 64:
     *
     *        bits 1:0 == 0   ->  .Lcase0  (r6 < 64)  or  .Lcase0b (r6 >= 64)
     *        bits 1:0 == 2   ->  .Lcase2  (r6 < 64)  or  .Lcase2b (r6 >= 64)
     *        bits 1:0 == 1   ->  .Lcase1
     *        bits 1:0 == 3   ->  .Lcase3
     *
     *    r0 is parked in r3 while r5 is examined with "tst #imm, r0"; the
     *    "mov r3, r0" following each bt/s puts it back before any of the
     *    bulk routines is entered.
     */
243                                                   
244 6:      tst     #3, r0          !  87 MT          
245         mov     #3, r3          !   6 EX          
246                                                   
247         bt/s    2f              ! 111 BR          
248          and    r0,r3           !  78 EX          
249                                                   
250         ! 3 cycles, 1 byte per iteration          
251 1:      dt      r3              !  67 EX          
252         mov.b   @(r0,r5),r1     !  19 LS (late    
253                                                   
254         add     #-1, r6         !  79 EX          
255         bf/s    1b              ! 109 BR          
256                                                   
257          mov.b  r1,@-r0         !  28 LS          
258                                                   
259 2:      add     #1, r5          !  79 EX          
260                                                   
261         ! Now select the appropriate bulk tran    
262         ! alignment of src and dst.               
263                                                   
264         mov     r0, r3          !   5 MT (late    
265                                                   
266         mov     r5, r0          !   5 MT (late    
267         tst     #1, r0          !  87 MT          
268                                                   
269         bf/s    1f              ! 111 BR          
270          mov    #64, r7         !   6 EX          
271                                                   
272         ! bit 0 clear                             
273                                                   
274         cmp/ge  r7, r6          !  55 MT          
275                                                   
276         bt/s    2f              ! 111 BR          
277          tst    #2, r0          !  87 MT          
278                                                   
279         ! small                                   
280         bt/s    .Lcase0                           
281          mov    r3, r0                            
282                                                   
283         bra     .Lcase2                           
284          nop                                      
285                                                   
286         ! big                                     
287 2:      bt/s    .Lcase0b                          
288          mov    r3, r0                            
289                                                   
290         bra     .Lcase2b                          
291          nop                                      
292                                                   
293         ! bit 0 set                               
294 1:      tst     #2, r0          ! 87 MT           
295                                                   
296         bt/s    .Lcase1                           
297          mov    r3, r0                            
298                                                   
299         bra     .Lcase3                           
300          nop                                      
301                                                   
302                                                   
303         !                                         
304         !       GHIJ KLMN OPQR -->  GHIJ KLMN     
305         !                                         
306                                                   
307         ! src, dst and size are all long word     
308         ! size is non-zero                        
309                                                   
310         .balign 32                                
311 .Lcase00:                                         
    /*
     * Entered straight from the memcpy entry code when dst, src and the
     * count are all multiples of 4 and the count is non-zero, with
     * r0 = dst + count, r5 = src - dst and r6 = count.
     *
     * Counts of 64 or more continue at .Lcase00b (r5 has already been
     * reduced by 4 at that point).  Otherwise r6 is shifted right three
     * times in total, giving the number of long-word pairs; the bit
     * shifted out by the final "shlr r6" tells whether a single long word
     * has to be copied first.  The loop then moves two long words per
     * iteration, read through the offsets r5 (src - dst - 4) and
     * r3 (src - dst - 8), and is counted down by "dt r6".
     */
312         mov     #64, r1         !   6 EX          
313         mov     r5, r3          !   5 MT (late    
314                                                   
315         cmp/gt  r6, r1          !  56 MT          
316         add     #-4, r5         !  50 EX          
317                                                   
318         bf      .Lcase00b       ! 108 BR          
319         shlr2   r6              ! 105 EX          
320                                                   
321         shlr    r6              ! 104 EX          
322         mov.l   @(r0, r5), r1   !  21 LS (late    
323                                                   
324         bf/s    4f              ! 111 BR          
325          add    #-8, r3         !  50 EX          
326                                                   
327         tst     r6, r6          !  86 MT          
328         bt/s    5f              ! 110 BR          
329                                                   
330          mov.l  r1,@-r0         !  30 LS          
331                                                   
332         ! 4 cycles, 2 long words per iteration    
333 3:      mov.l   @(r0, r5), r1   !  21 LS (late    
334                                                   
335 4:      mov.l   @(r0, r3), r2   !  21 LS (late    
336         dt      r6              !  67 EX          
337                                                   
338         mov.l   r1, @-r0        !  30 LS          
339         bf/s    3b              ! 109 BR          
340                                                   
341          mov.l  r2, @-r0        !  30 LS          
342                                                   
343 5:      rts                                       
344          nop                                      
345                                                   
346                                                   
347         ! Size is 16 or greater and less than     
348                                                   
349         .balign 32                                
350 .Lcase0:                                          
    /*
     * Bulk copy for (src - dst) a multiple of 4 and a remaining count
     * below 64.  Branched to from the dispatch code in memcpy with
     * r0 = long-word aligned end of the remaining destination, r4 = dst,
     * r5 = src - dst and r6 = remaining count.
     *
     * r5 is reduced by 4 so @(r0,r5) reads the long word that the next
     * @-r0 store will write; r3 = r5 - 4 reads the one below it.  If bit 2
     * of r6 is set a single long word is copied first.  The loop then
     * copies two long words per iteration while r0 > r4 + 11 (r7 holds
     * that bound; the test is made before the two stores).
     */
351         add     #-4, r5         !  50 EX          
352         mov     r4, r7          !   5 MT (late    
353                                                   
354         mov.l   @(r0, r5), r1   !  21 LS (late    
355         mov     #4, r2          !   6 EX          
356                                                   
357         add     #11, r7         !  50 EX          
358         tst     r2, r6          !  86 MT          
359                                                   
360         mov     r5, r3          !   5 MT (late    
361         bt/s    4f              ! 111 BR          
362                                                   
363          add    #-4, r3         !  50 EX          
364         mov.l   r1,@-r0         !  30 LS          
365                                                   
366         ! 4 cycles, 2 long words per iteration    
367 3:      mov.l   @(r0, r5), r1   !  21 LS (late    
368                                                   
369 4:      mov.l   @(r0, r3), r2   !  21 LS (late    
370         cmp/hi  r7, r0                            
371                                                   
372         mov.l   r1, @-r0        !  30 LS          
373         bt/s    3b              ! 109 BR          
374                                                   
375          mov.l  r2, @-r0        !  30 LS          
376                                                   
377         ! Copy the final 0-3 bytes                
            /*
             * r5 becomes (src - dst - 1) and r7 becomes r4 + 1.  If r0
             * already equals r4 nothing is left; otherwise single bytes are
             * copied until r0 has been decremented down to r4.
             */
378                                                   
379         add     #3,r5           !  50 EX          
380                                                   
381         cmp/eq  r0, r4          !  54 MT          
382         add     #-10, r7        !  50 EX          
383                                                   
384         bt      9f              ! 110 BR          
385                                                   
386         ! 3 cycles, 1 byte per iteration          
387 1:      mov.b   @(r0,r5),r1     !  19 LS          
388         cmp/hi  r7,r0           !  57 MT          
389                                                   
390         bt/s    1b              ! 111 BR          
391          mov.b  r1,@-r0         !  28 LS          
392                                                   
393 9:      rts                                       
394          nop                                      
395                                                   
396         ! Size is at least 64 bytes, so will b    
397         !                                         
398         !   r2 = rounded up r4                    
399         !   r3 = rounded down r0                  
400                                                   
401         .balign 32                                
402 .Lcase0b:                                         
    /*
     * .Lcase0b / .Lcase00b: bulk copy for (src - dst) a multiple of 4 and
     * a count of at least 64.  .Lcase0b is reached from the dispatch code
     * in memcpy and first reduces r5 by 4; .Lcase00b is reached from
     * .Lcase00, where that has already been done.  From .Lcase00b on,
     * r0 = end of the remaining destination (a multiple of 4),
     * r4 = dst and r5 = src - dst - 4.
     *
     * Three phases, all working downwards from r0:
     *   1. copy long words until r0 is a multiple of 32 (skipped when it
     *      already is);
     *   2. copy 32 bytes per iteration, using r8-r11 (saved on the stack)
     *      as additional data registers, until the destination pointer
     *      reaches r2 = r4 rounded up to a multiple of 32;
     *   3. copy whatever lies between r4 and that point: long words
     *      first, then single bytes.
     */
403         add     #-4, r5         !  50 EX          
404                                                   
405 .Lcase00b:                                        
406         mov     r0, r3          !   5 MT (late    
407         mov     #(~0x1f), r1    !   6 EX          
408                                                   
409         and     r1, r3          !  78 EX          
410         mov     r4, r2          !   5 MT (late    
411                                                   
412         cmp/eq  r3, r0          !  54 MT          
413         add     #0x1f, r2       !  50 EX          
414                                                   
415         bt/s    1f              ! 110 BR          
416          and    r1, r2          !  78 EX          
417                                                   
418         ! copy initial words until cache line     
            /*
             * r6 = r5 - 4 addresses the second long word of each pair.
             * r3 is raised to (r0 rounded down to 32) + 8: the pair loop
             * compares r0 with it before its two stores, so the loop ends
             * with r0 on the 32-byte boundary.  If bit 2 of r0 is set a
             * single long word is copied first, and when that alone
             * reaches the boundary (r0 & 0x18 == 0) the pair loop is
             * skipped.
             */
419                                                   
420         mov.l   @(r0, r5), r1   !  21 LS (late    
421         tst     #4, r0          !  87 MT          
422                                                   
423         mov     r5, r6          !   5 MT (late    
424         add     #-4, r6         !  50 EX          
425                                                   
426         bt/s    4f              ! 111 BR          
427          add    #8, r3          !  50 EX          
428                                                   
429         tst     #0x18, r0       !  87 MT          
430                                                   
431         bt/s    1f              ! 109 BR          
432          mov.l  r1,@-r0         !  30 LS          
433                                                   
434         ! 4 cycles, 2 long words per iteration    
435 3:      mov.l   @(r0, r5), r1   !  21 LS (late    
436                                                   
437 4:      mov.l   @(r0, r6), r7   !  21 LS (late    
438         cmp/eq  r3, r0          !  54 MT          
439                                                   
440         mov.l   r1, @-r0        !  30 LS          
441         bf/s    3b              ! 109 BR          
442                                                   
443          mov.l  r7, @-r0        !  30 LS          
444                                                   
445         ! Copy the cache line aligned blocks      
446         !                                         
447         ! In use: r0, r2, r4, r5                  
448         ! Scratch: r1, r3, r6, r7                 
449         !                                         
450         ! We could do this with the four scrat    
451         ! and dest hit the same cache line, th    
452         ! use of additional registers.            
453         !                                         
454         ! We also need r0 as a temporary (for     
455         !   r5:  src (was r0+r5)                  
456         !   r1:  dest (was r0)                    
457         ! this can be reversed at the end, so     
458         ! state.                                  
459         !                                         
            /*
             * r8-r11 are pushed on the stack here and popped again after
             * the loop.  r5 is converted from an offset into a real source
             * pointer (r5 + r0 - 0x1c), so that offsets 0x00-0x1c from r5
             * cover the 32 source bytes matching the destination block
             * just below r0.
             */
460 1:      mov.l   r8, @-r15       !  30 LS          
461         add     r0, r5          !  49 EX          
462                                                   
463         mov.l   r9, @-r15       !  30 LS          
464         mov     r0, r1          !   5 MT (late    
465                                                   
466         mov.l   r10, @-r15      !  30 LS          
467         add     #-0x1c, r5      !  50 EX          
468                                                   
469         mov.l   r11, @-r15      !  30 LS          
470                                                   
471         ! 16 cycles, 32 bytes per iteration       
            /*
             * Eight long words are loaded, r1 is moved down by 32 and the
             * eight long words are stored at offsets 0x00-0x1c from it.
             * The loop ends when r1 equals r2 (r4 rounded up to 32).
             */
472 2:      mov.l   @(0x00,r5),r0   ! 18 LS (laten    
473         add     #-0x20, r1      ! 50 EX           
474         mov.l   @(0x04,r5),r3   ! 18 LS (laten    
475         mov.l   @(0x08,r5),r6   ! 18 LS (laten    
476         mov.l   @(0x0c,r5),r7   ! 18 LS (laten    
477         mov.l   @(0x10,r5),r8   ! 18 LS (laten    
478         mov.l   @(0x14,r5),r9   ! 18 LS (laten    
479         mov.l   @(0x18,r5),r10  ! 18 LS (laten    
480         mov.l   @(0x1c,r5),r11  ! 18 LS (laten    
481         movca.l r0,@r1          ! 40 LS (laten    
482         mov.l   r3,@(0x04,r1)   ! 33 LS           
483         mov.l   r6,@(0x08,r1)   ! 33 LS           
484         mov.l   r7,@(0x0c,r1)   ! 33 LS           
485                                                   
486         mov.l   r8,@(0x10,r1)   ! 33 LS           
487         add     #-0x20, r5      ! 50 EX           
488                                                   
489         mov.l   r9,@(0x14,r1)   ! 33 LS           
490         cmp/eq  r2,r1           ! 54 MT           
491                                                   
492         mov.l   r10,@(0x18,r1)  !  33 LS          
493         bf/s    2b              ! 109 BR          
494                                                   
495          mov.l  r11,@(0x1c,r1)  !  33 LS          
496                                                   
            /*
             * Back to the offset form: r0 = destination pointer again and
             * r5 = source pointer - r1.  r11, r10 and r9 are restored on
             * the way.  The "mov.l @r15+, r8" at 1: below serves twice: as
             * the delay slot of the rts (taken when r0 == r4, i.e. nothing
             * is left to copy) and as the first instruction of the
             * trailing-bytes path.
             */
497         mov     r1, r0          !   5 MT (late    
498                                                   
499         mov.l   @r15+, r11      !  15 LS          
500         sub     r1, r5          !  75 EX          
501                                                   
502         mov.l   @r15+, r10      !  15 LS          
503         cmp/eq  r4, r0          !  54 MT          
504                                                   
505         bf/s    1f              ! 109 BR          
506          mov.l   @r15+, r9      !  15 LS          
507                                                   
508         rts                                       
509 1:       mov.l  @r15+, r8       !  15 LS          
510         sub     r4, r1          !  75 EX          
511                                                   
512         ! number of trailing bytes is non-zero    
513         !                                         
514         ! invariants restored (r5 already decr    
515         ! also r1=num bytes remaining             
            /*
             * r5 gets 0x1c added back, which returns it to (src - dst - 4).
             * r7 = r4 + 11 is the bound for the long-word pair loop.  Fewer
             * than 4 remaining bytes (unsigned compare of r1 with r2 = 4)
             * go straight to the byte loop at 5:.
             */
516                                                   
517         mov     #4, r2          !   6 EX          
518         mov     r4, r7          !   5 MT (late    
519                                                   
520         add     #0x1c, r5       !  50 EX          
521         cmp/hs  r2, r1          !  58 MT          
522                                                   
523         bf/s    5f              ! 108 BR          
524          add     #11, r7        !  50 EX          
525                                                   
526         mov.l   @(r0, r5), r6   !  21 LS (late    
527         tst     r2, r1          !  86 MT          
528                                                   
529         mov     r5, r3          !   5 MT (late    
530         bt/s    4f              ! 111 BR          
531                                                   
532          add    #-4, r3         !  50 EX          
            /*
             * NOTE(review): r1 and r2 have not changed since the cmp/hs
             * above, which already sent r1 < 4 to 5f, so this compare sets
             * T again and the bt/s below is always taken: one long word is
             * stored and everything still remaining is left to the byte
             * loop at 5:.  The result is still a complete copy; confirm
             * whether a different bound was intended here.
             */
533         cmp/hs  r2, r1          !  58 MT          
534                                                   
535         bt/s    5f              ! 111 BR          
536          mov.l  r6,@-r0         !  30 LS          
537                                                   
538         ! 4 cycles, 2 long words per iteration    
539 3:      mov.l   @(r0, r5), r6   !  21 LS (late    
540                                                   
541 4:      mov.l   @(r0, r3), r2   !  21 LS (late    
542         cmp/hi  r7, r0                            
543                                                   
544         mov.l   r6, @-r0        !  30 LS          
545         bt/s    3b              ! 109 BR          
546                                                   
547          mov.l  r2, @-r0        !  30 LS          
548                                                   
549         ! Copy the final 0-3 bytes                
            /*
             * r7 becomes r4 + 1 and, unless r0 already equals r4, r5
             * becomes (src - dst - 1); single bytes are then copied until
             * r0 has been decremented down to r4.
             */
550                                                   
551 5:      cmp/eq  r0, r4          !  54 MT          
552         add     #-10, r7        !  50 EX          
553                                                   
554         bt      9f              ! 110 BR          
555         add     #3,r5           !  50 EX          
556                                                   
557         ! 3 cycles, 1 byte per iteration          
558 1:      mov.b   @(r0,r5),r1     !  19 LS          
559         cmp/hi  r7,r0           !  57 MT          
560                                                   
561         bt/s    1b              ! 111 BR          
562          mov.b  r1,@-r0         !  28 LS          
563                                                   
564 9:      rts                                       
565          nop                                      
566                                                   
567         !
568         !       GHIJ KLMN OPQR -->  ..GH IJKL
            !
            ! NOTE(review): the sketch above presumably shows source long words
            ! landing at a 2-byte offset inside the destination long words; the
            ! dispatch that selects this case is outside this section - confirm.
569         !
570
571         .balign 32
572 .Lcase2:
573         ! Size is 16 or greater and less than 64, but may have trailing bytes
            !
            ! Copies downwards, two 16-bit words (4 bytes) per iteration.
            !   r0: destination pointer, moved down by the pre-decrement stores
            !   r5: offset such that @(r0,r5) reads the matching source data
            !   r4: lowest destination address (the tail code stops at r0 == r4)
            !   r1, r2, r3, r6: overwritten here
            ! NOTE(review): the trailing "number / two-letter group / latency"
            ! fields on each instruction look like scheduling annotations; their
            ! key is not defined in this section.
574
575 2:      mov     r5, r6          !   5 MT (latency=0)
576         add     #-2,r5          !  50 EX   r5 now addresses the word just below r0
577
578         mov     r4,r2           !   5 MT (latency=0)
579         add     #-4,r6          !  50 EX   r6 = original r5 - 4: the word below that
580
581         add     #7,r2           !  50 EX   r2 = r4 + 7, the loop bound
            ! The compare samples r0 before the two stores, so the loop repeats
            ! while r0 > r4 + 7 (unsigned) and leaves with r0 <= r4 + 3.
582 3:      mov.w   @(r0,r5),r1     !  20 LS (latency=2)
583
584         mov.w   @(r0,r6),r3     !  20 LS (latency=2)
585         cmp/hi  r2,r0           !  57 MT
586
587         mov.w   r1,@-r0         !  29 LS
588         bt/s    3b              ! 111 BR
589
590          mov.w  r3,@-r0         !  29 LS   (delay slot: runs on every pass)
591
            ! r5 is left at (original r5 - 2), which is what label 10 expects.
592         bra     10f             ! finish the remaining bytes in the shared tail
593          nop
594                                                   
595                                                   
596         .balign 32
597 .Lcase2b:
598         ! Size is at least 64 bytes, so will be going round the big loop at least once.
599         !
600         !   r2 = rounded up r4
601         !   r3 = rounded down r0
            !
            ! Both are rounded to a 32-byte boundary with the ~0x1f mask in r1.
            ! As elsewhere in this code, r0 is the destination pointer (moving
            ! downwards), @(r0,r5) reads the matching source data, and r4 is the
            ! lowest destination address.
602
603         mov     r0, r3          !   5 MT (latency=0)
604         mov     #(~0x1f), r1    !   6 EX
605
606         and     r1, r3          !  78 EX
607         mov     r4, r2          !   5 MT (latency=0)
608
609         cmp/eq  r3, r0          !  54 MT   T set if r0 is already 32-byte aligned
610         add     #0x1f, r2       !  50 EX
611
612         add     #-2, r5         !  50 EX
613         bt/s    1f              ! 110 BR   aligned: skip the word-at-a-time loop
614          and    r1, r2          !  78 EX   (delay slot: finishes r2 on both paths)
615
616         ! Copy a short word one at a time until we are cache line aligned
617         !   Normal values: r0, r2, r3, r4
618         !   Unused: r1, r6, r7
619         !   Mod: r5 (=r5-2)
620         !
            ! r3 is bumped by 2 because the compare below sees r0 before the
            ! delay-slot store decrements it; the loop leaves with r0 equal to
            ! the rounded-down value.
621         add     #2, r3          !  50 EX
622
623 2:      mov.w   @(r0,r5),r1     !  20 LS (latency=2)
624         cmp/eq  r3,r0           !  54 MT
625
626         bf/s    2b              ! 111 BR
627
628          mov.w  r1,@-r0         !  29 LS
629
630         ! Copy the cache line aligned blocks
631         !
632         ! In use: r0, r2, r4, r5 (=r5-2)
633         ! Scratch: r1, r3, r6, r7
634         !
635         ! We could do this with the four scratch registers, but if src
636         ! and dest hit the same cache line, this will thrash, so make
637         ! use of additional registers.
638         !
639         ! We also need r0 as a temporary (for movca), so 'undo' the invariant:
640         !   r5:  src (was r0+r5)
641         !   r1:  dest (was r0)
642         ! this can be reversed at the end, so we don't need to save any extra
643         ! state.
644         !
            ! r8-r12 are pushed on the stack (r15) here and popped after the loop.
            ! After the two adjustments below, r5 = (source address matching r0)
            ! - 0x20, i.e. the start of the next 32 source bytes to copy.
645 1:      mov.l   r8, @-r15       !  30 LS
646         add     r0, r5          !  49 EX
647
648         mov.l   r9, @-r15       !  30 LS
649         mov     r0, r1          !   5 MT (latency=0)
650
651         mov.l   r10, @-r15      !  30 LS
652         add     #-0x1e, r5      !  50 EX
653
654         mov.l   r11, @-r15      !  30 LS
655
656         mov.l   r12, @-r15      !  30 LS
657
658         ! 17 cycles, 32 bytes per iteration
            !
            ! Little-endian variant: reads one word, seven long words and one
            ! word (32 bytes) upwards from r5, merges neighbouring registers
            ! with shll16/xtrct into eight destination long words, and stores
            ! them to the 32-byte line at r1 (first store via movca.l).
            ! The final "add #-0x40" takes r5 back past the 32 bytes just read
            ! and a further 32, ready for the next (lower) block.
            ! The loop ends when r1 reaches r2 (r4 rounded up).
659 #ifdef CONFIG_CPU_LITTLE_ENDIAN
660 2:      mov.w   @r5+, r0        !  14 LS (latency=2)
661         add     #-0x20, r1      !  50 EX
662
663         mov.l   @r5+, r3        !  15 LS (latency=2)
664
665         mov.l   @r5+, r6        !  15 LS (latency=2)
666         shll16  r0              ! 103 EX
667
668         mov.l   @r5+, r7        !  15 LS (latency=2)
669         xtrct   r3, r0          !  48 EX
670
671         mov.l   @r5+, r8        !  15 LS (latency=2)
672         xtrct   r6, r3          !  48 EX
673
674         mov.l   @r5+, r9        !  15 LS (latency=2)
675         xtrct   r7, r6          !  48 EX
676
677         mov.l   @r5+, r10       !  15 LS (latency=2)
678         xtrct   r8, r7          !  48 EX
679
680         mov.l   @r5+, r11       !  15 LS (latency=2)
681         xtrct   r9, r8          !  48 EX
682
683         mov.w   @r5+, r12       !  15 LS (latency=2)
684         xtrct   r10, r9         !  48 EX
685
686         movca.l r0,@r1          !  40 LS (latency=3-7)
687         xtrct   r11, r10        !  48 EX
688
689         mov.l   r3, @(0x04,r1)  !  33 LS
690         xtrct   r12, r11        !  48 EX
691
692         mov.l   r6, @(0x08,r1)  !  33 LS
693
694         mov.l   r7, @(0x0c,r1)  !  33 LS
695
696         mov.l   r8, @(0x10,r1)  !  33 LS
697         add     #-0x40, r5      !  50 EX
698
699         mov.l   r9, @(0x14,r1)  !  33 LS
700         cmp/eq  r2,r1           !  54 MT
701
702         mov.l   r10, @(0x18,r1) !  33 LS
703         bf/s    2b              ! 109 BR
704
705          mov.l  r11, @(0x1c,r1) !  33 LS
            ! Otherwise (CONFIG_CPU_LITTLE_ENDIAN not defined): the same 32-byte
            ! block is read from the top down using displacement addressing.
            ! r1 is first lowered by 4 so movca.l writes the highest long word
            ! of the line, then by a further 0x1c to the line base for the
            ! remaining stores. r5 is lowered by 2 and then 0x1e, 0x20 in all.
            ! The last load fetches a full long word into r12; only the 16 bits
            ! that xtrct merges into r11 are used.
706 #else
707 2:      mov.w   @(0x1e,r5), r0  !  17 LS (latency=2)
708         add     #-2, r5         !  50 EX
709
710         mov.l   @(0x1c,r5), r3  !  18 LS (latency=2)
711         add     #-4, r1         !  50 EX
712
713         mov.l   @(0x18,r5), r6  !  18 LS (latency=2)
714         shll16  r0              ! 103 EX
715
716         mov.l   @(0x14,r5), r7  !  18 LS (latency=2)
717         xtrct   r3, r0          !  48 EX
718
719         mov.l   @(0x10,r5), r8  !  18 LS (latency=2)
720         xtrct   r6, r3          !  48 EX
721
722         mov.l   @(0x0c,r5), r9  !  18 LS (latency=2)
723         xtrct   r7, r6          !  48 EX
724
725         mov.l   @(0x08,r5), r10 !  18 LS (latency=2)
726         xtrct   r8, r7          !  48 EX
727
728         mov.l   @(0x04,r5), r11 !  18 LS (latency=2)
729         xtrct   r9, r8          !  48 EX
730
731         mov.l   @(0x00,r5), r12 !  18 LS (latency=2)
732         xtrct   r10, r9         !  48 EX
733
734         movca.l r0,@r1          !  40 LS (latency=3-7)
735         add     #-0x1c, r1      !  50 EX
736
737         mov.l   r3, @(0x18,r1)  !  33 LS
738         xtrct   r11, r10        !  48 EX
739
740         mov.l   r6, @(0x14,r1)  !  33 LS
741         xtrct   r12, r11        !  48 EX
742
743         mov.l   r7, @(0x10,r1)  !  33 LS
744
745         mov.l   r8, @(0x0c,r1)  !  33 LS
746         add     #-0x1e, r5      !  50 EX
747
748         mov.l   r9, @(0x08,r1)  !  33 LS
749         cmp/eq  r2,r1           !  54 MT
750
751         mov.l   r10, @(0x04,r1) !  33 LS
752         bf/s    2b              ! 109 BR
753
754          mov.l  r11, @(0x00,r1) !  33 LS
755 #endif
756
            ! Restore r12..r8 and re-establish the invariant: r0 becomes the
            ! destination pointer again and r5 goes back to being an offset
            ! relative to it.
757         mov.l   @r15+, r12
758         mov     r1, r0          !   5 MT (latency=0)
759
760         mov.l   @r15+, r11      !  15 LS
761         sub     r1, r5          !  75 EX
762
763         mov.l   @r15+, r10      !  15 LS
764         cmp/eq  r4, r0          !  54 MT   T set if nothing is left to copy
765
766         bf/s    1f              ! 109 BR
767          mov.l   @r15+, r9      !  15 LS
768
            ! The pop of r8 at label 1 is both the delay slot of this rts (when
            ! r0 == r4) and the target of the bf/s above, so r8 is restored on
            ! either path.
769         rts
770 1:       mov.l  @r15+, r8       !  15 LS
771
772         add     #0x1e, r5       !  50 EX   undo the earlier -0x1e: r5 is the offset - 2 again
773
774         ! Finish off a short word at a time
775         ! r5 must be invariant - 2
            ! (.Lcase2 also branches here.)  The word loop is skipped unless
            ! r0 > r4 + 1; inside it the bound is r4 + 3, compared against r0
            ! before the delay-slot store.
776 10:     mov     r4,r2           !   5 MT (latency=0)
777         add     #1,r2           !  50 EX
778
779         cmp/hi  r2, r0          !  57 MT
780         bf/s    1f              ! 109 BR
781
782          add    #2, r2          !  50 EX
783
784 3:      mov.w   @(r0,r5),r1     !  20 LS
785         cmp/hi  r2,r0           !  57 MT
786
787         bt/s    3b              ! 109 BR
788
789          mov.w  r1,@-r0         !  29 LS
790 1:
791
792         !
793         ! Finally, copy the last byte if necessary
            ! If r0 == r4 the copy is complete: branch back to label 9, which is
            ! an rts.  Otherwise r5 is raised by 1 (done in the delay slot) so
            ! @(r0,r5) reads the byte just below r0, and that byte is stored in
            ! the delay slot of the final rts.
794         cmp/eq  r4,r0           !  54 MT
795         bt/s    9b
796          add    #1,r5
797         mov.b   @(r0,r5),r1
798         rts
799          mov.b  r1,@-r0
800                                                   
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php