~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/xtensa/lib/checksum.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/xtensa/lib/checksum.S (Version linux-6.12-rc7) and /arch/i386/lib/checksum.S (Version linux-5.17.15)


  1 /* SPDX-License-Identifier: GPL-2.0-or-later */
  2 /*
  3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  4  *              operating system.  INET is implemented using the  BSD Socket
  5  *              interface as the means of communication with the user level.
  6  *
  7  *              IP/TCP/UDP checksumming routines
  8  *
  9  * Xtensa version:  Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
 10  *                  Optimized by Joe Taylor
 11  */
 12                                                   
 13 #include <linux/errno.h>                          
 14 #include <linux/linkage.h>                        
 15 #include <asm/asmmacro.h>                         
 16 #include <asm/core.h>                             
 17                                                   
 18 /*
 19  * computes a partial checksum, e.g. for TCP/UDP fragments
 20  */
 21                                                   
 22 /*
 23  * unsigned int csum_partial(const unsigned char *buf, int len,
 24  *                           unsigned int sum);
 25  *    a2 = buf
 26  *    a3 = len
 27  *    a4 = sum
 28  *
 29  * This function assumes 2- or 4-byte alignment; odd addresses are handled, but only on a slow path.
 30  */
 31                                                   
 32 /* ONES_ADD converts twos-complement math to ones-complement: sum += val, then 1 is added back if the add wrapped (sum < val unsigned). */
 33 #define ONES_ADD(sum, val)        \
 34         add     sum, sum, val   ; \
 35         bgeu    sum, val, 99f   ; \
 36         addi    sum, sum, 1     ; \
 37 99:                             ;
 38                                                   
 39 .text
 40 ENTRY(csum_partial)
 41         /* In: a2 = buf, a3 = len, a4 = sum.  Out: a2 = sum with buf[0..len) added in.  Scratch: a5-a8. */
 42         /*
 43          * Experiments with Ethernet and SLIP connections show that buf
 44          * is aligned on either a 2-byte or 4-byte boundary.
 45          */
 46         abi_entry_default
 47         extui   a5, a2, 0, 2
 48         bnez    a5, 8f          /* branch if buf is not 4-byte aligned */
 49         /* Fall-through on common case, 4-byte alignment */
 50 1:
 51         srli    a5, a3, 5       /* 32-byte chunks */
 52 #if XCHAL_HAVE_LOOPS
 53         loopgtz a5, 2f
 54 #else
 55         beqz    a5, 2f
 56         slli    a5, a5, 5
 57         add     a5, a5, a2      /* a5 = end of last 32-byte chunk */
 58 .Loop1:
 59 #endif
 60         l32i    a6, a2, 0
 61         l32i    a7, a2, 4
 62         ONES_ADD(a4, a6)
 63         ONES_ADD(a4, a7)
 64         l32i    a6, a2, 8
 65         l32i    a7, a2, 12
 66         ONES_ADD(a4, a6)
 67         ONES_ADD(a4, a7)
 68         l32i    a6, a2, 16
 69         l32i    a7, a2, 20
 70         ONES_ADD(a4, a6)
 71         ONES_ADD(a4, a7)
 72         l32i    a6, a2, 24
 73         l32i    a7, a2, 28
 74         ONES_ADD(a4, a6)
 75         ONES_ADD(a4, a7)
 76         addi    a2, a2, 4*8
 77 #if !XCHAL_HAVE_LOOPS
 78         blt     a2, a5, .Loop1
 79 #endif
 80 2:
 81         extui   a5, a3, 2, 3    /* remaining 4-byte chunks (len bits 2..4) */
 82 #if XCHAL_HAVE_LOOPS
 83         loopgtz a5, 3f
 84 #else
 85         beqz    a5, 3f
 86         slli    a5, a5, 2
 87         add     a5, a5, a2      /* a5 = end of last 4-byte chunk */
 88 .Loop2:
 89 #endif
 90         l32i    a6, a2, 0
 91         ONES_ADD(a4, a6)
 92         addi    a2, a2, 4
 93 #if !XCHAL_HAVE_LOOPS
 94         blt     a2, a5, .Loop2
 95 #endif
 96 3:
 97         _bbci.l a3, 1, 5f       /* remaining 2-byte chunk: skip if len bit 1 is clear */
 98         l16ui   a6, a2, 0
 99         ONES_ADD(a4, a6)
100         addi    a2, a2, 2
101 5:
102         _bbci.l a3, 0, 7f       /* remaining 1-byte chunk: skip if len bit 0 is clear */
103 6:      l8ui    a6, a2, 0
104 #ifdef __XTENSA_EB__
105         slli    a6, a6, 8       /* load byte into bits 8..15 */
106 #endif
107         ONES_ADD(a4, a6)
108 7:
109         mov     a2, a4          /* return the accumulated sum in a2 */
110         abi_ret_default
111
112         /* uncommon case, buf is not 4-byte aligned (2-byte or odd) */
113 8:
114         beqz    a3, 7b          /* branch if len == 0 */
115         beqi    a3, 1, 6b       /* branch if len == 1 */
116
117         extui   a5, a2, 0, 1
118         bnez    a5, 8f          /* branch if 1-byte (odd) aligned */
119
120         l16ui   a6, a2, 0       /* common case, len >= 2 */
121         ONES_ADD(a4, a6)
122         addi    a2, a2, 2       /* adjust buf */
123         addi    a3, a3, -2      /* adjust len */
124         j       1b              /* now buf is 4-byte aligned */
125
126         /* case: odd-byte aligned, len > 1
127          * This case is dog slow, so don't give us an odd address.
128          * (I don't think this ever happens, but just in case.)
129          */
130 8:
131         srli    a5, a3, 2       /* 4-byte chunks */
132 #if XCHAL_HAVE_LOOPS
133         loopgtz a5, 2f
134 #else
135         beqz    a5, 2f
136         slli    a5, a5, 2
137         add     a5, a5, a2      /* a5 = end of last 4-byte chunk */
138 .Loop3:
139 #endif
140         l8ui    a6, a2, 0       /* bits 24..31 (big-endian; bits 0..7 on little-endian) */
141         l16ui   a7, a2, 1       /* bits  8..23 (buf + 1 is 2-byte aligned here) */
142         l8ui    a8, a2, 3       /* bits  0.. 7 (big-endian; bits 24..31 on little-endian) */
143 #ifdef  __XTENSA_EB__
144         slli    a6, a6, 24
145 #else
146         slli    a8, a8, 24
147 #endif
148         slli    a7, a7, 8
149         or      a7, a7, a6
150         or      a7, a7, a8      /* a7 = the 32-bit word assembled from the three loads */
151         ONES_ADD(a4, a7)
152         addi    a2, a2, 4
153 #if !XCHAL_HAVE_LOOPS
154         blt     a2, a5, .Loop3
155 #endif
156 2:
157         _bbci.l a3, 1, 3f       /* remaining 2-byte chunk, still odd addr */
158         l8ui    a6, a2, 0
159         l8ui    a7, a2, 1
160 #ifdef  __XTENSA_EB__
161         slli    a6, a6, 8
162 #else
163         slli    a7, a7, 8
164 #endif
165         or      a7, a7, a6      /* a7 = the 16-bit value assembled from two byte loads */
166         ONES_ADD(a4, a7)
167         addi    a2, a2, 2
168 3:
169         j       5b              /* branch to handle the remaining byte */
170
171 ENDPROC(csum_partial)
172 EXPORT_SYMBOL(csum_partial)
173                                                   
174 /*
175  * Copy from ds while checksumming, otherwise like csum_partial
176  */
177
178 /*
179 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
180         a2  = src
181         a3  = dst
182         a4  = len
183         a5  = sum
184         a8  = temp
185         a9  = temp
186         a10 = temp
187
188     This function is optimized for 4-byte aligned addresses.  Other
189     alignments work, but not nearly as efficiently.
190  */
191                                                   
192 ENTRY(csum_partial_copy_generic)
193         /* In: a2 = src, a3 = dst, a4 = len.  Out: a2 = checksum of the copied bytes, or 0 from the fixup handler at 10:.  Scratch: a5 (running sum), a8-a10. */
194         abi_entry_default
195         movi    a5, -1          /* running sum starts at all ones; presumably so the 0 returned by the handler at 10: is distinguishable - confirm with callers */
196         or      a10, a2, a3     /* a10 low bits = combined misalignment of src and dst */
197
198         /* We optimize the following alignment tests for the 4-byte
199         aligned case.  Two bbsi.l instructions might seem more optimal
200         (commented out below).  However, both labels 5: and 3: are out
201         of the imm8 range, so the assembler relaxes them into
202         equivalent bbci.l, j combinations, which is actually
203         slower. */
204
205         extui   a9, a10, 0, 2
206         beqz    a9, 1f          /* branch if both are 4-byte aligned */
207         bbsi.l  a10, 0, 5f      /* branch if one address is odd */
208         j       3f              /* one address is 2-byte aligned */
209
210 /*      _bbsi.l a10, 0, 5f */   /* branch if odd address */
211 /*      _bbsi.l a10, 1, 3f */   /* branch if 2-byte-aligned address */
212
213 1:
214         /* src and dst are both 4-byte aligned */
215         srli    a10, a4, 5      /* 32-byte chunks */
216 #if XCHAL_HAVE_LOOPS
217         loopgtz a10, 2f
218 #else
219         beqz    a10, 2f
220         slli    a10, a10, 5
221         add     a10, a10, a2    /* a10 = end of last 32-byte src chunk */
222 .Loop5:
223 #endif
224 EX(10f) l32i    a9, a2, 0
225 EX(10f) l32i    a8, a2, 4
226 EX(10f) s32i    a9, a3, 0
227 EX(10f) s32i    a8, a3, 4
228         ONES_ADD(a5, a9)
229         ONES_ADD(a5, a8)
230 EX(10f) l32i    a9, a2, 8
231 EX(10f) l32i    a8, a2, 12
232 EX(10f) s32i    a9, a3, 8
233 EX(10f) s32i    a8, a3, 12
234         ONES_ADD(a5, a9)
235         ONES_ADD(a5, a8)
236 EX(10f) l32i    a9, a2, 16
237 EX(10f) l32i    a8, a2, 20
238 EX(10f) s32i    a9, a3, 16
239 EX(10f) s32i    a8, a3, 20
240         ONES_ADD(a5, a9)
241         ONES_ADD(a5, a8)
242 EX(10f) l32i    a9, a2, 24
243 EX(10f) l32i    a8, a2, 28
244 EX(10f) s32i    a9, a3, 24
245 EX(10f) s32i    a8, a3, 28
246         ONES_ADD(a5, a9)
247         ONES_ADD(a5, a8)
248         addi    a2, a2, 32
249         addi    a3, a3, 32
250 #if !XCHAL_HAVE_LOOPS
251         blt     a2, a10, .Loop5
252 #endif
253 2:
254         extui   a10, a4, 2, 3   /* remaining 4-byte chunks (len bits 2..4) */
255         extui   a4, a4, 0, 2    /* reset len for general-case, 2-byte chunks */
256 #if XCHAL_HAVE_LOOPS
257         loopgtz a10, 3f
258 #else
259         beqz    a10, 3f
260         slli    a10, a10, 2
261         add     a10, a10, a2    /* a10 = end of last 4-byte src chunk */
262 .Loop6:
263 #endif
264 EX(10f) l32i    a9, a2, 0
265 EX(10f) s32i    a9, a3, 0
266         ONES_ADD(a5, a9)
267         addi    a2, a2, 4
268         addi    a3, a3, 4
269 #if !XCHAL_HAVE_LOOPS
270         blt     a2, a10, .Loop6
271 #endif
272 3:
273         /*
274         Control comes to here in two cases: (1) It may fall through
275         to here from the 4-byte alignment case to process, at most,
276         one 2-byte chunk.  (2) It branches to here from above if
277         either src or dst is 2-byte aligned, and we process all bytes
278         here, except for perhaps a trailing odd byte.  It's
279         inefficient, so align your addresses to 4-byte boundaries.
280
281         a2 = src
282         a3 = dst
283         a4 = len
284         a5 = sum
285         */
286         srli    a10, a4, 1      /* 2-byte chunks */
287 #if XCHAL_HAVE_LOOPS
288         loopgtz a10, 4f
289 #else
290         beqz    a10, 4f
291         slli    a10, a10, 1
292         add     a10, a10, a2    /* a10 = end of last 2-byte src chunk */
293 .Loop7:
294 #endif
295 EX(10f) l16ui   a9, a2, 0
296 EX(10f) s16i    a9, a3, 0
297         ONES_ADD(a5, a9)
298         addi    a2, a2, 2
299         addi    a3, a3, 2
300 #if !XCHAL_HAVE_LOOPS
301         blt     a2, a10, .Loop7
302 #endif
303 4:
304         /* This section processes a possible trailing odd byte. */
305         _bbci.l a4, 0, 8f       /* 1-byte chunk: skip if len bit 0 is clear */
306 EX(10f) l8ui    a9, a2, 0
307 EX(10f) s8i     a9, a3, 0
308 #ifdef __XTENSA_EB__
309         slli    a9, a9, 8       /* shift byte to bits 8..15 */
310 #endif
311         ONES_ADD(a5, a9)
312 8:
313         mov     a2, a5          /* return the accumulated sum in a2 */
314         abi_ret_default
315
316 5:
317         /* Control branch to here when either src or dst is odd.  We
318         process all bytes using 8-bit accesses.  Grossly inefficient,
319         so don't feed us an odd address. */
320
321         srli    a10, a4, 1      /* handle in pairs for 16-bit csum */
322 #if XCHAL_HAVE_LOOPS
323         loopgtz a10, 6f
324 #else
325         beqz    a10, 6f
326         slli    a10, a10, 1
327         add     a10, a10, a2    /* a10 = end of last odd-aligned, 2-byte src chunk */
328 .Loop8:
329 #endif
330 EX(10f) l8ui    a9, a2, 0
331 EX(10f) l8ui    a8, a2, 1
332 EX(10f) s8i     a9, a3, 0
333 EX(10f) s8i     a8, a3, 1
334 #ifdef __XTENSA_EB__
335         slli    a9, a9, 8       /* combine into a single 16-bit value */
336 #else                           /* for checksum computation */
337         slli    a8, a8, 8
338 #endif
339         or      a9, a9, a8
340         ONES_ADD(a5, a9)
341         addi    a2, a2, 2
342         addi    a3, a3, 2
343 #if !XCHAL_HAVE_LOOPS
344         blt     a2, a10, .Loop8
345 #endif
346 6:
347         j       4b              /* process the possible trailing odd byte */
348
349 ENDPROC(csum_partial_copy_generic)
350 EXPORT_SYMBOL(csum_partial_copy_generic)
351                                                   
352                                                   
353 # Exception handler:
354 .section .fixup, "ax"
355 10:     /* label named by every EX(10f) above; presumably entered when an EX()-marked load/store faults (EX is defined outside this file - confirm) */
356         movi    a2, 0           /* return value = 0 */
357         abi_ret_default
358
359 .previous
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php