~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/xtensa/lib/checksum.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-or-later */
  2 /*
  3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  4  *              operating system.  INET is implemented using the  BSD Socket
  5  *              interface as the means of communication with the user level.
  6  *
  7  *              IP/TCP/UDP checksumming routines
  8  *
  9  * Xtensa version:  Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
 10  *                  Optimized by Joe Taylor
 11  */
 12 
 13 #include <linux/errno.h>
 14 #include <linux/linkage.h>
 15 #include <asm/asmmacro.h>
 16 #include <asm/core.h>
 17 
 18 /*
 19  * computes a partial checksum, e.g. for TCP/UDP fragments
 20  */
 21 
 22 /*
 23  * unsigned int csum_partial(const unsigned char *buf, int len,
 24  *                           unsigned int sum);
 25  *    a2 = buf
 26  *    a3 = len
 27  *    a4 = sum
 28  *
 29  * This function is tuned for 2- or 4-byte alignment.  Odd addresses still work, but only via a slow byte-assembling path.
 30  */
 31 
 32 /* ONES_ADD(sum, val): sum += val; if the 32-bit add wrapped (unsigned sum < val) add the carry back in as 1, i.e. a ones-complement add.  Defines numeric local label 99. */
 33 #define ONES_ADD(sum, val)        \
 34         add     sum, sum, val   ; \
 35         bgeu    sum, val, 99f   ; \
 36         addi    sum, sum, 1     ; \
 37 99:                             ;
 38 
 39 .text
 40 ENTRY(csum_partial)
 41         /* In: a2 = buf, a3 = len, a4 = sum.  Out: a2 = accumulated sum.  Scratch: a5-a8. */
 42         /*
 43          * Experiments with Ethernet and SLIP connections show that buf
 44          * is aligned on either a 2-byte or 4-byte boundary.
 45          */
 46         abi_entry_default
 47         extui   a5, a2, 0, 2    /* a5 = buf & 3 */
 48         bnez    a5, 8f          /* branch if buf is not 4-byte aligned */
 49         /* Fall-through on common case, 4-byte alignment */
 50 1:
 51         srli    a5, a3, 5       /* 32-byte chunks */
 52 #if XCHAL_HAVE_LOOPS
 53         loopgtz a5, 2f
 54 #else
 55         beqz    a5, 2f
 56         slli    a5, a5, 5
 57         add     a5, a5, a2      /* a5 = end of last 32-byte chunk */
 58 .Loop1:
 59 #endif
 60         l32i    a6, a2, 0
 61         l32i    a7, a2, 4
 62         ONES_ADD(a4, a6)
 63         ONES_ADD(a4, a7)
 64         l32i    a6, a2, 8
 65         l32i    a7, a2, 12
 66         ONES_ADD(a4, a6)
 67         ONES_ADD(a4, a7)
 68         l32i    a6, a2, 16
 69         l32i    a7, a2, 20
 70         ONES_ADD(a4, a6)
 71         ONES_ADD(a4, a7)
 72         l32i    a6, a2, 24
 73         l32i    a7, a2, 28
 74         ONES_ADD(a4, a6)
 75         ONES_ADD(a4, a7)
 76         addi    a2, a2, 4*8
 77 #if !XCHAL_HAVE_LOOPS
 78         bne     a2, a5, .Loop1  /* a2 lands exactly on a5; unlike blt, not a signed address compare */
 79 #endif
 80 2:
 81         extui   a5, a3, 2, 3    /* remaining 4-byte chunks */
 82 #if XCHAL_HAVE_LOOPS
 83         loopgtz a5, 3f
 84 #else
 85         beqz    a5, 3f
 86         slli    a5, a5, 2
 87         add     a5, a5, a2      /* a5 = end of last 4-byte chunk */
 88 .Loop2:
 89 #endif
 90         l32i    a6, a2, 0
 91         ONES_ADD(a4, a6)
 92         addi    a2, a2, 4
 93 #if !XCHAL_HAVE_LOOPS
 94         bne     a2, a5, .Loop2  /* a2 lands exactly on a5; unlike blt, not a signed address compare */
 95 #endif
 96 3:
 97         _bbci.l a3, 1, 5f       /* remaining 2-byte chunk */
 98         l16ui   a6, a2, 0
 99         ONES_ADD(a4, a6)
100         addi    a2, a2, 2
101 5:
102         _bbci.l a3, 0, 7f       /* remaining 1-byte chunk */
103 6:      l8ui    a6, a2, 0
104 #ifdef __XTENSA_EB__
105         slli    a6, a6, 8       /* load byte into bits 8..15 */
106 #endif
107         ONES_ADD(a4, a6)
108 7:
109         mov     a2, a4          /* return the accumulated sum */
110         abi_ret_default
111 
112         /* uncommon case, buf is not 4-byte aligned (2-byte aligned or odd) */
113 8:
114         beqz    a3, 7b          /* branch if len == 0 */
115         beqi    a3, 1, 6b       /* branch if len == 1 */
116 
117         extui   a5, a2, 0, 1
118         bnez    a5, 8f          /* branch if 1-byte aligned */
119 
120         l16ui   a6, a2, 0       /* common case, len >= 2 */
121         ONES_ADD(a4, a6)
122         addi    a2, a2, 2       /* adjust buf */
123         addi    a3, a3, -2      /* adjust len */
124         j       1b              /* now buf is 4-byte aligned */
125 
126         /* case: odd-byte aligned, len > 1
127          * This case is dog slow, so don't give us an odd address.
128          * (I don't think this ever happens, but just in case.)
129          */
130 8:
131         srli    a5, a3, 2       /* 4-byte chunks */
132 #if XCHAL_HAVE_LOOPS
133         loopgtz a5, 2f
134 #else
135         beqz    a5, 2f
136         slli    a5, a5, 2
137         add     a5, a5, a2      /* a5 = end of last 4-byte chunk */
138 .Loop3:
139 #endif
140         l8ui    a6, a2, 0       /* bits 24..31 (EB) / bits  0.. 7 (EL) */
141         l16ui   a7, a2, 1       /* bits  8..23; buf + 1 is 2-byte aligned */
142         l8ui    a8, a2, 3       /* bits  0.. 7 (EB) / bits 24..31 (EL) */
143 #ifdef  __XTENSA_EB__
144         slli    a6, a6, 24
145 #else
146         slli    a8, a8, 24
147 #endif
148         slli    a7, a7, 8
149         or      a7, a7, a6
150         or      a7, a7, a8      /* a7 = the 32-bit word assembled from four bytes */
151         ONES_ADD(a4, a7)
152         addi    a2, a2, 4
153 #if !XCHAL_HAVE_LOOPS
154         bne     a2, a5, .Loop3  /* a2 lands exactly on a5; unlike blt, not a signed address compare */
155 #endif
156 2:
157         _bbci.l a3, 1, 3f       /* remaining 2-byte chunk, still odd addr */
158         l8ui    a6, a2, 0
159         l8ui    a7, a2, 1
160 #ifdef  __XTENSA_EB__
161         slli    a6, a6, 8
162 #else
163         slli    a7, a7, 8
164 #endif
165         or      a7, a7, a6
166         ONES_ADD(a4, a7)
167         addi    a2, a2, 2
168 3:
169         j       5b              /* branch to handle the remaining byte */
170 
171 ENDPROC(csum_partial)
172 EXPORT_SYMBOL(csum_partial)
173 
174 /*
175  * Copy from src to dst while checksumming, otherwise like csum_partial
176  */
177 
178 /*
179 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
180         a2  = src
181         a3  = dst
182         a4  = len
183         a5  = sum (running sum, seeded with -1 on entry; not an argument)
184         a8  = temp
185         a9  = temp
186         a10 = temp
187     Returns the sum in a2.  EX(10f) presumably routes a faulting access to the 10: fixup, which returns 0.
188     This function is optimized for 4-byte aligned addresses.  Other
189     alignments work, but not nearly as efficiently.
190  */
191 
192 ENTRY(csum_partial_copy_generic)
193 
194         abi_entry_default
195         movi    a5, -1          /* seed the running sum with ~0 */
196         or      a10, a2, a3     /* low bits are set if either pointer is misaligned */
197 
198         /* We optimize the following alignment tests for the 4-byte
199         aligned case.  Two bbsi.l instructions might seem more optimal
200         (commented out below).  However, both labels 5: and 3: are out
201         of the imm8 range, so the assembler relaxes them into
202         equivalent bbci.l, j combinations, which is actually
203         slower. */
204 
205         extui   a9, a10, 0, 2
206         beqz    a9, 1f          /* branch if both are 4-byte aligned */
207         bbsi.l  a10, 0, 5f      /* branch if one address is odd */
208         j       3f              /* one address is 2-byte aligned */
209 
210 /*      _bbsi.l a10, 0, 5f */   /* branch if odd address */
211 /*      _bbsi.l a10, 1, 3f */   /* branch if 2-byte-aligned address */
212 
213 1:
214         /* src and dst are both 4-byte aligned */
215         srli    a10, a4, 5      /* 32-byte chunks */
216 #if XCHAL_HAVE_LOOPS
217         loopgtz a10, 2f
218 #else
219         beqz    a10, 2f
220         slli    a10, a10, 5
221         add     a10, a10, a2    /* a10 = end of last 32-byte src chunk */
222 .Loop5:
223 #endif
224 EX(10f) l32i    a9, a2, 0
225 EX(10f) l32i    a8, a2, 4
226 EX(10f) s32i    a9, a3, 0
227 EX(10f) s32i    a8, a3, 4
228         ONES_ADD(a5, a9)
229         ONES_ADD(a5, a8)
230 EX(10f) l32i    a9, a2, 8
231 EX(10f) l32i    a8, a2, 12
232 EX(10f) s32i    a9, a3, 8
233 EX(10f) s32i    a8, a3, 12
234         ONES_ADD(a5, a9)
235         ONES_ADD(a5, a8)
236 EX(10f) l32i    a9, a2, 16
237 EX(10f) l32i    a8, a2, 20
238 EX(10f) s32i    a9, a3, 16
239 EX(10f) s32i    a8, a3, 20
240         ONES_ADD(a5, a9)
241         ONES_ADD(a5, a8)
242 EX(10f) l32i    a9, a2, 24
243 EX(10f) l32i    a8, a2, 28
244 EX(10f) s32i    a9, a3, 24
245 EX(10f) s32i    a8, a3, 28
246         ONES_ADD(a5, a9)
247         ONES_ADD(a5, a8)
248         addi    a2, a2, 32
249         addi    a3, a3, 32
250 #if !XCHAL_HAVE_LOOPS
251         bne     a2, a10, .Loop5 /* a2 lands exactly on a10; unlike blt, not a signed address compare */
252 #endif
253 2:
254         extui   a10, a4, 2, 3   /* remaining 4-byte chunks */
255         extui   a4, a4, 0, 2    /* reset len for general-case, 2-byte chunks */
256 #if XCHAL_HAVE_LOOPS
257         loopgtz a10, 3f
258 #else
259         beqz    a10, 3f
260         slli    a10, a10, 2
261         add     a10, a10, a2    /* a10 = end of last 4-byte src chunk */
262 .Loop6:
263 #endif
264 EX(10f) l32i    a9, a2, 0
265 EX(10f) s32i    a9, a3, 0
266         ONES_ADD(a5, a9)
267         addi    a2, a2, 4
268         addi    a3, a3, 4
269 #if !XCHAL_HAVE_LOOPS
270         bne     a2, a10, .Loop6 /* a2 lands exactly on a10; unlike blt, not a signed address compare */
271 #endif
272 3:
273         /*
274         Control comes to here in two cases: (1) It may fall through
275         to here from the 4-byte alignment case to process, at most,
276         one 2-byte chunk.  (2) It branches to here from above if
277         either src or dst is 2-byte aligned, and we process all bytes
278         here, except for perhaps a trailing odd byte.  It's
279         inefficient, so align your addresses to 4-byte boundaries.
280 
281         a2 = src
282         a3 = dst
283         a4 = len
284         a5 = sum
285         */
286         srli    a10, a4, 1      /* 2-byte chunks */
287 #if XCHAL_HAVE_LOOPS
288         loopgtz a10, 4f
289 #else
290         beqz    a10, 4f
291         slli    a10, a10, 1
292         add     a10, a10, a2    /* a10 = end of last 2-byte src chunk */
293 .Loop7:
294 #endif
295 EX(10f) l16ui   a9, a2, 0
296 EX(10f) s16i    a9, a3, 0
297         ONES_ADD(a5, a9)
298         addi    a2, a2, 2
299         addi    a3, a3, 2
300 #if !XCHAL_HAVE_LOOPS
301         bne     a2, a10, .Loop7 /* a2 lands exactly on a10; unlike blt, not a signed address compare */
302 #endif
303 4:
304         /* This section processes a possible trailing odd byte. */
305         _bbci.l a4, 0, 8f       /* 1-byte chunk */
306 EX(10f) l8ui    a9, a2, 0
307 EX(10f) s8i     a9, a3, 0
308 #ifdef __XTENSA_EB__
309         slli    a9, a9, 8       /* shift byte to bits 8..15 */
310 #endif
311         ONES_ADD(a5, a9)
312 8:
313         mov     a2, a5          /* return the accumulated sum */
314         abi_ret_default
315 
316 5:
317         /* Control branches to here when either src or dst is odd.  We
318         process all bytes using 8-bit accesses.  Grossly inefficient,
319         so don't feed us an odd address. */
320 
321         srli    a10, a4, 1      /* handle in pairs for 16-bit csum */
322 #if XCHAL_HAVE_LOOPS
323         loopgtz a10, 6f
324 #else
325         beqz    a10, 6f
326         slli    a10, a10, 1
327         add     a10, a10, a2    /* a10 = end of last odd-aligned, 2-byte src chunk */
328 .Loop8:
329 #endif
330 EX(10f) l8ui    a9, a2, 0
331 EX(10f) l8ui    a8, a2, 1
332 EX(10f) s8i     a9, a3, 0
333 EX(10f) s8i     a8, a3, 1
334 #ifdef __XTENSA_EB__
335         slli    a9, a9, 8       /* combine into a single 16-bit value */
336 #else                           /* for checksum computation */
337         slli    a8, a8, 8
338 #endif
339         or      a9, a9, a8
340         ONES_ADD(a5, a9)
341         addi    a2, a2, 2
342         addi    a3, a3, 2
343 #if !XCHAL_HAVE_LOOPS
344         bne     a2, a10, .Loop8 /* a2 lands exactly on a10; unlike blt, not a signed address compare */
345 #endif
346 6:
347         j       4b              /* process the possible trailing odd byte */
348 
349 ENDPROC(csum_partial_copy_generic)
350 EXPORT_SYMBOL(csum_partial_copy_generic)
351 
352 
353 # Exception handler:
354 .section .fixup, "ax"
355 10:                             /* target of the EX(10f) annotations in csum_partial_copy_generic */
356         movi    a2, 0           /* a2 carries the return value: return 0 */
357         abi_ret_default
358 
359 .previous

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php