/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Xtensa version:  Copyright (C) 2001 Tensilica, Inc.
 *    Optimized by Joe Taylor
 */

#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum)
 *    a2 = buf
 *    a3 = len
 *    a4 = sum
 *
 * This function assumes 2- or 4-byte alignment.  Other alignments will fail!
 */

/* ONES_ADD converts twos-complement math to ones-complement. */
#define ONES_ADD(sum, val)	  \
	add	sum, sum, val	; \
	bgeu	sum, val, 99f	; \
	addi	sum, sum, 1	; \
99:				;
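/*
 * ONES_ADD() above is the usual end-around-carry trick: do a 32-bit
 * twos-complement add and, if it wrapped, add the carry back in so the
 * result behaves like ones-complement addition.  A minimal C sketch of
 * the same step (the helper name is illustrative, not a kernel API):
 *
 *	static unsigned int ones_add(unsigned int sum, unsigned int val)
 *	{
 *		sum += val;
 *		if (sum < val)		// the 32-bit add carried out
 *			sum += 1;	// fold the carry back in
 *		return sum;
 *	}
 */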
.text
ENTRY(csum_partial)

	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.
	 */
	abi_entry_default
	extui	a5, a2, 0, 2
	bnez	a5, 8f		/* branch if 2-byte aligned */
	/* Fall-through on common case, 4-byte alignment */
1:
	srli	a5, a3, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 5
	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
.Loop1:
#endif
	l32i	a6, a2, 0
	l32i	a7, a2, 4
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 8
	l32i	a7, a2, 12
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 16
	l32i	a7, a2, 20
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 24
	l32i	a7, a2, 28
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	addi	a2, a2, 4*8
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop1
#endif
2:
	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 3f
#else
	beqz	a5, 3f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop2:
#endif
	l32i	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop2
#endif
3:
	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */
	l16ui	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 2
5:
	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */
6:	l8ui	a6, a2, 0
#ifdef	__XTENSA_EB__
	slli	a6, a6, 8	/* load byte into bits 8..15 */
#endif
	ONES_ADD(a4, a6)
7:
	mov	a2, a4
	abi_ret_default

	/* uncommon case, buf is 2-byte aligned */
8:
	beqz	a3, 7b		/* branch if len == 0 */
	beqi	a3, 1, 6b	/* branch if len == 1 */

	extui	a5, a2, 0, 1
	bnez	a5, 8f		/* branch if 1-byte aligned */

	l16ui	a6, a2, 0	/* common case, len >= 2 */
	ONES_ADD(a4, a6)
	addi	a2, a2, 2	/* adjust buf */
	addi	a3, a3, -2	/* adjust len */
	j	1b		/* now buf is 4-byte aligned */
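/*
 * The odd-address slow path below never issues an unaligned 32-bit
 * load: it reads one byte, one aligned halfword at buf + 1, and one
 * more byte, then splices them back into the value a host-order word
 * load of those four bytes would have produced.  A minimal C sketch of
 * that splice, mirroring the #ifdef __XTENSA_EB__ used below (the
 * helper name is illustrative only):
 *
 *	#include <string.h>
 *
 *	unsigned int load32_from_odd(const unsigned char *p)
 *	{
 *		unsigned short mid;
 *		unsigned int b0 = p[0], b3 = p[3];
 *
 *		memcpy(&mid, p + 1, 2);		// aligned 16-bit access
 *	#ifdef __XTENSA_EB__
 *		return b0 << 24 | (unsigned int)mid << 8 | b3;
 *	#else
 *		return b3 << 24 | (unsigned int)mid << 8 | b0;
 *	#endif
 *	}
 */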
	/* case: odd-byte aligned, len > 1
	 * This case is dog slow, so don't give us an odd address.
	 * (I don't think this ever happens, but just in case.)
	 */
8:
	srli	a5, a3, 2	/* 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop3:
#endif
	l8ui	a6, a2, 0	/* bits 24..31 */
	l16ui	a7, a2, 1	/* bits  8..23 */
	l8ui	a8, a2, 3	/* bits  0.. 8 */
#ifdef	__XTENSA_EB__
	slli	a6, a6, 24
#else
	slli	a8, a8, 24
#endif
	slli	a7, a7, 8
	or	a7, a7, a6
	or	a7, a7, a8
	ONES_ADD(a4, a7)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop3
#endif
2:
	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk, still odd addr */
	l8ui	a6, a2, 0
	l8ui	a7, a2, 1
#ifdef	__XTENSA_EB__
	slli	a6, a6, 8
#else
	slli	a7, a7, 8
#endif
	or	a7, a7, a6
	ONES_ADD(a4, a7)
	addi	a2, a2, 2
3:
	j	5b		/* branch to handle the remaining byte */

ENDPROC(csum_partial)
EXPORT_SYMBOL(csum_partial)
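/*
 * For reference, csum_partial above is equivalent (once its 32-bit
 * result is folded down to 16 bits) to summing the buffer as host-order
 * 16-bit words with end-around carry, padding a trailing odd byte with
 * a zero byte.  A hedged C sketch of that behaviour; csum_ref() and
 * ones_add() are illustrative names, not kernel interfaces:
 *
 *	#include <string.h>
 *
 *	static unsigned int ones_add(unsigned int s, unsigned int v)
 *	{
 *		s += v;
 *		return s < v ? s + 1 : s;	// end-around carry
 *	}
 *
 *	unsigned int csum_ref(const unsigned char *buf, int len, unsigned int sum)
 *	{
 *		unsigned short w;
 *
 *		while (len > 1) {		// whole 16-bit words, host order
 *			memcpy(&w, buf, 2);
 *			sum = ones_add(sum, w);
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len) {			// trailing byte, padded with zero
 *			unsigned char pair[2] = { buf[0], 0 };
 *			memcpy(&w, pair, 2);
 *			sum = ones_add(sum, w);
 *		}
 *		return sum;
 *	}
 */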
/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
	a2 = src
	a3 = dst
	a4 = len
	a5 = sum
	a8 = temp
	a9 = temp
	a10 = temp

    This function is optimized for 4-byte aligned addresses.  Other
    alignments work, but not nearly as efficiently.
 */
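/*
 * On the no-fault path, the routine below is just a copy fused with the
 * checksum sketched after csum_partial above, seeded with ~0 (see
 * "movi a5, -1" below).  An illustrative statement of that contract,
 * assuming the csum_ref() sketch given earlier (csum_copy_ref() is an
 * illustrative name, not the kernel's C helper):
 *
 *	#include <string.h>
 *
 *	unsigned int csum_copy_ref(const unsigned char *src,
 *				   unsigned char *dst, int len)
 *	{
 *		memcpy(dst, src, len);		// the assembly does this in one pass
 *		return csum_ref(dst, len, ~0u);	// ~0 seed, cf. "movi a5, -1"
 *	}
 */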
ENTRY(csum_partial_copy_generic)

	abi_entry_default
	movi	a5, -1
	or	a10, a2, a3

	/* We optimize the following alignment tests for the 4-byte
	aligned case.  Two bbsi.l instructions might seem more optimal
	(commented out below).  However, both labels are out of the
	imm8 range, so the assembler relaxes them into an equivalent
	bbci.l, j combination, which is actually slower. */

	extui	a9, a10, 0, 2
	beqz	a9, 1f		/* branch if both are 4-byte aligned */
	bbsi.l	a10, 0, 5f	/* branch if one address is odd */
	j	3f		/* one address is 2-byte aligned */

/*	_bbsi.l	a10, 0, 5f */	/* branch if odd address */
/*	_bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */

1:
	/* src and dst are both 4-byte aligned */
	srli	a10, a4, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 2f
#else
	beqz	a10, 2f
	slli	a10, a10, 5
	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
.Loop5:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	l32i	a8, a2, 4
EX(10f)	s32i	a9, a3, 0
EX(10f)	s32i	a8, a3, 4
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 8
EX(10f)	l32i	a8, a2, 12
EX(10f)	s32i	a9, a3, 8
EX(10f)	s32i	a8, a3, 12
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 16
EX(10f)	l32i	a8, a2, 20
EX(10f)	s32i	a9, a3, 16
EX(10f)	s32i	a8, a3, 20
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 24
EX(10f)	l32i	a8, a2, 28
EX(10f)	s32i	a9, a3, 24
EX(10f)	s32i	a8, a3, 28
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
	addi	a2, a2, 32
	addi	a3, a3, 32
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop5
#endif
2:
	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 3f
#else
	beqz	a10, 3f
	slli	a10, a10, 2
	add	a10, a10, a2	/* a10 = end of last 4-byte chunk */
.Loop6:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	s32i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 4
	addi	a3, a3, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop6
#endif
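/*
 * The length bookkeeping in the two loops above is plain bit slicing of
 * len: srli extracts the 32-byte block count, the extui pair splits off
 * the leftover whole words and the final byte count.  A small
 * illustrative C sketch (names are not kernel interfaces):
 *
 *	void split_len(int len, int *chunks32, int *words, int *tail)
 *	{
 *		*chunks32 = len >> 5;		// 32-byte blocks (srli ..., 5)
 *		*words    = (len >> 2) & 7;	// leftover 4-byte words (extui ..., 2, 3)
 *		*tail     = len & 3;		// final 0-3 bytes (extui ..., 0, 2)
 *	}
 */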
3:
	/*
	Control comes to here in two cases: (1) It may fall through
	to here from the 4-byte alignment case to process, at most,
	one 2-byte chunk.  (2) It branches to here from above if
	either src or dst is 2-byte aligned, and we process all bytes
	here, except for perhaps a trailing odd byte.  It's
	inefficient, so align your addresses to 4-byte boundaries.

	a2 = src
	a3 = dst
	a4 = len
	a5 = sum
	*/
	srli	a10, a4, 1	/* 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 4f
#else
	beqz	a10, 4f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last 2-byte chunk */
.Loop7:
#endif
EX(10f)	l16ui	a9, a2, 0
EX(10f)	s16i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop7
#endif
4:
	/* This section processes a possible trailing odd byte. */
	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
EX(10f)	l8ui	a9, a2, 0
EX(10f)	s8i	a9, a3, 0
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* shift byte to bits 8..15 */
#endif
	ONES_ADD(a5, a9)
8:
	mov	a2, a5
	abi_ret_default

5:
	/* Control branch to here when either src or dst is odd.  We
	   process all bytes using 8-bit accesses.  Grossly inefficient,
	   so don't feed us an odd address. */

	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 6f
#else
	beqz	a10, 6f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */
.Loop8:
#endif
EX(10f)	l8ui	a9, a2, 0
EX(10f)	l8ui	a8, a2, 1
EX(10f)	s8i	a9, a3, 0
EX(10f)	s8i	a8, a3, 1
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* combine into a single 16-bit value */
#else				/* for checksum computation. The sum */
	slli	a8, a8, 8	/* of a9 and a8 is the checksum value */
#endif				/* we compute */
	or	a9, a9, a8
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop8
#endif
6:
	j	4b		/* process the possible trailing odd byte */

ENDPROC(csum_partial_copy_generic)
EXPORT_SYMBOL(csum_partial_copy_generic)
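/*
 * In the byte-wise path above (label 5 / .Loop8), each pair of bytes is
 * copied with 8-bit accesses but charged to the checksum as a single
 * 16-bit value, so the result matches the halfword-based paths.  A
 * sketch of that pairing, mirroring the #ifdef __XTENSA_EB__ above
 * (pair_bytes() is an illustrative name):
 *
 *	// Combine two consecutive memory bytes into the value a
 *	// host-order halfword load of them would produce.
 *	unsigned int pair_bytes(unsigned char b0, unsigned char b1)
 *	{
 *	#ifdef __XTENSA_EB__
 *		return (unsigned int)b0 << 8 | b1;	// big-endian layout
 *	#else
 *		return (unsigned int)b1 << 8 | b0;	// little-endian layout
 *	#endif
 *	}
 */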
# Exception handler:
.section .fixup, "ax"
10:
	movi	a2, 0
	abi_ret_default

.previous
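/*
 * The fixup above returns 0, and because csum_partial_copy_generic
 * seeds its accumulator with ~0 (movi a5, -1), a run with no fault can
 * never produce 0, so a zero return can only mean that a load or store
 * faulted.  A hedged sketch of a caller-side check (copy_and_csum() is
 * illustrative, not an existing kernel helper):
 *
 *	#include <errno.h>
 *
 *	unsigned int csum_partial_copy_generic(const char *src, char *dst, int len);
 *
 *	int copy_and_csum(const char *src, char *dst, int len, unsigned int *csum)
 *	{
 *		unsigned int sum = csum_partial_copy_generic(src, dst, len);
 *
 *		if (sum == 0)		// only the fixup path returns 0
 *			return -EFAULT;
 *		*csum = sum;		// a 32-bit ones-complement partial sum
 *		return 0;
 *	}
 */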