~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/lib/checksum_32.S

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-or-later */
  2 /*
  3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  4  *              operating system.  INET is implemented using the  BSD Socket
  5  *              interface as the means of communication with the user level.
  6  *
  7  *              IP/TCP/UDP checksumming routines
  8  *
  9  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 10  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 11  *              Tom May, <ftom@netcom.com>
 12  *              Pentium Pro/II routines:
 13  *              Alexander Kjeldaas <astor@guardian.no>
 14  *              Finn Arne Gangstad <finnag@guardian.no>
 15  *              Lots of code moved from tcp.c and ip.c; see those files
 16  *              for more names.
 17  *
 18  * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 19  *                           handling.
 20  *              Andi Kleen,  add zeroing on error
 21  *                   converted to pure assembler
 22  */
 23 
 24 #include <linux/export.h>
 25 #include <linux/linkage.h>
 26 #include <asm/errno.h>
 27 #include <asm/asm.h>
 28 #include <asm/nospec-branch.h>
 29 
 30 /*
 31  * computes a partial checksum, e.g. for TCP/UDP fragments
 32  */
 33 
 34 /*      
 35 unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 36  */
 37                 
 38 .text
 39                 
 40 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
 41 
 42           /*            
 43            * Experiments with Ethernet and SLIP connections show that buff
 44            * is aligned on either a 2-byte or 4-byte boundary.  We get at
 45            * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 46            * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 47            * alignment for the unrolled loop.
 48            */           
# unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
#
# 486/Pentium version: folds buff[0..len-1] into the one's-complement
# accumulator %eax using an 8-dword unrolled adcl loop.  If buff starts on
# an odd address the whole sum is byte-rotated up front (roll $8) and
# rotated back at label 7:, which keeps the byte order of the final
# 16-bit fold correct.  Clobbers %ecx/%edx/flags; %ebx/%esi are saved.
  49 SYM_FUNC_START(csum_partial)
  50         pushl %esi              # callee-saved, holds buff
  51         pushl %ebx              # callee-saved, scratch
  52         movl 20(%esp),%eax      # Function arg: unsigned int sum
  53         movl 16(%esp),%ecx      # Function arg: int len
  54         movl 12(%esp),%esi      # Function arg: unsigned char *buff
  55         testl $3, %esi          # Check alignment.
  56         jz 2f                   # Jump if alignment is ok.
  57         testl $1, %esi          # Check alignment.
  58         jz 10f                  # Jump if alignment is boundary of 2 bytes.
  59 
  60         # buf is odd: swallow one byte so the rest is 2-byte aligned
  61         dec %ecx                # dec leaves CF untouched
  62         jl 8f                   # len was 0: nothing to sum
  63         movzbl (%esi), %ebx
  64         adcl %ebx, %eax         # CF was cleared by the testl above
  65         roll $8, %eax           # rotate sum for odd start; undone at 7:
  66         inc %esi
  67         testl $2, %esi
  68         jz 2f                   # already 4-byte aligned
  69 10:
  70         subl $2, %ecx           # Alignment uses up two bytes.
  71         jae 1f                  # Jump if we had at least two bytes.
  72         addl $2, %ecx           # ecx was < 2.  Deal with it.
  73         jmp 4f
  74 1:      movw (%esi), %bx        # fold one word to reach 4-byte alignment
  75         addl $2, %esi
  76         addw %bx, %ax
  77         adcl $0, %eax
  78 2:                              # %esi is 4-byte aligned here
  79         movl %ecx, %edx         # edx = remaining len, kept for the tail
  80         shrl $5, %ecx           # ecx = number of 32-byte chunks
  81         jz 2f
  82         testl %esi, %esi        # clears CF before the adcl chain
  83 1:      movl (%esi), %ebx       # main loop: sum 8 dwords per iteration
  84         adcl %ebx, %eax
  85         movl 4(%esi), %ebx
  86         adcl %ebx, %eax
  87         movl 8(%esi), %ebx
  88         adcl %ebx, %eax
  89         movl 12(%esi), %ebx
  90         adcl %ebx, %eax
  91         movl 16(%esi), %ebx
  92         adcl %ebx, %eax
  93         movl 20(%esi), %ebx
  94         adcl %ebx, %eax
  95         movl 24(%esi), %ebx
  96         adcl %ebx, %eax
  97         movl 28(%esi), %ebx
  98         adcl %ebx, %eax
  99         lea 32(%esi), %esi      # lea advances esi without touching CF
 100         dec %ecx                # dec preserves CF for the next adcl
 101         jne 1b
 102         adcl $0, %eax           # fold in the last carry
 103 2:      movl %edx, %ecx
 104         andl $0x1c, %edx        # bytes still in whole dwords (0..28)
 105         je 4f
 106         shrl $2, %edx           # This clears CF
 107 3:      adcl (%esi), %eax       # sum the remaining dwords
 108         lea 4(%esi), %esi
 109         dec %edx
 110         jne 3b
 111         adcl $0, %eax
 112 4:      andl $3, %ecx           # 0-3 trailing bytes
 113         jz 7f
 114         cmpl $2, %ecx
 115         jb 5f                   # exactly one byte left
 116         movw (%esi),%cx
 117         leal 2(%esi),%esi
 118         je 6f                   # exactly two bytes left
 119         shll $16,%ecx           # three bytes: word in high half, byte low
 120 5:      movb (%esi),%cl
 121 6:      addl %ecx,%eax
 122         adcl $0, %eax 
 123 7:      
 124         testb $1, 12(%esp)      # was buff odd-aligned at entry?
 125         jz 8f
 126         roll $8, %eax           # undo the initial byte rotation
 127 8:
 128         popl %ebx
 129         popl %esi
 130         RET
 131 SYM_FUNC_END(csum_partial)
132 
133 #else
134 
135 /* Version for PentiumII/PPro */
136 
# unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
#
# PentiumII/PPro version: sums 128 bytes (32 dwords) of straight-line adcl
# per loop pass.  The len%128 remainder (rounded down to whole dwords) is
# handled first by computing a jump *into* the unrolled chain: each
# "adcl disp8(%esi),%eax" encodes to 3 bytes, which the lea at 154 below
# relies on.  Odd-address starts are byte-rotated as in the 486 version.
 137 SYM_FUNC_START(csum_partial)
 138         pushl %esi              # callee-saved, holds buff
 139         pushl %ebx              # callee-saved, scratch / jump target
 140         movl 20(%esp),%eax      # Function arg: unsigned int sum
 141         movl 16(%esp),%ecx      # Function arg: int len
 142         movl 12(%esp),%esi      # Function arg: const unsigned char *buf
 143 
 144         testl $3, %esi          # is buf 4-byte aligned?
 145         jnz 25f                 # no: fix up the unaligned head first
 146 10:
 147         movl %ecx, %edx         # edx = len, kept for the 1-3 byte tail
 148         movl %ecx, %ebx
 149         andl $0x7c, %ebx        # ebx = partial-block bytes, whole dwords
 150         shrl $7, %ecx           # ecx = number of full 128-byte blocks
 151         addl %ebx,%esi          # point esi past the partial block ...
 152         shrl $2, %ebx           # ... and make ebx its dword count
 153         negl %ebx
 154         lea 45f(%ebx,%ebx,2), %ebx # 45f - 3*dwords: entry into the chain
 155         testl %esi, %esi        # clears CF before the adcl chain
 156         JMP_NOSPEC ebx          # retpoline-safe indirect jump
 157 
 158         # Handle 2-byte-aligned regions
 159 20:     addw (%esi), %ax
 160         lea 2(%esi), %esi
 161         adcl $0, %eax
 162         jmp 10b
 163 25:
 164         testl $1, %esi          # odd address?
 165         jz 30f                  # no: 2-byte aligned
 166         # buf is odd: swallow one byte so the rest is 2-byte aligned
 167         dec %ecx
 168         jl 90f                  # len was 0: done
 169         movzbl (%esi), %ebx
 170         addl %ebx, %eax
 171         adcl $0, %eax
 172         roll $8, %eax           # rotate sum for odd start; undone at 80:
 173         inc %esi
 174         testl $2, %esi
 175         jz 10b                  # now 4-byte aligned
 176 
 177 30:     subl $2, %ecx           # 2-aligned: sum words until 4-aligned
 178         ja 20b                  # more than two bytes left
 179         je 32f                  # exactly two bytes left
 180         addl $2, %ecx
 181         jz 80f                  # len was 0
 182         movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
 183         addl %ebx, %eax
 184         adcl $0, %eax
 185         jmp 80f
 186 32:
 187         addw (%esi), %ax        # csumming 2 bytes, 2-aligned
 188         adcl $0, %eax
 189         jmp 80f
 190 
 191 40:                             # 32 adcls, 3 bytes each (disp8 forms)
 192         addl -128(%esi), %eax
 193         adcl -124(%esi), %eax
 194         adcl -120(%esi), %eax
 195         adcl -116(%esi), %eax
 196         adcl -112(%esi), %eax
 197         adcl -108(%esi), %eax
 198         adcl -104(%esi), %eax
 199         adcl -100(%esi), %eax
 200         adcl -96(%esi), %eax
 201         adcl -92(%esi), %eax
 202         adcl -88(%esi), %eax
 203         adcl -84(%esi), %eax
 204         adcl -80(%esi), %eax
 205         adcl -76(%esi), %eax
 206         adcl -72(%esi), %eax
 207         adcl -68(%esi), %eax
 208         adcl -64(%esi), %eax
 209         adcl -60(%esi), %eax
 210         adcl -56(%esi), %eax
 211         adcl -52(%esi), %eax
 212         adcl -48(%esi), %eax
 213         adcl -44(%esi), %eax
 214         adcl -40(%esi), %eax
 215         adcl -36(%esi), %eax
 216         adcl -32(%esi), %eax
 217         adcl -28(%esi), %eax
 218         adcl -24(%esi), %eax
 219         adcl -20(%esi), %eax
 220         adcl -16(%esi), %eax
 221         adcl -12(%esi), %eax
 222         adcl -8(%esi), %eax
 223         adcl -4(%esi), %eax
 224 45:
 225         lea 128(%esi), %esi     # next 128-byte block (lea keeps CF)
 226         adcl $0, %eax           # fold in the pending carry
 227         dec %ecx
 228         jge 40b                 # full 128-byte blocks remaining?
 229         movl %edx, %ecx         # recover the original len
 230 50:     andl $3, %ecx           # 0-3 trailing bytes
 231         jz 80f
 232 
 233         # Handle the last 1-3 bytes without jumping
 234         notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
 235         movl $0xffffff,%ebx     # by the shll and shrl instructions
 236         shll $3,%ecx
 237         shrl %cl,%ebx           # ebx = mask of the low 8*(len&3) bits
 238         andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
 239         addl %ebx,%eax
 240         adcl $0,%eax
 241 80: 
 242         testb $1, 12(%esp)      # was buf odd-aligned at entry?
 243         jz 90f
 244         roll $8, %eax           # undo the initial byte rotation
 245 90: 
 246         popl %ebx
 247         popl %esi
 248         RET
 249 SYM_FUNC_END(csum_partial)
250                                 
251 #endif
252 EXPORT_SYMBOL(csum_partial)
253 
254 /*
255 unsigned int csum_partial_copy_generic (const char *src, char *dst,
256                                   int len)
257  */ 
258 
259 /*
260  * Copy from ds while checksumming, otherwise like csum_partial
261  */
262 
263 #define EXC(y...)                                               \
264         9999: y;                                                \
265         _ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)
266 
267 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
268 
269 #define ARGBASE 16              
270 #define FP              12
271                 
# unsigned int csum_partial_copy_generic(const char *src, char *dst, int len)
#
# Copy src -> dst while accumulating the one's-complement checksum in %eax
# (initialized to ~0).  Every load/store that may fault is wrapped in EXC():
# on a fault the exception table (EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX, see
# the EXC macro above) resumes at 7f with %eax cleared, so the function
# returns 0.  Structure mirrors the non-PPro csum_partial above.
 272 SYM_FUNC_START(csum_partial_copy_generic)
 273         subl  $4,%esp           # scratch slot FP(%esp), saves len below
 274         pushl %edi
 275         pushl %esi
 276         pushl %ebx
 277         movl ARGBASE+12(%esp),%ecx      # len
 278         movl ARGBASE+4(%esp),%esi       # src
 279         movl ARGBASE+8(%esp),%edi       # dst
 280 
 281         movl $-1, %eax                  # sum
 282         testl $2, %edi                  # Check alignment (of dst). 
 283         jz 2f                           # Jump if alignment is ok.
 284         subl $2, %ecx                   # Alignment uses up two bytes.
 285         jae 1f                          # Jump if we had at least two bytes.
 286         addl $2, %ecx                   # ecx was < 2.  Deal with it.
 287         jmp 4f
 288 EXC(1:  movw (%esi), %bx        )       # copy+sum one word to align dst
 289         addl $2, %esi
 290 EXC(    movw %bx, (%edi)        )
 291         addl $2, %edi
 292         addw %bx, %ax   
 293         adcl $0, %eax
 294 2:
 295         movl %ecx, FP(%esp)             # save len for the tail
 296         shrl $5, %ecx                   # ecx = number of 32-byte chunks
 297         jz 2f
 298         testl %esi, %esi                # clears CF (what's wrong with clc?)
 299 EXC(1:  movl (%esi), %ebx       )       # main loop: copy+sum 8 dwords
 300 EXC(    movl 4(%esi), %edx      )
 301         adcl %ebx, %eax
 302 EXC(    movl %ebx, (%edi)       )
 303         adcl %edx, %eax
 304 EXC(    movl %edx, 4(%edi)      )
 305 
 306 EXC(    movl 8(%esi), %ebx      )
 307 EXC(    movl 12(%esi), %edx     )
 308         adcl %ebx, %eax
 309 EXC(    movl %ebx, 8(%edi)      )
 310         adcl %edx, %eax
 311 EXC(    movl %edx, 12(%edi)     )
 312 
 313 EXC(    movl 16(%esi), %ebx     )
 314 EXC(    movl 20(%esi), %edx     )
 315         adcl %ebx, %eax
 316 EXC(    movl %ebx, 16(%edi)     )
 317         adcl %edx, %eax
 318 EXC(    movl %edx, 20(%edi)     )
 319 
 320 EXC(    movl 24(%esi), %ebx     )
 321 EXC(    movl 28(%esi), %edx     )
 322         adcl %ebx, %eax
 323 EXC(    movl %ebx, 24(%edi)     )
 324         adcl %edx, %eax
 325 EXC(    movl %edx, 28(%edi)     )
 326 
 327         lea 32(%esi), %esi              # lea does not disturb CF
 328         lea 32(%edi), %edi
 329         dec %ecx
 330         jne 1b
 331         adcl $0, %eax                   # fold in the last carry
 332 2:      movl FP(%esp), %edx             # reload saved len
 333         movl %edx, %ecx
 334         andl $0x1c, %edx                # bytes still in whole dwords
 335         je 4f
 336         shrl $2, %edx                   # This clears CF
 337 EXC(3:  movl (%esi), %ebx       )       # copy+sum remaining dwords
 338         adcl %ebx, %eax
 339 EXC(    movl %ebx, (%edi)       )
 340         lea 4(%esi), %esi
 341         lea 4(%edi), %edi
 342         dec %edx
 343         jne 3b
 344         adcl $0, %eax
 345 4:      andl $3, %ecx                   # 0-3 trailing bytes
 346         jz 7f
 347         cmpl $2, %ecx
 348         jb 5f                           # exactly one byte left
 349 EXC(    movw (%esi), %cx        )
 350         leal 2(%esi), %esi
 351 EXC(    movw %cx, (%edi)        )
 352         leal 2(%edi), %edi
 353         je 6f                           # exactly two bytes left
 354         shll $16,%ecx                   # three: word in high half, byte low
 355 EXC(5:  movb (%esi), %cl        )
 356 EXC(    movb %cl, (%edi)        )
 357 6:      addl %ecx, %eax
 358         adcl $0, %eax
 359 7:                                      # also the fault-recovery target
 360 
 361         popl %ebx
 362         popl %esi
 363         popl %edi
 364         popl %ecx                       # equivalent to addl $4,%esp
 365         RET
 366 SYM_FUNC_END(csum_partial_copy_generic)
367 
368 #else
369 
370 /* Version for PentiumII/PPro */
371 
372 #define ROUND1(x) \
373         EXC(movl x(%esi), %ebx  )       ;       \
374         addl %ebx, %eax                 ;       \
375         EXC(movl %ebx, x(%edi)  )       ;
376 
377 #define ROUND(x) \
378         EXC(movl x(%esi), %ebx  )       ;       \
379         adcl %ebx, %eax                 ;       \
380         EXC(movl %ebx, x(%edi)  )       ;
381 
382 #define ARGBASE 12
383                 
# unsigned int csum_partial_copy_generic(const char *src, char *dst, int len)
#
# PentiumII/PPro version: copy+checksum 64 bytes (16 ROUNDs) per pass.
# As in the PPro csum_partial above, the len%64 remainder (whole dwords) is
# handled by jumping into the unrolled chain: each ROUND expands to 8 bytes
# of code per 4 data bytes, hence the scale-2 lea below.  EXC() wraps every
# potentially-faulting access; a fault resumes at 7f with %eax cleared
# (return value 0).
 384 SYM_FUNC_START(csum_partial_copy_generic)
 385         pushl %ebx
 386         pushl %edi
 387         pushl %esi
 388         movl ARGBASE+4(%esp),%esi       #src
 389         movl ARGBASE+8(%esp),%edi       #dst    
 390         movl ARGBASE+12(%esp),%ecx      #len
 391         movl $-1, %eax                  #sum
 392 #       movl %ecx, %edx  
 393         movl %ecx, %ebx  
 394         movl %esi, %edx                 # (overwritten by the lea below)
 395         shrl $6, %ecx                   # ecx = number of full 64B blocks
 396         andl $0x3c, %ebx                # ebx = partial block, whole dwords
 397         negl %ebx
 398         subl %ebx, %esi                 # ebx is negative: advance src and
 399         subl %ebx, %edi                 # dst past the partial block
 400         lea  -1(%esi),%edx              # edx = 32-byte-aligned pointer
 401         andl $-32,%edx                  # into src, used as prefetch cursor
 402         lea 3f(%ebx,%ebx), %ebx         # 3f - 2*dwords*4: chain entry point
 403         testl %esi, %esi                # clears CF before the adcl chain
 404         JMP_NOSPEC ebx                  # retpoline-safe indirect jump
 405 1:      addl $64,%esi
 406         addl $64,%edi 
 407         EXC(movb -32(%edx),%bl) ; EXC(movb (%edx),%bl)  # touch both src cache lines (prefetch; %bl is junk)
 408         ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)    
 409         ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)    
 410         ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)    
 411         ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)     
 412 3:      adcl $0,%eax                    # fold in the pending carry
 413         addl $64, %edx                  # advance the prefetch cursor
 414         dec %ecx
 415         jge 1b                          # full 64-byte blocks remaining?
 416 4:      movl ARGBASE+12(%esp),%edx      #len
 417         andl $3, %edx                   # 0-3 trailing bytes
 418         jz 7f
 419         cmpl $2, %edx
 420         jb 5f                           # exactly one byte left
 421 EXC(    movw (%esi), %dx         )
 422         leal 2(%esi), %esi
 423 EXC(    movw %dx, (%edi)         )
 424         leal 2(%edi), %edi
 425         je 6f                           # exactly two bytes left
 426         shll $16,%edx                   # three: word in high half, byte low
 427 5:
 428 EXC(    movb (%esi), %dl         )
 429 EXC(    movb %dl, (%edi)         )
 430 6:      addl %edx, %eax
 431         adcl $0, %eax
 432 7:                                      # also the fault-recovery target
 433 
 434         popl %esi
 435         popl %edi
 436         popl %ebx
 437         RET
 438 SYM_FUNC_END(csum_partial_copy_generic)
439                                 
440 #undef ROUND
441 #undef ROUND1           
442                 
443 #endif
444 EXPORT_SYMBOL(csum_partial_copy_generic)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php