/* SPDX-License-Identifier: GPL-2.0 */
/* checksum.S: Sparc optimized checksum code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

#include <linux/export.h>
#include <asm/errno.h>

#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)	\
	ldd	[buf + offset + 0x00], t0;			\
	ldd	[buf + offset + 0x08], t2;			\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;					\
	ldd	[buf + offset + 0x10], t4;			\
	addxcc	t2, sum, sum;					\
	addxcc	t3, sum, sum;					\
	ldd	[buf + offset + 0x18], t0;			\
	addxcc	t4, sum, sum;					\
	addxcc	t5, sum, sum;					\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;

#define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3)	\
	ldd	[buf - offset - 0x08], t0;			\
	ldd	[buf - offset - 0x00], t2;			\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;					\
	addxcc	t2, sum, sum;					\
	addxcc	t3, sum, sum;
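
	/* Illustrative only: each CSUM_BIGCHUNK expansion adds eight 32-bit
	 * words (0x20 bytes) into the running sum, with the carry of every
	 * add wrapped back in via addxcc.  A rough C sketch of the same
	 * end-around-carry accumulation (the function name is made up for
	 * illustration; it is not a kernel interface):
	 *
	 *	#include <stdint.h>
	 *
	 *	static uint32_t csum_chunk_sketch(const uint32_t *buf, uint32_t sum)
	 *	{
	 *		uint64_t acc = sum;
	 *		int i;
	 *
	 *		for (i = 0; i < 8; i++)
	 *			acc += buf[i];
	 *		while (acc >> 32)
	 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
	 *		return (uint32_t)acc;
	 *	}
	 *
	 * The result is congruent (mod 0xffff) to what the addxcc chains
	 * compute, which is all the final 16-bit fold cares about.
	 */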

	/* Do end cruft out of band to get better cache behavior. */
csum_partial_end_cruft:
	be	1f				! skip if no 8-byte chunk (caller tested %o1 & 8)
	andcc	%o1, 4, %g0			! check for a remaining word
	ldd	[%o0], %g2			! load the 8-byte chunk
	addcc	%g2, %o2, %o2			! add first word to sum
	addxcc	%g3, %o2, %o2			! add second word as well
	add	%o0, 8, %o0			! advance buf ptr
	addx	%g0, %o2, %o2			! add in final carry
	andcc	%o1, 4, %g0			! check again for a remaining word
1:	be	1f				! none, skip this code
	andcc	%o1, 3, %o1			! check for trailing bytes
	ld	[%o0], %g2			! load the word
	addcc	%g2, %o2, %o2			! add to sum
	add	%o0, 4, %o0			! advance buf ptr
	addx	%g0, %o2, %o2			! add in final carry
	andcc	%o1, 3, %g0			! check again for trailing bytes
1:	be	1f				! no trailing bytes, return
	addcc	%o1, -1, %g0			! only one byte remains?
	bne	2f				! at least two bytes more
	subcc	%o1, 2, %o1			! exactly two bytes more?
	b	4f				! only one byte remains
	or	%g0, %g0, %o4			! clear fake hword value
2:	lduh	[%o0], %o4			! get hword
	be	6f				! jmp if only the hword remained
	add	%o0, 2, %o0			! advance buf ptr either way
	sll	%o4, 16, %o4			! create upper hword
4:	ldub	[%o0], %o5			! get final byte
	sll	%o5, 8, %o5			! put into place
	or	%o5, %o4, %o4			! coalesce with hword (if any)
6:	addcc	%o4, %o2, %o2			! add to sum
1:	retl					! get outta here
	addx	%g0, %o2, %o0			! add final carry into retval

	/* Also do alignment out of band to get better cache behavior. */
csum_partial_fix_alignment:
	cmp	%o1, 6
	bl	cpte - 0x4
	andcc	%o0, 0x2, %g0
	be	1f
	andcc	%o0, 0x4, %g0
	lduh	[%o0 + 0x00], %g2
	sub	%o1, 2, %o1
	add	%o0, 2, %o0
	sll	%g2, 16, %g2
	addcc	%g2, %o2, %o2
	srl	%o2, 16, %g3
	addx	%g0, %g3, %g2
	sll	%o2, 16, %o2
	sll	%g2, 16, %g3
	srl	%o2, 16, %o2
	andcc	%o0, 0x4, %g0
	or	%g3, %o2, %o2
1:	be	cpa
	andcc	%o1, 0xffffff80, %o3
	ld	[%o0 + 0x00], %g2
	sub	%o1, 4, %o1
	addcc	%g2, %o2, %o2
	add	%o0, 4, %o0
	addx	%g0, %o2, %o2
	b	cpa
	andcc	%o1, 0xffffff80, %o3

	/* The common case is to get called with a nicely aligned
	 * buffer of size 0x20.  Follow the code path for that case.
	 */
	.globl	csum_partial
	EXPORT_SYMBOL(csum_partial)
csum_partial:			/* %o0=buf, %o1=len, %o2=sum */
	andcc	%o0, 0x7, %g0				! alignment problems?
	bne	csum_partial_fix_alignment		! yep, handle it
	sethi	%hi(cpte - 8), %g7			! prepare table jump ptr
	andcc	%o1, 0xffffff80, %o3			! any 128-byte chunks?
cpa:	be	3f					! none to do
	andcc	%o1, 0x70, %g1				! clears the carry flag too
5:	CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	addx	%g0, %o2, %o2				! sink in final carry
	subcc	%o3, 128, %o3				! detract from loop count
	bne	5b					! more to do
	add	%o0, 128, %o0				! advance buf ptr
	andcc	%o1, 0x70, %g1				! clears the carry flag too
3:	be	cpte					! nope, check trailing bytes
	andcc	%o1, 0xf, %g0				! anything left at all?
	srl	%g1, 1, %o4				! compute offset into table
	sub	%g7, %g1, %g7				! adjust table jump ptr
	sub	%g7, %o4, %g7				! final table ptr adjust
	jmp	%g7 + %lo(cpte - 8)			! enter the table
	add	%o0, %g1, %o0				! advance buf ptr
cptbl:	CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
	addx	%g0, %o2, %o2				! fetch final carry
	andcc	%o1, 0xf, %g0				! anything left at all?
cpte:	bne	csum_partial_end_cruft			! yep, handle it
	andcc	%o1, 8, %g0				! check how much
cpout:	retl						! get outta here
	mov	%o2, %o0				! return computed csum
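
	/* For callers, csum_partial returns a 32-bit partial sum; the final
	 * 16-bit Internet checksum is obtained by folding that value (the
	 * kernel's csum_fold() helper does this on the C side).  A rough,
	 * illustrative C sketch of the folding step, with a made-up name:
	 *
	 *	#include <stdint.h>
	 *
	 *	static uint16_t fold_csum_sketch(uint32_t partial)
	 *	{
	 *		partial = (partial & 0xffff) + (partial >> 16);
	 *		partial = (partial & 0xffff) + (partial >> 16);
	 *		return (uint16_t)~partial;
	 *	}
	 *
	 * i.e. add the two 16-bit halves with end-around carry and take the
	 * ones' complement, per RFC 1071.
	 */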

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y)			\
98:	x,y;			\
	.section __ex_table,ALLOC;	\
	.align	4;		\
	.word	98b, cc_fault;	\
	.text;			\
	.align	4

	/* This aligned version executes typically in 8.5 superscalar cycles;
	 * this is the best I can do.  I say 8.5 because the final add will
	 * pair with the next ldd in the main unrolled loop, so the pipe is
	 * always full.  If you change these macros (including the order of
	 * instructions), please check the fixup code below as well.
	 */
#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
	EX(ldd	[src + off + 0x00], t0);						\
	EX(ldd	[src + off + 0x08], t2);						\
	addxcc	t0, sum, sum;								\
	EX(ldd	[src + off + 0x10], t4);						\
	addxcc	t1, sum, sum;								\
	EX(ldd	[src + off + 0x18], t6);						\
	addxcc	t2, sum, sum;								\
	EX(std	t0, [dst + off + 0x00]);						\
	addxcc	t3, sum, sum;								\
	EX(std	t2, [dst + off + 0x08]);						\
	addxcc	t4, sum, sum;								\
	EX(std	t4, [dst + off + 0x10]);						\
	addxcc	t5, sum, sum;								\
	EX(std	t6, [dst + off + 0x18]);						\
	addxcc	t6, sum, sum;								\
	addxcc	t7, sum, sum;

	/* 12 superscalar cycles seems to be the limit for this case,
	 * because of this we do all the ldd's together to get the
	 * Viking MXCC into streaming mode.  Ho hum...
	 */
#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
	EX(ldd	[src + off + 0x00], t0);					\
	EX(ldd	[src + off + 0x08], t2);					\
	EX(ldd	[src + off + 0x10], t4);					\
	EX(ldd	[src + off + 0x18], t6);					\
	EX(st	t0, [dst + off + 0x00]);					\
	addxcc	t0, sum, sum;							\
	EX(st	t1, [dst + off + 0x04]);					\
	addxcc	t1, sum, sum;							\
	EX(st	t2, [dst + off + 0x08]);					\
	addxcc	t2, sum, sum;							\
	EX(st	t3, [dst + off + 0x0c]);					\
	addxcc	t3, sum, sum;							\
	EX(st	t4, [dst + off + 0x10]);					\
	addxcc	t4, sum, sum;							\
	EX(st	t5, [dst + off + 0x14]);					\
	addxcc	t5, sum, sum;							\
	EX(st	t6, [dst + off + 0x18]);					\
	addxcc	t6, sum, sum;							\
	EX(st	t7, [dst + off + 0x1c]);					\
	addxcc	t7, sum, sum;

	/* Yuck, 6 superscalar cycles... */
#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3)	\
	EX(ldd	[src - off - 0x08], t0);			\
	EX(ldd	[src - off - 0x00], t2);			\
	addxcc	t0, sum, sum;					\
	EX(st	t0, [dst - off - 0x08]);			\
	addxcc	t1, sum, sum;					\
	EX(st	t1, [dst - off - 0x04]);			\
	addxcc	t2, sum, sum;					\
	EX(st	t2, [dst - off - 0x00]);			\
	addxcc	t3, sum, sum;					\
	EX(st	t3, [dst - off + 0x04]);

	/* Handle the end cruft code out of band for better cache behavior. */
cc_end_cruft:
	be	1f
	andcc	%o3, 4, %g0
	EX(ldd	[%o0 + 0x00], %g2)
	add	%o1, 8, %o1
	addcc	%g2, %g7, %g7
	add	%o0, 8, %o0
	addxcc	%g3, %g7, %g7
	EX(st	%g2, [%o1 - 0x08])
	addx	%g0, %g7, %g7
	andcc	%o3, 4, %g0
	EX(st	%g3, [%o1 - 0x04])
1:	be	1f
	andcc	%o3, 3, %o3
	EX(ld	[%o0 + 0x00], %g2)
	add	%o1, 4, %o1
	addcc	%g2, %g7, %g7
	EX(st	%g2, [%o1 - 0x04])
	addx	%g0, %g7, %g7
	andcc	%o3, 3, %g0
	add	%o0, 4, %o0
1:	be	1f
	addcc	%o3, -1, %g0
	bne	2f
	subcc	%o3, 2, %o3
	b	4f
	or	%g0, %g0, %o4
2:	EX(lduh	[%o0 + 0x00], %o4)
	add	%o0, 2, %o0
	EX(sth	%o4, [%o1 + 0x00])
	be	6f
	add	%o1, 2, %o1
	sll	%o4, 16, %o4
4:	EX(ldub	[%o0 + 0x00], %o5)
	EX(stb	%o5, [%o1 + 0x00])
	sll	%o5, 8, %o5
	or	%o5, %o4, %o4
6:	addcc	%o4, %g7, %g7
1:	retl
	addx	%g0, %g7, %o0

	/* Also, handle the alignment code out of band. */
cc_dword_align:
	cmp	%g1, 16
	bge	1f
	srl	%g1, 1, %o3
2:	cmp	%o3, 0
	be,a	ccte
	andcc	%g1, 0xf, %o3
	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
	be,a	2b
	srl	%o3, 1, %o3
1:	andcc	%o0, 0x1, %g0
	bne	ccslow
	andcc	%o0, 0x2, %g0
	be	1f
	andcc	%o0, 0x4, %g0
	EX(lduh	[%o0 + 0x00], %g4)
	sub	%g1, 2, %g1
	EX(sth	%g4, [%o1 + 0x00])
	add	%o0, 2, %o0
	sll	%g4, 16, %g4
	addcc	%g4, %g7, %g7
	add	%o1, 2, %o1
	srl	%g7, 16, %g3
	addx	%g0, %g3, %g4
	sll	%g7, 16, %g7
	sll	%g4, 16, %g3
	srl	%g7, 16, %g7
	andcc	%o0, 0x4, %g0
	or	%g3, %g7, %g7
1:	be	3f
	andcc	%g1, 0xffffff80, %g0
	EX(ld	[%o0 + 0x00], %g4)
	sub	%g1, 4, %g1
	EX(st	%g4, [%o1 + 0x00])
	add	%o0, 4, %o0
	addcc	%g4, %g7, %g7
	add	%o1, 4, %o1
	addx	%g0, %g7, %g7
	b	3f
	andcc	%g1, 0xffffff80, %g0
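
	/* Every EX()-wrapped access above also emits an __ex_table entry
	 * pairing the address of that load/store with cc_fault.  When one of
	 * these accesses faults, the trap handler looks the faulting PC up in
	 * the table and resumes at the fixup address instead of killing the
	 * task.  Conceptually (an illustrative sketch only; the kernel keeps
	 * the table sorted and searches it via search_exception_tables()):
	 *
	 *	struct ex_entry { unsigned long insn, fixup; };
	 *
	 *	static unsigned long find_fixup_sketch(const struct ex_entry *tbl,
	 *					       unsigned long n,
	 *					       unsigned long fault_pc)
	 *	{
	 *		unsigned long i;
	 *
	 *		for (i = 0; i < n; i++)
	 *			if (tbl[i].insn == fault_pc)
	 *				return tbl[i].fixup;
	 *		return 0;
	 *	}
	 *
	 * Here every fixup address is cc_fault, which simply returns 0.
	 */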

	/* Sun, you just can't beat me, you just can't.  Stop trying,
	 * give up.  I'm serious, I am going to kick the living shit
	 * out of you, game over, lights out.
	 */
	.align	8
	.globl	__csum_partial_copy_sparc_generic
	EXPORT_SYMBOL(__csum_partial_copy_sparc_generic)
__csum_partial_copy_sparc_generic:
					/* %o0=src, %o1=dest, %g1=len, %g7=sum */
	xor	%o0, %o1, %o4		! get changing bits
	andcc	%o4, 3, %g0		! check for mismatched alignment
	bne	ccslow			! better this than unaligned/fixups
	andcc	%o0, 7, %g0		! need to align things?
	bne	cc_dword_align		! yes, and check for short lengths there
	andcc	%g1, 0xffffff80, %g0	! can we use the unrolled loop?
3:	be	3f			! nope, less than 128 bytes remain
	andcc	%o1, 4, %g0		! dest aligned to 4 or 8 bytes?
	be	ccdbl + 4		! 8 byte aligned, use the ldd/std loop
5:	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	sub	%g1, 128, %g1		! detract from length
	addx	%g0, %g7, %g7		! add in last carry bit
	andcc	%g1, 0xffffff80, %g0	! more to csum?
	add	%o0, 128, %o0		! advance src ptr
	bne	5b			! we did not go negative, continue looping
	add	%o1, 128, %o1		! advance dest ptr
3:	andcc	%g1, 0x70, %o2		! can we use the table?
ccmerge:be	ccte			! nope, go and check for end cruft
	andcc	%g1, 0xf, %o3		! get low bits of length (clears carry btw)
	srl	%o2, 1, %o4		! begin negative offset computation
	sethi	%hi(12f), %o5		! set up table ptr end
	add	%o0, %o2, %o0		! advance src ptr
	sub	%o5, %o4, %o5		! continue table calculation
	sll	%o2, 1, %g2		! constant multiplies are fun...
	sub	%o5, %g2, %o5		! some more adjustments
	jmp	%o5 + %lo(12f)		! jump into it, duff style, wheee...
	add	%o1, %o2, %o1		! advance dest ptr (carry is clear btw)
cctbl:	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
12:	addx	%g0, %g7, %g7
	andcc	%o3, 0xf, %g0		! check for low bits set
ccte:	bne	cc_end_cruft		! something left, handle it out of band
	andcc	%o3, 8, %g0		! begin checks for that code
	retl				! return
	mov	%g7, %o0		! give them the computed checksum
ccdbl:	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	sub	%g1, 128, %g1		! detract from length
	addx	%g0, %g7, %g7		! add in last carry bit
	andcc	%g1, 0xffffff80, %g0	! more to csum?
	add	%o0, 128, %o0		! advance src ptr
	bne	ccdbl			! we did not go negative, continue looping
	add	%o1, 128, %o1		! advance dest ptr
	b	ccmerge			! finish it off, above
	andcc	%g1, 0x70, %o2		! can we use the table? (clears carry)

ccslow:	cmp	%g1, 0
	mov	0, %g5
	bleu	4f
	andcc	%o0, 1, %o5
	be,a	1f
	srl	%g1, 1, %g4
	sub	%g1, 1, %g1
	EX(ldub	[%o0], %g5)
	add	%o0, 1, %o0
	EX(stb	%g5, [%o1])
	srl	%g1, 1, %g4
	add	%o1, 1, %o1
1:	cmp	%g4, 0
	be,a	3f
	andcc	%g1, 1, %g0
	andcc	%o0, 2, %g0
	be,a	1f
	srl	%g4, 1, %g4
	EX(lduh	[%o0], %o4)
	sub	%g1, 2, %g1
	srl	%o4, 8, %g2
	sub	%g4, 1, %g4
	EX(stb	%g2, [%o1])
	add	%o4, %g5, %g5
	EX(stb	%o4, [%o1 + 1])
	add	%o0, 2, %o0
	srl	%g4, 1, %g4
	add	%o1, 2, %o1
1:	cmp	%g4, 0
	be,a	2f
	andcc	%g1, 2, %g0
	EX(ld	[%o0], %o4)
5:	srl	%o4, 24, %g2
	srl	%o4, 16, %g3
	EX(stb	%g2, [%o1])
	srl	%o4, 8, %g2
	EX(stb	%g3, [%o1 + 1])
	add	%o0, 4, %o0
	EX(stb	%g2, [%o1 + 2])
	addcc	%o4, %g5, %g5
	EX(stb	%o4, [%o1 + 3])
	addx	%g5, %g0, %g5	! too lazy to optimize this further (questionable
	add	%o1, 4, %o1	! whether it is worth it); maybe some day, with
	subcc	%g4, 1, %g4	! the sll/srl tricks
	bne,a	5b
	EX(ld	[%o0], %o4)
	sll	%g5, 16, %g2
	srl	%g5, 16, %g5
	srl	%g2, 16, %g2
	andcc	%g1, 2, %g0
	add	%g2, %g5, %g5
2:	be,a	3f
	andcc	%g1, 1, %g0
	EX(lduh	[%o0], %o4)
	andcc	%g1, 1, %g0
	srl	%o4, 8, %g2
	add	%o0, 2, %o0
	EX(stb	%g2, [%o1])
	add	%g5, %o4, %g5
	EX(stb	%o4, [%o1 + 1])
	add	%o1, 2, %o1
3:	be,a	1f
	sll	%g5, 16, %o4
	EX(ldub	[%o0], %g2)
	sll	%g2, 8, %o4
	EX(stb	%g2, [%o1])
	add	%g5, %o4, %g5
	sll	%g5, 16, %o4
1:	addcc	%o4, %g5, %g5
	srl	%g5, 16, %o4
	addx	%g0, %o4, %g5
	orcc	%o5, %g0, %g0
	be	4f
	srl	%g5, 8, %o4
	and	%g5, 0xff, %g2
	and	%o4, 0xff, %o4
	sll	%g2, 8, %g2
	or	%g2, %o4, %g5
4:	addcc	%g7, %g5, %g7
	retl
	addx	%g0, %g7, %o0

/* We do these strange calculations for the csum_*_from_user case only, i.e.
 * we only bother with faults on loads... */

cc_fault:
	retl
	clr	%o0
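
	/* Taken together, __csum_partial_copy_sparc_generic copies %g1 bytes
	 * from %o0 to %o1 while accumulating the data into the partial sum in
	 * %g7, and cc_fault makes the whole routine return 0 if any of the
	 * EX()-wrapped accesses faults.  A rough C model of the non-faulting
	 * semantics (illustrative only, made-up name, big-endian word order
	 * as on sparc):
	 *
	 *	#include <stdint.h>
	 *	#include <stddef.h>
	 *
	 *	static uint32_t csum_copy_sketch(uint8_t *dst, const uint8_t *src,
	 *					 size_t len, uint32_t sum)
	 *	{
	 *		uint64_t acc = sum;
	 *		size_t i;
	 *
	 *		for (i = 0; i + 1 < len; i += 2) {
	 *			dst[i] = src[i];
	 *			dst[i + 1] = src[i + 1];
	 *			acc += (uint32_t)((src[i] << 8) | src[i + 1]);
	 *		}
	 *		if (len & 1) {
	 *			dst[len - 1] = src[len - 1];
	 *			acc += (uint32_t)src[len - 1] << 8;
	 *		}
	 *		while (acc >> 32)
	 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
	 *		return (uint32_t)acc;
	 *	}
	 *
	 * The returned value is congruent (mod 0xffff) to what the assembly
	 * hands back, which is what matters once the caller folds it.
	 */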