/* SPDX-License-Identifier: GPL-2.0 */
/* checksum.S: Sparc V9 optimized checksum code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1995 Miguel de Icaza
 * Copyright(C) 1996, 2000 David S. Miller
 * Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 *
 * NOTE(review): SPARC convention throughout this file — the instruction
 * following a branch sits in the branch's delay slot and executes even
 * when the branch is taken (it is indented one extra space).  The ,pt/,pn
 * suffixes are static branch-prediction hints (predict taken/not-taken).
 */

#include <linux/export.h>
	.text

	/* Entered from csum_partial when buff (%o0) is not 4-byte
	 * aligned.  Consumes 1 byte and/or 1 halfword from the front of
	 * the buffer (accumulating them into %o4, pointer/length in
	 * %o0/%o1 adjusted to match) until %o0 is 4-byte aligned, then
	 * rejoins the aligned path at csum_partial_post_align.
	 * Condition codes on entry reflect "andcc %o0, 0x1" from the
	 * caller (be taken below when the address was already even).
	 */
csum_partial_fix_alignment:
	/* We checked for zero length already, so there must be
	 * at least one byte.
	 */
	be,pt		%icc, 1f
	 nop
	ldub		[%o0 + 0x00], %o4	/* odd start: eat one byte */
	add		%o0, 1, %o0
	sub		%o1, 1, %o1
1:	andcc		%o0, 0x2, %g0		/* still halfword-misaligned? */
	be,pn		%icc, csum_partial_post_align
	 cmp		%o1, 2
	blu,pn		%icc, csum_partial_end_cruft	/* < 2 bytes left */
	 nop
	lduh		[%o0 + 0x00], %o5	/* eat one halfword */
	add		%o0, 2, %o0
	sub		%o1, 2, %o1
	ba,pt		%xcc, csum_partial_post_align
	 add		%o5, %o4, %o4		/* delay slot: fold halfword in */

	.align		32
	.globl		csum_partial
	.type		csum_partial,#function
	EXPORT_SYMBOL(csum_partial)
	/* __wsum csum_partial(const void *buff, int len, __wsum sum)
	 *
	 * In:  %o0 = buff (buffer to checksum, any alignment)
	 *      %o1 = len  (bytes; zero allowed)
	 *      %o2 = sum  (32-bit partial checksum to continue from)
	 * Out: %o0 = 32-bit accumulated checksum (zero-extended to 64 bits)
	 * Clobbers: %o3-%o5, %g1-%g3, %g7, condition codes.
	 */
csum_partial:	/* %o0=buff, %o1=len, %o2=sum */
	prefetch	[%o0 + 0x000], #n_reads
	clr		%o4			/* %o4 = running accumulator */
	prefetch	[%o0 + 0x040], #n_reads
	brz,pn		%o1, csum_partial_finish	/* len == 0: just fold sum */
	 andcc		%o0, 0x3, %g0		/* delay slot: test 4-byte align */

	/* We "remember" whether the lowest bit in the address
	 * was set in %g7.  Because if it is, we have to swap
	 * upper and lower 8 bit fields of the sum we calculate.
	 */
	bne,pn		%icc, csum_partial_fix_alignment
	 andcc		%o0, 0x1, %g7		/* delay slot: %g7 = odd-start flag */

csum_partial_post_align:
	/* Buffer is now 4-byte aligned.  %o3 = len rounded down to a
	 * multiple of 64 (the unrolled-loop byte count); %o1 keeps the
	 * remainder.  Prefetches stay ~0x100 bytes ahead of the loads.
	 */
	prefetch	[%o0 + 0x080], #n_reads
	andncc		%o1, 0x3f, %o3

	prefetch	[%o0 + 0x0c0], #n_reads
	sub		%o1, %o3, %o1
	brz,pn		%o3, 2f			/* nothing for the big loop */
	 prefetch	[%o0 + 0x100], #n_reads

	/* So that we don't need to use the non-pairing
	 * add-with-carry instructions we accumulate 32-bit
	 * values into a 64-bit register.  At the end of the
	 * loop we fold it down to 32-bits and so on.
	 */
	prefetch	[%o0 + 0x140], #n_reads
	/* Main loop: 64 bytes (sixteen 32-bit words) per iteration,
	 * loads and adds interleaved to hide load latency.  Adds can
	 * never overflow 64 bits here (16 x 32-bit values per pass).
	 */
1:	lduw		[%o0 + 0x00], %o5
	lduw		[%o0 + 0x04], %g1
	lduw		[%o0 + 0x08], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x0c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x10], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x14], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x18], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x1c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x20], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x24], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x28], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x2c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x30], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x34], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x38], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x3c], %g3
	add		%o4, %g1, %o4
	prefetch	[%o0 + 0x180], #n_reads
	add		%o4, %g2, %o4
	subcc		%o3, 0x40, %o3
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 add		%o4, %g3, %o4		/* delay slot: last word of block */

	/* Trailing full words: %o3 = remaining len rounded down to a
	 * multiple of 4 (0x3c max); %o1 keeps the final 0-3 bytes.
	 */
2:	and		%o1, 0x3c, %o3
	brz,pn		%o3, 2f
	 sub		%o1, %o3, %o1
1:	lduw		[%o0 + 0x00], %o5
	subcc		%o3, 0x4, %o3
	add		%o0, 0x4, %o0
	bne,pt		%icc, 1b
	 add		%o4, %o5, %o4

2:
	/* fold 64-->32 */
	srlx		%o4, 32, %o5		/* high 32 bits */
	srl		%o4, 0, %o4		/* low 32 bits (zero-extend) */
	add		%o4, %o5, %o4
	srlx		%o4, 32, %o5		/* pick up carry from the add */
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4

	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5		/* high halfword */
	andn		%o4, %g1, %g2		/* low halfword */
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5		/* fold the carry back in */
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

csum_partial_end_cruft:
	/* %o4 has the 16-bit sum we have calculated so-far.
	 * %o1 holds the final 0-3 bytes: at most one halfword
	 * followed by at most one byte.
	 */
	cmp		%o1, 2
	blu,pt		%icc, 1f
	 nop
	lduh		[%o0 + 0x00], %o5	/* trailing halfword */
	sub		%o1, 2, %o1
	add		%o0, 2, %o0
	add		%o4, %o5, %o4
1:	brz,pt		%o1, 1f
	 nop
	ldub		[%o0 + 0x00], %o5	/* trailing lone byte */
	sub		%o1, 1, %o1
	add		%o0, 1, %o0
	sllx		%o5, 8, %o5		/* it is the high byte of its halfword */
	add		%o4, %o5, %o4
1:
	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

1:	brz,pt		%g7, 1f			/* %g7 set iff buff started odd */
	 nop

	/* We started with an odd byte, byte-swap the result. */
	srl		%o4, 8, %o5
	and		%o4, 0xff, %g1
	sll		%g1, 8, %g1
	or		%o5, %g1, %o4

	/* Add the 16-bit result into the caller's 32-bit sum,
	 * with end-around carry (addc folds the carry bit back in).
	 */
1:	addcc		%o2, %o4, %o2
	addc		%g0, %o2, %o2

csum_partial_finish:
	retl
	 srl		%o2, 0, %o0		/* delay slot: zero-extend result */
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.