/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 *
 * Per the PPC32 calling convention: r3 = buff, r4 = len, r5 = sum.
 * The running sum is accumulated with adde, i.e. the XER carry bit
 * is threaded through the whole chain and folded in at the end by
 * addze.  Result is returned in r3.
 *
 * Note: buff is biased by -4 up front so every access can use the
 * 4(rN) displacement of the update-form loads (lwzu/lhz/lbz).
 */
_GLOBAL(__csum_partial)
	subi	r3,r3,4			/* bias buff so 4(r3) is the next byte */
	srawi.	r6,r4,2			/* Divide len by 4 and also clear carry */
	beq	3f			/* if we're doing < 4 bytes */
	andi.	r0,r3,2			/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)		/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2			/* # words to do */
	adde	r5,r5,r0
	beq	3f
	/* Peel off 1-3 leading words so the main loop can go 4 at a time. */
1:	andi.	r6,r6,3			/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4			/* # blocks of 4 words to do */
	beq	3f
	/* Software-pipelined 4-words-per-iteration loop: loads for the
	   next group overlap the adds of the previous one. */
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8		/* drain the last pipelined word */
	/* Tail: at most one halfword and one byte remain. */
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8			/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5			/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff, while copying the block to dst.
 * If an access exception occurs it returns zero.
 *
 * csum_partial_copy_generic(src, dst, len)
 *
 * Per the PPC32 calling convention: r3 = src, r4 = dst, r5 = len.
 * Internally: r4 = src - 4, r6 = dst - 4 (update-form addressing),
 * r12 = running checksum.  Result (or 0 on fault) returned in r3.
 */

/*
 * Copy-and-checksum 16 bytes (4 words) per expansion.  Every load and
 * store carries a numeric label 8<n><0..7> so CSUM_COPY_16_BYTES_EXCODE(n)
 * below can register it in the exception table; a faulting access is
 * redirected to "fault".  The checksum accumulates in r12 via adde.
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

/* Exception-table entries matching the labels emitted above. */
#define CSUM_COPY_16_BYTES_EXCODE(n)	\
	EX_TABLE(8 ## n ## 0b, fault);	\
	EX_TABLE(8 ## n ## 1b, fault);	\
	EX_TABLE(8 ## n ## 2b, fault);	\
	EX_TABLE(8 ## n ## 3b, fault);	\
	EX_TABLE(8 ## n ## 4b, fault);	\
	EX_TABLE(8 ## n ## 5b, fault);	\
	EX_TABLE(8 ## n ## 6b, fault);	\
	EX_TABLE(8 ## n ## 7b, fault);

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	li	r12,-1			/* initial sum = 0xffffffff */
	addic	r0,r0,0			/* clear carry */
	addi	r6,r4,-4		/* r6 = dst - 4 */
	neg	r0,r4
	addi	r4,r3,-4		/* r4 = src - 4 */
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq		/* default: treat dst as even */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8		/* r7 = 8 if dst is odd, else 0 */
	rlwnm	r12,r12,r7,0,31		/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0		/* is destination address even ? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0			/* r3 gathers the leading odd bytes */
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5		/* len -= alignment bytes done */
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4			/* dcbz offset (r6 is dst - 4) */
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0		/* ctr = lines minus those prefetched */
	mr	r0,r7			/* r0 = prefetched lines still owed */
	mtctr	r8

53:	dcbt	r3,r4			/* prefetch source ahead */
54:	dcbz	r11,r6			/* zero dst line to avoid the read-for-ownership */
	/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0			/* any lines left from the prefetch lead? */
	li	r3,4
	li	r7,0			/* second pass: no further prefetching */
	bne	114b

	/* Trailing words after the last full cacheline. */
63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

	/* Trailing halfword, then trailing byte. */
64:	andi.	r0,r5,2
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8			/* odd byte goes in the upper half */
	adde	r12,r12,r0
66:	addze	r3,r12			/* fold in final carry */
	beqlr+	cr7			/* cr7.eq set above: dst was even */
	rlwinm	r3,r3,8,0,31		/* odd destination address: rotate one byte */
	blr

	/* Exception fixup target: any faulting access lands here. */
fault:
	li	r3,0
	blr

	EX_TABLE(70b, fault);
	EX_TABLE(71b, fault);
	EX_TABLE(72b, fault);
	EX_TABLE(73b, fault);
	EX_TABLE(54b, fault);

/*
 * Exception-table entries for the cacheline copy loop above; every
 * load and store in it (read part and write part alike) branches to
 * "fault" if it takes an access exception.
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, fault);
	EX_TABLE(31b, fault);
	EX_TABLE(40b, fault);
	EX_TABLE(41b, fault);
	EX_TABLE(50b, fault);
	EX_TABLE(51b, fault);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 *
 * r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum.
 * Sums the two 16-byte addresses, len and proto into a carry chain,
 * then folds the 32-bit total to 16 bits and returns its one's
 * complement in r3.
 */
_GLOBAL(csum_ipv6_magic)
	lwz	r8, 0(r3)		/* four words of saddr */
	lwz	r9, 4(r3)
	addc	r0, r7, r8		/* start the carry chain with sum */
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)		/* four words of daddr */
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6		/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0			/* fold in the final carry */
	rotlwi	r3, r0, 16		/* fold 32 bits to 16: add the halves */
	add	r3, r0, r3
	not	r3, r3			/* one's complement ... */
	rlwinm	r3, r3, 16, 16, 31	/* ... and extract the folded 16 bits */
	blr
EXPORT_SYMBOL(csum_ipv6_magic)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.