Linux/arch/riscv/lib/csum.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Checksum library
 *
 * Influenced by arch/arm64/lib/csum.c
 * Copyright (C) 2023-2024 Rivos Inc.
 */
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/jump_label.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>

#include <asm/cpufeature.h>

#include <net/checksum.h>

/* Default version is sufficient for 32 bit */
#ifndef CONFIG_32BIT
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
                        const struct in6_addr *daddr,
                        __u32 len, __u8 proto, __wsum csum)
{
        unsigned int ulen, uproto;
        unsigned long sum = (__force unsigned long)csum;

        sum += (__force unsigned long)saddr->s6_addr32[0];
        sum += (__force unsigned long)saddr->s6_addr32[1];
        sum += (__force unsigned long)saddr->s6_addr32[2];
        sum += (__force unsigned long)saddr->s6_addr32[3];

        sum += (__force unsigned long)daddr->s6_addr32[0];
        sum += (__force unsigned long)daddr->s6_addr32[1];
        sum += (__force unsigned long)daddr->s6_addr32[2];
        sum += (__force unsigned long)daddr->s6_addr32[3];

        ulen = (__force unsigned int)htonl((unsigned int)len);
        sum += ulen;

        uproto = (__force unsigned int)htonl(proto);
        sum += uproto;

        /*
         * Zbb only saves 4 instructions here, so it is not worth a runtime
         * check unless the alternatives mechanism can patch the code instead.
         */
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
                unsigned long fold_temp;

                /*
                 * Zbb is likely available when the kernel is compiled with Zbb
                 * support, so nop when Zbb is available and jump when Zbb is
                 * not available.
                 */
                asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                              RISCV_ISA_EXT_ZBB, 1)
                                  :
                                  :
                                  :
                                  : no_zbb);
                asm(".option push                                       \n\
                .option arch,+zbb                                       \n\
                        rori    %[fold_temp], %[sum], 32                \n\
                        add     %[sum], %[fold_temp], %[sum]            \n\
                        srli    %[sum], %[sum], 32                      \n\
                        not     %[fold_temp], %[sum]                    \n\
                        roriw   %[sum], %[sum], 16                      \n\
                        subw    %[sum], %[fold_temp], %[sum]            \n\
                .option pop"
                : [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
                return (__force __sum16)(sum >> 16);
        }
no_zbb:
        sum += ror64(sum, 32);
        sum >>= 32;
        return csum_fold((__force __wsum)sum);
}
EXPORT_SYMBOL(csum_ipv6_magic);
#endif /* !CONFIG_32BIT */
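
/*
 * Usage sketch (illustrative; ip6h, th, payload and payload_sum are assumed
 * caller-local names, not part of this file): the network stack uses
 * csum_ipv6_magic() to finish a transport checksum over the IPv6
 * pseudo-header, roughly like
 *
 *      __wsum payload_sum = csum_partial(payload, len, 0);
 *      th->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, len,
 *                                  IPPROTO_TCP, payload_sum);
 */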

#ifdef CONFIG_32BIT
#define OFFSET_MASK 3
#elif CONFIG_64BIT
#define OFFSET_MASK 7
#endif
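
/*
 * Worked example of OFFSET_MASK (numbers are illustrative): on RV64,
 * OFFSET_MASK is 7, so for buff == 0x...1005 the byte offset within its
 * doubleword is 5, the aligned base used for the first load is buff - 5, and
 * the 5 low-order (little-endian) bytes of that first doubleword are masked
 * off before being summed.
 */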

static inline __no_sanitize_address unsigned long
do_csum_common(const unsigned long *ptr, const unsigned long *end,
               unsigned long data)
{
        unsigned int shift;
        unsigned long csum = 0, carry = 0;

        /*
         * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
         * faster than doing 32-bit reads on architectures that support larger
         * reads.
         */
        while (ptr < end) {
                csum += data;
                carry += csum < data;
                data = *(ptr++);
        }

        /*
         * Mask off the bytes that were over-read past the end of the buffer
         * if the tail did not fill a whole word.
         */
        shift = ((long)ptr - (long)end) * 8;
#ifdef __LITTLE_ENDIAN
        data = (data << shift) >> shift;
#else
        data = (data >> shift) << shift;
#endif
        csum += data;
        carry += csum < data;
        csum += carry;
        csum += csum < carry;

        return csum;
}
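
/*
 * Note on the accumulation above: "csum += data; carry += csum < data;"
 * records the carry-out of each unsigned add, and the final
 * "csum += carry; csum += csum < carry;" folds the collected carries back in,
 * i.e. the end-around carry that defines one's-complement addition. Minimal
 * sketch of the same idea for two words a and b:
 *
 *      sum = a + b;            add
 *      sum += sum < a;         fold the carry-out back in (end-around carry)
 */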

/*
 * The algorithm accounts for buff being misaligned.
 * If buff is not aligned, the code will over-read bytes but mask out the
 * bytes it should not use. The same happens at the tail end of the read.
 */
static inline __no_sanitize_address unsigned int
do_csum_with_alignment(const unsigned char *buff, int len)
{
        unsigned int offset, shift;
        unsigned long csum, data;
        const unsigned long *ptr, *end;

        /*
         * Align the address down to the closest word (double word on rv64)
         * that comes before buff. This should always be in the same page and
         * cache line. Since this function is __no_sanitize_address, call
         * KASAN directly for the exact range the caller passed in.
         */
        offset = (unsigned long)buff & OFFSET_MASK;
        kasan_check_read(buff, len);
        ptr = (const unsigned long *)(buff - offset);

        /*
         * Clear the bytes that were over-read before the start of buff if
         * buff was not aligned (the low-order bytes on little-endian, the
         * high-order bytes on big-endian).
         */
        shift = offset * 8;
        data = *(ptr++);
#ifdef __LITTLE_ENDIAN
        data = (data >> shift) << shift;
#else
        data = (data << shift) >> shift;
#endif
        end = (const unsigned long *)(buff + len);
        csum = do_csum_common(ptr, end, data);

#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
        /*
         * Zbb only saves 6 instructions here, so it is not worth a runtime
         * check unless the alternatives mechanism can patch the code instead.
         */
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
                unsigned long fold_temp;

                /*
                 * Zbb is likely available when the kernel is compiled with Zbb
                 * support, so nop when Zbb is available and jump when Zbb is
                 * not available.
                 */
                asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                              RISCV_ISA_EXT_ZBB, 1)
                                  :
                                  :
                                  :
                                  : no_zbb);

#ifdef CONFIG_32BIT
                asm_goto_output(".option push                   \n\
                .option arch,+zbb                               \n\
                        rori    %[fold_temp], %[csum], 16       \n\
                        andi    %[offset], %[offset], 1         \n\
                        add     %[csum], %[fold_temp], %[csum]  \n\
                        beq     %[offset], zero, %l[end]        \n\
                        rev8    %[csum], %[csum]                \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        : [offset] "r" (offset)
                        :
                        : end);

                return (unsigned short)csum;
#else /* !CONFIG_32BIT */
                asm_goto_output(".option push                   \n\
                .option arch,+zbb                               \n\
                        rori    %[fold_temp], %[csum], 32       \n\
                        add     %[csum], %[fold_temp], %[csum]  \n\
                        srli    %[csum], %[csum], 32            \n\
                        roriw   %[fold_temp], %[csum], 16       \n\
                        addw    %[csum], %[fold_temp], %[csum]  \n\
                        andi    %[offset], %[offset], 1         \n\
                        beq     %[offset], zero, %l[end]        \n\
                        rev8    %[csum], %[csum]                \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        : [offset] "r" (offset)
                        :
                        : end);

                return (csum << 16) >> 48;
#endif /* !CONFIG_32BIT */
end:
                return csum >> 16;
        }
no_zbb:
#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
#ifndef CONFIG_32BIT
        csum += ror64(csum, 32);
        csum >>= 32;
#endif
        csum = (u32)csum + ror32((u32)csum, 16);
        if (offset & 1)
                return (u16)swab32(csum);
        return csum >> 16;
}
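
/*
 * Worked example of the head masking in do_csum_with_alignment() (values are
 * illustrative): on little-endian RV64 with buff == 0x...1003, offset is 3,
 * ptr starts at 0x...1000 and shift is 24, so "(data >> shift) << shift"
 * clears the three low-order bytes that lie before buff. The over-read never
 * leaves that first doubleword, so it stays within the same page and cache
 * line, and do_csum_common() masks the over-read tail bytes the same way with
 * "(data << shift) >> shift".
 */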

/*
 * Does not perform alignment; should only be used if the machine has fast
 * misaligned accesses, or when buff is known to be aligned.
 */
static inline __no_sanitize_address unsigned int
do_csum_no_alignment(const unsigned char *buff, int len)
{
        unsigned long csum, data;
        const unsigned long *ptr, *end;

        ptr = (const unsigned long *)(buff);
        data = *(ptr++);

        kasan_check_read(buff, len);

        end = (const unsigned long *)(buff + len);
        csum = do_csum_common(ptr, end, data);

        /*
         * Zbb only saves 6 instructions here, so it is not worth a runtime
         * check unless the alternatives mechanism can patch the code instead.
         */
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
                unsigned long fold_temp;

                /*
                 * Zbb is likely available when the kernel is compiled with Zbb
                 * support, so nop when Zbb is available and jump when Zbb is
                 * not available.
                 */
                asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                              RISCV_ISA_EXT_ZBB, 1)
                                  :
                                  :
                                  :
                                  : no_zbb);

#ifdef CONFIG_32BIT
                asm (".option push                              \n\
                .option arch,+zbb                               \n\
                        rori    %[fold_temp], %[csum], 16       \n\
                        add     %[csum], %[fold_temp], %[csum]  \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        :
                        : );

#else /* !CONFIG_32BIT */
                asm (".option push                              \n\
                .option arch,+zbb                               \n\
                        rori    %[fold_temp], %[csum], 32       \n\
                        add     %[csum], %[fold_temp], %[csum]  \n\
                        srli    %[csum], %[csum], 32            \n\
                        roriw   %[fold_temp], %[csum], 16       \n\
                        addw    %[csum], %[fold_temp], %[csum]  \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        :
                        : );
#endif /* !CONFIG_32BIT */
                return csum >> 16;
        }
no_zbb:
#ifndef CONFIG_32BIT
        csum += ror64(csum, 32);
        csum >>= 32;
#endif
        csum = (u32)csum + ror32((u32)csum, 16);
        return csum >> 16;
}
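
/*
 * For reference: on RV64 the Zbb sequences above perform essentially the same
 * folding steps as the plain C no_zbb fallbacks, only in fewer instructions.
 * rori/add/srli implement "csum += ror64(csum, 32); csum >>= 32;" and
 * roriw/addw implement "csum = (u32)csum + ror32((u32)csum, 16);" ahead of
 * the final ">> 16".
 */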

/*
 * Perform a checksum over an arbitrary memory buffer.
 * Does a light-weight alignment pass if buff is misaligned, unless the CPU
 * supports fast misaligned accesses.
 */
unsigned int do_csum(const unsigned char *buff, int len)
{
        if (unlikely(len <= 0))
                return 0;

        /*
         * Significant performance gains can be seen by not doing alignment
         * on machines with fast misaligned accesses.
         *
         * There is some duplicate code between the "with_alignment" and
         * "no_alignment" implementations, but the overlap is too awkward to
         * fit in one function without introducing multiple static branches.
         * The largest chunk of overlap was moved into do_csum_common().
         */
        if (has_fast_unaligned_accesses() || (((unsigned long)buff & OFFSET_MASK) == 0))
                return do_csum_no_alignment(buff, len);

        return do_csum_with_alignment(buff, len);
}
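
/*
 * Usage sketch (illustrative; this assumes the usual wiring where do_csum()
 * serves as the backend for the generic checksum helpers): callers normally
 * reach this code indirectly, for example
 *
 *      __wsum partial = csum_partial(buf, len, 0);
 *      __sum16 folded = csum_fold(partial);
 *
 * where buf and len are caller-provided.
 */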