~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/riscv/lib/crc32.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Accelerated CRC32 implementation with Zbc extension.
  4  *
  5  * Copyright (C) 2024 Intel Corporation
  6  */
  7 
  8 #include <asm/hwcap.h>
  9 #include <asm/alternative-macros.h>
 10 #include <asm/byteorder.h>
 11 
 12 #include <linux/types.h>
 13 #include <linux/minmax.h>
 14 #include <linux/crc32poly.h>
 15 #include <linux/crc32.h>
 16 #include <linux/byteorder/generic.h>
 17 
 18 /*
 19  * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
 20  * better understanding of how this math works.
 21  *
 22  * let "+" denotes polynomial add (XOR)
 23  * let "-" denotes polynomial sub (XOR)
 24  * let "*" denotes polynomial multiplication
 25  * let "/" denotes polynomial floor division
 26  * let "S" denotes source data, XLEN bit wide
 27  * let "P" denotes CRC32 polynomial
 28  * let "T" denotes 2^(XLEN+32)
 29  * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
 30  *
 31  * crc32(S, P)
 32  * => S * (2^32) - S * (2^32) / P * P
 33  * => lowest 32 bits of: S * (2^32) / P * P
 34  * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
 35  * => lowest 32 bits of: S * (2^32) * quotient / T * P
 36  * => lowest 32 bits of: S * quotient / 2^XLEN * P
 37  * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
 38  * => clmul_low_part(clmul_high_part(S, QT) + S, P)
 39  *
 40  * In terms of below implementations, the BE case is more intuitive, since the
 41  * higher order bit sits at more significant position.
 42  */
 43 
 44 #if __riscv_xlen == 64
 45 /* Slide by XLEN bits per iteration */
 46 # define STEP_ORDER 3
 47 
 48 /* Each below polynomial quotient has an implicit bit for 2^XLEN */
 49 
 50 /* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
 51 # define CRC32_POLY_QT_LE       0x5a72d812fb808b20
 52 
 53 /* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
 54 # define CRC32C_POLY_QT_LE      0xa434f61c6f5389f8
 55 
 56 /* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
 57  * the same as the bit-reversed version of CRC32_POLY_QT_LE
 58  */
 59 # define CRC32_POLY_QT_BE       0x04d101df481b4e5a
 60 
 61 static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
 62 {
 63         return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
 64 }
 65 
 66 static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
 67 {
 68         u32 crc;
 69 
 70         /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
 71         asm volatile (".option push\n"
 72                       ".option arch,+zbc\n"
 73                       "clmul    %0, %1, %2\n"
 74                       "slli     %0, %0, 1\n"
 75                       "xor      %0, %0, %1\n"
 76                       "clmulr   %0, %0, %3\n"
 77                       "srli     %0, %0, 32\n"
 78                       ".option pop\n"
 79                       : "=&r" (crc)
 80                       : "r" (s),
 81                         "r" (poly_qt),
 82                         "r" ((u64)poly << 32)
 83                       :);
 84         return crc;
 85 }
 86 
 87 static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
 88 {
 89         return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
 90 }
 91 
 92 #elif __riscv_xlen == 32
 93 # define STEP_ORDER 2
 94 /* Each quotient should match the upper half of its analog in RV64 */
 95 # define CRC32_POLY_QT_LE       0xfb808b20
 96 # define CRC32C_POLY_QT_LE      0x6f5389f8
 97 # define CRC32_POLY_QT_BE       0x04d101df
 98 
 99 static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
100 {
101         return crc ^ (__force u32)__cpu_to_le32(*ptr);
102 }
103 
104 static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
105 {
106         u32 crc;
107 
108         /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
109         asm volatile (".option push\n"
110                       ".option arch,+zbc\n"
111                       "clmul    %0, %1, %2\n"
112                       "slli     %0, %0, 1\n"
113                       "xor      %0, %0, %1\n"
114                       "clmulr   %0, %0, %3\n"
115                       ".option pop\n"
116                       : "=&r" (crc)
117                       : "r" (s),
118                         "r" (poly_qt),
119                         "r" (poly)
120                       :);
121         return crc;
122 }
123 
124 static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
125 {
126         return crc ^ (__force u32)__cpu_to_be32(*ptr);
127 }
128 
129 #else
130 # error "Unexpected __riscv_xlen"
131 #endif
132 
133 static inline u32 crc32_be_zbc(unsigned long s)
134 {
135         u32 crc;
136 
137         asm volatile (".option push\n"
138                       ".option arch,+zbc\n"
139                       "clmulh   %0, %1, %2\n"
140                       "xor      %0, %0, %1\n"
141                       "clmul    %0, %0, %3\n"
142                       ".option pop\n"
143                       : "=&r" (crc)
144                       : "r" (s),
145                         "r" (CRC32_POLY_QT_BE),
146                         "r" (CRC32_POLY_BE)
147                       :);
148         return crc;
149 }
150 
151 #define STEP            (1 << STEP_ORDER)
152 #define OFFSET_MASK     (STEP - 1)
153 
154 typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
155 
156 static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
157                                      size_t len, u32 poly,
158                                      unsigned long poly_qt)
159 {
160         size_t bits = len * 8;
161         unsigned long s = 0;
162         u32 crc_low = 0;
163 
164         for (int i = 0; i < len; i++)
165                 s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
166 
167         s ^= (unsigned long)crc << (__riscv_xlen - bits);
168         if (__riscv_xlen == 32 || len < sizeof(u32))
169                 crc_low = crc >> bits;
170 
171         crc = crc32_le_zbc(s, poly, poly_qt);
172         crc ^= crc_low;
173 
174         return crc;
175 }
176 
177 static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
178                                           size_t len, u32 poly,
179                                           unsigned long poly_qt,
180                                           fallback crc_fb)
181 {
182         size_t offset, head_len, tail_len;
183         unsigned long const *p_ul;
184         unsigned long s;
185 
186         asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
187                              RISCV_ISA_EXT_ZBC, 1)
188                  : : : : legacy);
189 
190         /* Handle the unaligned head. */
191         offset = (unsigned long)p & OFFSET_MASK;
192         if (offset && len) {
193                 head_len = min(STEP - offset, len);
194                 crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
195                 p += head_len;
196                 len -= head_len;
197         }
198 
199         tail_len = len & OFFSET_MASK;
200         len = len >> STEP_ORDER;
201         p_ul = (unsigned long const *)p;
202 
203         for (int i = 0; i < len; i++) {
204                 s = crc32_le_prep(crc, p_ul);
205                 crc = crc32_le_zbc(s, poly, poly_qt);
206                 p_ul++;
207         }
208 
209         /* Handle the tail bytes. */
210         p = (unsigned char const *)p_ul;
211         if (tail_len)
212                 crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
213 
214         return crc;
215 
216 legacy:
217         return crc_fb(crc, p, len);
218 }
219 
220 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
221 {
222         return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
223                                 crc32_le_base);
224 }
225 
226 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
227 {
228         return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
229                                 CRC32C_POLY_QT_LE, __crc32c_le_base);
230 }
231 
232 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
233                                      size_t len)
234 {
235         size_t bits = len * 8;
236         unsigned long s = 0;
237         u32 crc_low = 0;
238 
239         s = 0;
240         for (int i = 0; i < len; i++)
241                 s = *p++ | (s << 8);
242 
243         if (__riscv_xlen == 32 || len < sizeof(u32)) {
244                 s ^= crc >> (32 - bits);
245                 crc_low = crc << bits;
246         } else {
247                 s ^= (unsigned long)crc << (bits - 32);
248         }
249 
250         crc = crc32_be_zbc(s);
251         crc ^= crc_low;
252 
253         return crc;
254 }
255 
256 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
257 {
258         size_t offset, head_len, tail_len;
259         unsigned long const *p_ul;
260         unsigned long s;
261 
262         asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
263                              RISCV_ISA_EXT_ZBC, 1)
264                  : : : : legacy);
265 
266         /* Handle the unaligned head. */
267         offset = (unsigned long)p & OFFSET_MASK;
268         if (offset && len) {
269                 head_len = min(STEP - offset, len);
270                 crc = crc32_be_unaligned(crc, p, head_len);
271                 p += head_len;
272                 len -= head_len;
273         }
274 
275         tail_len = len & OFFSET_MASK;
276         len = len >> STEP_ORDER;
277         p_ul = (unsigned long const *)p;
278 
279         for (int i = 0; i < len; i++) {
280                 s = crc32_be_prep(crc, p_ul);
281                 crc = crc32_be_zbc(s);
282                 p_ul++;
283         }
284 
285         /* Handle the tail bytes. */
286         p = (unsigned char const *)p_ul;
287         if (tail_len)
288                 crc = crc32_be_unaligned(crc, p, tail_len);
289 
290         return crc;
291 
292 legacy:
293         return crc32_be_base(crc, p, len);
294 }
295 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php