~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/s390/crypto/crc32be-vx.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*
  3  * Hardware-accelerated CRC-32 variants for Linux on z Systems
  4  *
  5  * Use the z/Architecture Vector Extension Facility to accelerate the
  6  * computing of CRC-32 checksums.
  7  *
  8  * This CRC-32 implementation algorithm processes the most-significant
  9  * bit first (BE).
 10  *
 11  * Copyright IBM Corp. 2015
 12  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 13  */
 14 
 15 #include <linux/types.h>
 16 #include <asm/fpu.h>
 17 #include "crc32-vx.h"
 18 
 19 /* Vector register numbers (V9..V14) that hold the CRC-32 constants */
 20 #define CONST_R1R2              9
 21 #define CONST_R3R4              10
 22 #define CONST_R5                11
 23 #define CONST_R6                12
 24 #define CONST_RU_POLY           13
 25 #define CONST_CRC_POLY          14
 26 
 27 /*
 28  * The CRC-32 constant block contains reduction constants to fold and
 29  * process particular chunks of the input data stream in parallel.
 30  *
 31  * For the CRC-32 variants, the constants are precomputed according to
 32  * these definitions:
 33  *
 34  *      R1 = x^(4*128+64) mod P(x)
 35  *      R2 = x^(4*128)    mod P(x)
 36  *      R3 = x^(128+64)   mod P(x)
 37  *      R4 = x^128        mod P(x)
 38  *      R5 = x^96         mod P(x)
 39  *      R6 = x^64         mod P(x)
 40  *
 41  *      Barrett reduction constant, u, is defined as floor(x**64 / P(x)).
 42  *
 43  *      where P(x) is the polynomial in the normal domain and P'(x) is the
 44  *      polynomial in the reversed (bit-reflected) domain.
 45  *
 46  * Note that the constant definitions below are extended in order to compute
 47  * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
 48  * The rightmost doubleword can be 0 to prevent contribution to the result or
 49  * can be multiplied by 1 to perform an XOR without the need for a separate
 50  * VECTOR EXCLUSIVE OR instruction.
 51  *
 52  * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
 53  *
 54  *      P(x)  = 0x04C11DB7
 55  *      P'(x) = 0xEDB88320
 56  */
 57 
 58 static unsigned long constants_CRC_32_BE[] = { /* rows follow the order CONST_R1R2..CONST_CRC_POLY */
 59         0x08833794c, 0x0e6228b11,       /* R1, R2 */
 60         0x0c5b9cd4c, 0x0e8a45605,       /* R3, R4 */
 61         0x0f200aa66, 1UL << 32,         /* R5, x^32 */
 62         0x0490d678d, 1,                 /* R6, 1 */
 63         0x104d101df, 0,                 /* u (Barrett reduction constant), 0 */
 64         0x104C11DB7, 0,                 /* P(x), 0 */
 65 };
 66 
 67 /**
 68  * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers
 69  * @crc: Initial CRC value, typically ~0.
 70  * @buf: Input buffer pointer, performance might be improved if the
 71  *        buffer is on a doubleword boundary.
 72  * @size: Size of the buffer, must be 64 bytes or greater; a tail of size % 16 bytes is not consumed.
 73  *
 74  * Register usage:
 75  *      V0:     Initial CRC value and intermediate constants and results.
 76  *      V1..V4: Data for CRC computation.
 77  *      V5..V8: Next data chunks that are fetched from the input buffer.
 78  *      V9..V14: CRC-32 constants.
 79  */
 80 u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
 81 {
 82         /* Load CRC-32 constants into V9..V14 (CONST_R1R2..CONST_CRC_POLY) */
 83         fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE);
 84         fpu_vzero(0);
 85 
 86         /* Load the initial CRC value into the leftmost word of V0. */
 87         fpu_vlvgf(0, crc, 0);
 88 
 89         /* Load the first 64-byte data chunk into V1..V4 and XOR V1 with the CRC */
 90         fpu_vlm(1, 4, buf);
 91         fpu_vx(1, 0, 1);
 92         buf += 64;
 93         size -= 64; /* size_t would wrap here if size < 64, hence the precondition */
 94 
 95         while (size >= 64) {
 96                 /* Load the next 64-byte data chunk into V5 to V8 */
 97                 fpu_vlm(5, 8, buf);
 98 
 99                 /*
100                  * Perform a GF(2) multiplication of the doublewords in V1 with
101                  * the R1/R2 constants in CONST_R1R2 (V9).  The intermediate result
102                  * is then folded (accumulated) with the next data chunk in V5 and
103                  * stored in V1.  Repeat this step for the register contents
104                  * in V2, V3, and V4 respectively.
105                  */
106                 fpu_vgfmag(1, CONST_R1R2, 1, 5);
107                 fpu_vgfmag(2, CONST_R1R2, 2, 6);
108                 fpu_vgfmag(3, CONST_R1R2, 3, 7);
109                 fpu_vgfmag(4, CONST_R1R2, 4, 8);
110                 buf += 64;
111                 size -= 64;
112         }
113 
114         /* Fold V1 to V4 into a single 128-bit value in V1 (uses CONST_R3R4) */
115         fpu_vgfmag(1, CONST_R3R4, 1, 2);
116         fpu_vgfmag(1, CONST_R3R4, 1, 3);
117         fpu_vgfmag(1, CONST_R3R4, 1, 4);
118 
119         while (size >= 16) { /* fold each remaining whole 16-byte block into V1 */
120                 fpu_vl(2, buf);
121                 fpu_vgfmag(1, CONST_R3R4, 1, 2);
122                 buf += 16;
123                 size -= 16;
124         }
125 
126         /*
127          * The R5 constant is used to fold a 128-bit value into a 96-bit value
128          * that is XORed with the next 96-bit input data chunk.  To use a single
129          * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to
130          * form an intermediate 96-bit value (with appended zeros) which is then
131          * XORed with the intermediate reduction result.
132          */
133         fpu_vgfmg(1, CONST_R5, 1);
134 
135         /*
136          * Further reduce the remaining 96-bit value to a 64-bit value using a
137          * single VGFMG, the rightmost doubleword is multiplied with 0x1. The
138          * intermediate result is then XORed with the product of the leftmost
139          * doubleword with R6.  The result is a 64-bit value and is subject to
140          * the Barrett reduction.
141          */
142         fpu_vgfmg(1, CONST_R6, 1);
143 
144         /*
145          * The input values to the Barrett reduction are the degree-63 polynomial
146          * in V1 (R(x)), degree-32 generator polynomial, and the reduction
147          * constant u.  The Barrett reduction result is the CRC value of R(x) mod
148          * P(x).
149          *
150          * The Barrett reduction algorithm is defined as:
151          *
152          *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
153          *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
154          *    3. C(x)  = R(x) XOR T2(x) mod x^32
155          *
156          * Note: To compensate the division by x^32, use the vector unpack
157          * instruction to move the leftmost word into the leftmost doubleword
158          * of the vector register.  The rightmost doubleword is multiplied
159          * with zero to not contribute to the intermediate results.
160          */
161 
162         /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
163         fpu_vupllf(2, 1);
164         fpu_vgfmg(2, CONST_RU_POLY, 2);
165 
166         /*
167          * Compute the GF(2) product of the CRC polynomial (CONST_CRC_POLY)
168          * with T1(x) in V2 and XOR the intermediate result, T2(x), with the
169          * value in V1.  The final result is in the rightmost word of V2.
170          */
171         fpu_vupllf(2, 2);
172         fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
173         return fpu_vlgvf(2, 3);
174 }
175 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php