1 /* SPDX-License-Identifier: GPL-2.0 */ 1 2 /* 3 * Hardware-accelerated CRC-32 variants for Li 4 * 5 * Use the z/Architecture Vector Extension Fac 6 * computing of CRC-32 checksums. 7 * 8 * This CRC-32 implementation algorithm proces 9 * bit first (BE). 10 * 11 * Copyright IBM Corp. 2015 12 * Author(s): Hendrik Brueckner <brueckner@lin 13 */ 14 15 #include <linux/types.h> 16 #include <asm/fpu.h> 17 #include "crc32-vx.h" 18 19 /* Vector register range containing CRC-32 con 20 #define CONST_R1R2 9 21 #define CONST_R3R4 10 22 #define CONST_R5 11 23 #define CONST_R6 12 24 #define CONST_RU_POLY 13 25 #define CONST_CRC_POLY 14 26 27 /* 28 * The CRC-32 constant block contains reductio 29 * process particular chunks of the input data 30 * 31 * For the CRC-32 variants, the constants are 32 * these definitions: 33 * 34 * R1 = x4*128+64 mod P(x) 35 * R2 = x4*128 mod P(x) 36 * R3 = x128+64 mod P(x) 37 * R4 = x128 mod P(x) 38 * R5 = x96 mod P(x) 39 * R6 = x64 mod P(x) 40 * 41 * Barret reduction constant, u, is defin 42 * 43 * where P(x) is the polynomial in the no 44 * polynomial in the reversed (bitreflect 45 * 46 * Note that the constant definitions below ar 47 * intermediate results with a single VECTOR G 48 * The rightmost doubleword can be 0 to preven 49 * can be multiplied by 1 to perform an XOR wi 50 * VECTOR EXCLUSIVE OR instruction. 51 * 52 * CRC-32 (IEEE 802.3 Ethernet, ...) polynomia 53 * 54 * P(x) = 0x04C11DB7 55 * P'(x) = 0xEDB88320 56 */ 57 58 static unsigned long constants_CRC_32_BE[] = { 59 0x08833794c, 0x0e6228b11, /* R1, 60 0x0c5b9cd4c, 0x0e8a45605, /* R3, 61 0x0f200aa66, 1UL << 32, /* R5, 62 0x0490d678d, 1, /* R6, 63 0x104d101df, 0, /* u * 64 0x104C11DB7, 0, /* P(x 65 }; 66 67 /** 68 * crc32_be_vgfm_16 - Compute CRC-32 (BE varia 69 * @crc: Initial CRC value, typically ~0. 70 * @buf: Input buffer pointer, performance mig 71 * buffer is on a doubleword boundary. 
72 * @size: Size of the buffer, must be 64 bytes 73 * 74 * Register usage: 75 * V0: Initial CRC value and intermed 76 * V1..V4: Data for CRC computation. 77 * V5..V8: Next data chunks that are fetc 78 * V9..V14: CRC-32 constants. 79 */ 80 u32 crc32_be_vgfm_16(u32 crc, unsigned char co 81 { 82 /* Load CRC-32 constants */ 83 fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &c 84 fpu_vzero(0); 85 86 /* Load the initial CRC value into the 87 fpu_vlvgf(0, crc, 0); 88 89 /* Load a 64-byte data chunk and XOR w 90 fpu_vlm(1, 4, buf); 91 fpu_vx(1, 0, 1); 92 buf += 64; 93 size -= 64; 94 95 while (size >= 64) { 96 /* Load the next 64-byte data 97 fpu_vlm(5, 8, buf); 98 99 /* 100 * Perform a GF(2) multiplicat 101 * the reduction constants in 102 * then folded (accumulated) w 103 * stored in V1. Repeat this 104 * in V2, V3, and V4 respectiv 105 */ 106 fpu_vgfmag(1, CONST_R1R2, 1, 5 107 fpu_vgfmag(2, CONST_R1R2, 2, 6 108 fpu_vgfmag(3, CONST_R1R2, 3, 7 109 fpu_vgfmag(4, CONST_R1R2, 4, 8 110 buf += 64; 111 size -= 64; 112 } 113 114 /* Fold V1 to V4 into a single 128-bit 115 fpu_vgfmag(1, CONST_R3R4, 1, 2); 116 fpu_vgfmag(1, CONST_R3R4, 1, 3); 117 fpu_vgfmag(1, CONST_R3R4, 1, 4); 118 119 while (size >= 16) { 120 fpu_vl(2, buf); 121 fpu_vgfmag(1, CONST_R3R4, 1, 2 122 buf += 16; 123 size -= 16; 124 } 125 126 /* 127 * The R5 constant is used to fold a 1 128 * that is XORed with the next 96-bit 129 * VGFMG instruction, multiply the rig 130 * form an intermediate 96-bit value ( 131 * XORed with the intermediate reducti 132 */ 133 fpu_vgfmg(1, CONST_R5, 1); 134 135 /* 136 * Further reduce the remaining 96-bit 137 * single VGFMG, the rightmost doublew 138 * intermediate result is then XORed w 139 * doubleword with R6. The result is 140 * the Barret reduction. 141 */ 142 fpu_vgfmg(1, CONST_R6, 1); 143 144 /* 145 * The input values to the Barret redu 146 * in V1 (R(x)), degree-32 generator p 147 * constant u. The Barret reduction r 148 * P(x). 
149 * 150 * The Barret reduction algorithm is d 151 * 152 * 1. T1(x) = floor( R(x) / x^32 ) 153 * 2. T2(x) = floor( T1(x) / x^32 ) 154 * 3. C(x) = R(x) XOR T2(x) mod x^ 155 * 156 * Note: To compensate the division by 157 * instruction to move the leftmost wo 158 * of the vector register. The rightm 159 * with zero to not contribute to the 160 */ 161 162 /* T1(x) = floor( R(x) / x^32 ) GF2MUL 163 fpu_vupllf(2, 1); 164 fpu_vgfmg(2, CONST_RU_POLY, 2); 165 166 /* 167 * Compute the GF(2) product of the CR 168 * V2 and XOR the intermediate result, 169 * The final result is in the rightmos 170 */ 171 fpu_vupllf(2, 2); 172 fpu_vgfmag(2, CONST_CRC_POLY, 2, 1); 173 return fpu_vlgvf(2, 3); 174 } 175
/*
 * Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
 * TOMOYO® is a registered trademark of NTT DATA CORPORATION.
 */