~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/s390/crypto/crc32le-vx.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/s390/crypto/crc32le-vx.c (Version linux-6.11.5) and /arch/i386/crypto/crc32le-vx.c (Version linux-5.9.16)


  1 /* SPDX-License-Identifier: GPL-2.0 */              1 
  2 /*                                                
  3  * Hardware-accelerated CRC-32 variants for Li    
  4  *                                                
  5  * Use the z/Architecture Vector Extension Fac    
  6  * computing of bitreflected CRC-32 checksums     
  7  * and Castagnoli.                                
  8  *                                                
  9  * This CRC-32 implementation algorithm is bit    
 10  * the least-significant bit first (Little-End    
 11  *                                                
 12  * Copyright IBM Corp. 2015                       
 13  * Author(s): Hendrik Brueckner <brueckner@lin    
 14  */                                               
 15                                                   
 16 #include <linux/types.h>                          
 17 #include <asm/fpu.h>                              
 18 #include "crc32-vx.h"                             
 19                                                   
 20 /* Vector register range containing CRC-32 con    
 21 #define CONST_PERM_LE2BE        9                 
 22 #define CONST_R2R1              10                
 23 #define CONST_R4R3              11                
 24 #define CONST_R5                12                
 25 #define CONST_RU_POLY           13                
 26 #define CONST_CRC_POLY          14                
 27                                                   
 28 /*                                                
 29  * The CRC-32 constant block contains reductio    
 30  * process particular chunks of the input data    
 31  *                                                
 32  * For the CRC-32 variants, the constants are     
 33  * these definitions:                             
 34  *                                                
 35  *      R1 = [(x4*128+32 mod P'(x) << 32)]' <<    
 36  *      R2 = [(x4*128-32 mod P'(x) << 32)]' <<    
 37  *      R3 = [(x128+32 mod P'(x) << 32)]'   <<    
 38  *      R4 = [(x128-32 mod P'(x) << 32)]'   <<    
 39  *      R5 = [(x64 mod P'(x) << 32)]'       <<    
 40  *      R6 = [(x32 mod P'(x) << 32)]'       <<    
 41  *                                                
 42  *      The bitreflected Barrett reduction cons    
 43  *      the bit reversal of floor(x**64 / P(x)    
 44  *                                                
 45  *      where P(x) is the polynomial in the no    
 46  *      polynomial in the reversed (bitreflect    
 47  *                                                
 48  * CRC-32 (IEEE 802.3 Ethernet, ...) polynomia    
 49  *                                                
 50  *      P(x)  = 0x04C11DB7                        
 51  *      P'(x) = 0xEDB88320                        
 52  *                                                
 53  * CRC-32C (Castagnoli) polynomials:              
 54  *                                                
 55  *      P(x)  = 0x1EDC6F41                        
 56  *      P'(x) = 0x82F63B78                        
 57  */                                               
 58                                                   
 59 static unsigned long constants_CRC_32_LE[] = {    
 60         0x0f0e0d0c0b0a0908, 0x0706050403020100    
 61         0x1c6e41596, 0x154442bd4,                 
 62         0x0ccaa009e, 0x1751997d0,                 
 63         0x0, 0x163cd6124,                         
 64         0x0, 0x1f7011641,                         
 65         0x0, 0x1db710641                          
 66 };                                                
 67                                                   
 68 static unsigned long constants_CRC_32C_LE[] =     
 69         0x0f0e0d0c0b0a0908, 0x0706050403020100    
 70         0x09e4addf8, 0x740eef02,                  
 71         0x14cd00bd6, 0xf20c0dfe,                  
 72         0x0, 0x0dd45aab8,                         
 73         0x0, 0x0dea713f1,                         
 74         0x0, 0x105ec76f0                          
 75 };                                                
 76                                                   
 77 /**                                               
 78  * crc32_le_vgfm_generic - Compute CRC-32 (LE     
 79  * @crc: Initial CRC value, typically ~0.         
 80  * @buf: Input buffer pointer, performance mig    
 81  *       buffer is on a doubleword boundary.      
 82  * @size: Size of the buffer, must be 64 bytes    
 83  * @constants: CRC-32 constant pool base point    
 84  *                                                
 85  * Register usage:                                
 86  *      V0:       Initial CRC value and interm    
 87  *      V1..V4:   Data for CRC computation.       
 88  *      V5..V8:   Next data chunks that are fe    
 89  *      V9:       Constant for BE->LE conversi    
 90  *      V10..V14: CRC-32 constants.               
 91  */                                               
 92 static u32 crc32_le_vgfm_generic(u32 crc, unsi    
 93 {                                                 
 94         /* Load CRC-32 constants */               
 95         fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_PO    
 96                                                   
 97         /*                                        
 98          * Load the initial CRC value.            
 99          *                                        
100          * The CRC value is loaded into the ri    
101          * vector register and is later XORed     
102          * of the loaded input data.              
103          */                                       
104         fpu_vzero(0);                   /* Cle    
105         fpu_vlvgf(0, crc, 3);           /* Loa    
106                                                   
107         /* Load a 64-byte data chunk and XOR w    
108         fpu_vlm(1, 4, buf);                       
109         fpu_vperm(1, 1, 1, CONST_PERM_LE2BE);     
110         fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);     
111         fpu_vperm(3, 3, 3, CONST_PERM_LE2BE);     
112         fpu_vperm(4, 4, 4, CONST_PERM_LE2BE);     
113                                                   
114         fpu_vx(1, 0, 1);                /* V1     
115         buf += 64;                                
116         size -= 64;                               
117                                                   
118         while (size >= 64) {                      
119                 fpu_vlm(5, 8, buf);               
120                 fpu_vperm(5, 5, 5, CONST_PERM_    
121                 fpu_vperm(6, 6, 6, CONST_PERM_    
122                 fpu_vperm(7, 7, 7, CONST_PERM_    
123                 fpu_vperm(8, 8, 8, CONST_PERM_    
124                 /*                                
125                  * Perform a GF(2) multiplicat    
126                  * the R1 and R2 reduction con    
127                  * result is then folded (accu    
128                  * in V5 and stored in V1. Rep    
129                  * contents in V2, V3, and V4     
130                  */                               
131                 fpu_vgfmag(1, CONST_R2R1, 1, 5    
132                 fpu_vgfmag(2, CONST_R2R1, 2, 6    
133                 fpu_vgfmag(3, CONST_R2R1, 3, 7    
134                 fpu_vgfmag(4, CONST_R2R1, 4, 8    
135                 buf += 64;                        
136                 size -= 64;                       
137         }                                         
138                                                   
139         /*                                        
140          * Fold V1 to V4 into a single 128-bit    
141          * and R4 and accumulating the next 12    
142          * value remains.                         
143          */                                       
144         fpu_vgfmag(1, CONST_R4R3, 1, 2);          
145         fpu_vgfmag(1, CONST_R4R3, 1, 3);          
146         fpu_vgfmag(1, CONST_R4R3, 1, 4);          
147                                                   
148         while (size >= 16) {                      
149                 fpu_vl(2, buf);                   
150                 fpu_vperm(2, 2, 2, CONST_PERM_    
151                 fpu_vgfmag(1, CONST_R4R3, 1, 2    
152                 buf += 16;                        
153                 size -= 16;                       
154         }                                         
155                                                   
156         /*                                        
157          * Set up a vector register for byte s    
158          * be loaded in bits 1-4 in byte eleme    
159          * Shift by 8 bytes: 0x40                 
160          * Shift by 4 bytes: 0x20                 
161          */                                       
162         fpu_vleib(9, 0x40, 7);                    
163                                                   
164         /*                                        
165          * Prepare V0 for the next GF(2) multi    
166          * to move R4 into the rightmost doubl    
167          * doubleword to 0x1.                     
168          */                                       
169         fpu_vsrlb(0, CONST_R4R3, 9);              
170         fpu_vleig(0, 1, 0);                       
171                                                   
172         /*                                        
173          * Compute GF(2) product of V1 and V0.    
174          * of V1 is multiplied with R4.  The l    
175          * multiplied by 0x1 and is then XORed    
176          * Implicitly, the intermediate leftmo    
177          */                                       
178         fpu_vgfmg(1, 0, 1);                       
179                                                   
180         /*                                        
181          * Now do the final 32-bit fold by mul    
182          * in V1 with R5 and XOR the result wi    
183          *                                        
184          * To achieve this by a single VGFMAG,    
185          * and store the result in V2 which is    
186          * vector unpack instruction to load t    
187          * doubleword into the rightmost doubl    
188          * half is loaded in the leftmost doub    
189          * The vector register with CONST_R5 c    
190          * rightmost doubleword and the leftmo    
191          * the leftmost product of V1.            
192          */                                       
193         fpu_vleib(9, 0x20, 7);            /* S    
194         fpu_vsrlb(2, 1, 9);               /* S    
195         fpu_vupllf(1, 1);                 /* S    
196         fpu_vgfmag(1, CONST_R5, 1, 2);    /* V    
197                                                   
198         /*                                        
199          * Apply a Barrett reduction to compute    
200          *                                        
201          * The input values to the Barrett redu    
202          * in V1 (R(x)), degree-32 generator p    
203          * constant u.  The Barrett reduction r    
204          * P(x).                                  
205          *                                        
206          * The Barrett reduction algorithm is d    
207          *                                        
208          *    1. T1(x) = floor( R(x) / x^32 )     
209          *    2. T2(x) = floor( T1(x) / x^32 )    
210          *    3. C(x)  = R(x) XOR T2(x) mod x^    
211          *                                        
212          *  Note: The leftmost doubleword of v    
213          *  CONST_RU_POLY is zero and, thus, t    
214          *  is zero and does not contribute to    
215          */                                       
216                                                   
217         /* T1(x) = floor( R(x) / x^32 ) GF2MUL    
218         fpu_vupllf(2, 1);                         
219         fpu_vgfmg(2, CONST_RU_POLY, 2);           
220                                                   
221         /*                                        
222          * Compute the GF(2) product of the CR    
223          * V2 and XOR the intermediate result,    
224          * The final result is stored in word     
225          */                                       
226         fpu_vupllf(2, 2);                         
227         fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);      
228                                                   
229         return fpu_vlgvf(2, 2);                   
230 }                                                 
231                                                   
232 u32 crc32_le_vgfm_16(u32 crc, unsigned char co    
233 {                                                 
234         return crc32_le_vgfm_generic(crc, buf,    
235 }                                                 
236                                                   
237 u32 crc32c_le_vgfm_16(u32 crc, unsigned char c    
238 {                                                 
239         return crc32_le_vgfm_generic(crc, buf,    
240 }                                                 
241                                                   

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php