~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/crypto/sm3-avx-asm_64.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/x86/crypto/sm3-avx-asm_64.S (Architecture ppc) and /arch/alpha/crypto/sm3-avx-asm_64.S (Architecture alpha)


  1 /* SPDX-License-Identifier: GPL-2.0-or-later *    
  2 /*                                                
  3  * SM3 AVX accelerated transform.                 
  4  * specified in: https://datatracker.ietf.org/    
  5  *                                                
  6  * Copyright (C) 2021 Jussi Kivilinna <jussi.ki    
  7  * Copyright (C) 2021 Tianjia Zhang <tianjia.zh    
  8  */                                               
  9                                                   
 10 /* Based on SM3 AES/BMI2 accelerated work by l    
 11  *  https://gnupg.org/software/libgcrypt/index    
 12  */                                               
 13                                                   
 14 #include <linux/linkage.h>                        
 15 #include <linux/cfi_types.h>                      
 16 #include <asm/frame.h>                            
 17                                                   
 18 /* Context structure */
    /*
     * Byte offsets of the eight 32-bit state words h0..h7. Each one is used
     * as a displacement from the state pointer, e.g.
     * "movl state_h0(RSTATE), a" when loading and "movl a, state_h0(RSTATE)"
     * when storing the chaining variables.
     */
 19
 20 #define state_h0 0
 21 #define state_h1 4
 22 #define state_h2 8
 23 #define state_h3 12
 24 #define state_h4 16
 25 #define state_h5 20
 26 #define state_h6 24
 27 #define state_h7 28
 28                                                   
 29 /* Constants */
 30
 31 /* Round constant macros */
    /*
     * K<j> is the additive constant for round j; macro R consumes it as the
     * displacement in "leal K##round(t0, e, 1), t1". The values are written
     * as signed 32-bit decimals, with the unsigned hex value in the trailing
     * comment of each line.
     *
     * As the hex values show, K0..K15 are successive 1-bit left rotations of
     * 0x79cc4519, and K16..K63 are successive 1-bit left rotations starting
     * at 0x9d8a7a87. A 32-bit rotation has period 32, so K48..K63 repeat
     * K16..K31.
     */
 32
 33 #define K0   2043430169  /* 0x79cc4519 */
 34 #define K1   -208106958  /* 0xf3988a32 */
 35 #define K2   -416213915  /* 0xe7311465 */
 36 #define K3   -832427829  /* 0xce6228cb */
 37 #define K4  -1664855657  /* 0x9cc45197 */
 38 #define K5    965255983  /* 0x3988a32f */
 39 #define K6   1930511966  /* 0x7311465e */
 40 #define K7   -433943364  /* 0xe6228cbc */
 41 #define K8   -867886727  /* 0xcc451979 */
 42 #define K9  -1735773453  /* 0x988a32f3 */
 43 #define K10   823420391  /* 0x311465e7 */
 44 #define K11  1646840782  /* 0x6228cbce */
 45 #define K12 -1001285732  /* 0xc451979c */
 46 #define K13 -2002571463  /* 0x88a32f39 */
 47 #define K14   289824371  /* 0x11465e73 */
 48 #define K15   579648742  /* 0x228cbce6 */
 49 #define K16 -1651869049  /* 0x9d8a7a87 */
 50 #define K17   991229199  /* 0x3b14f50f */
 51 #define K18  1982458398  /* 0x7629ea1e */
 52 #define K19  -330050500  /* 0xec53d43c */
 53 #define K20  -660100999  /* 0xd8a7a879 */
 54 #define K21 -1320201997  /* 0xb14f50f3 */
 55 #define K22  1654563303  /* 0x629ea1e7 */
 56 #define K23  -985840690  /* 0xc53d43ce */
 57 #define K24 -1971681379  /* 0x8a7a879d */
 58 #define K25   351604539  /* 0x14f50f3b */
 59 #define K26   703209078  /* 0x29ea1e76 */
 60 #define K27  1406418156  /* 0x53d43cec */
 61 #define K28 -1482130984  /* 0xa7a879d8 */
 62 #define K29  1330705329  /* 0x4f50f3b1 */
 63 #define K30 -1633556638  /* 0x9ea1e762 */
 64 #define K31  1027854021  /* 0x3d43cec5 */
 65 #define K32  2055708042  /* 0x7a879d8a */
 66 #define K33  -183551212  /* 0xf50f3b14 */
 67 #define K34  -367102423  /* 0xea1e7629 */
 68 #define K35  -734204845  /* 0xd43cec53 */
 69 #define K36 -1468409689  /* 0xa879d8a7 */
 70 #define K37  1358147919  /* 0x50f3b14f */
 71 #define K38 -1578671458  /* 0xa1e7629e */
 72 #define K39  1137624381  /* 0x43cec53d */
 73 #define K40 -2019718534  /* 0x879d8a7a */
 74 #define K41   255530229  /* 0x0f3b14f5 */
 75 #define K42   511060458  /* 0x1e7629ea */
 76 #define K43  1022120916  /* 0x3cec53d4 */
 77 #define K44  2044241832  /* 0x79d8a7a8 */
 78 #define K45  -206483632  /* 0xf3b14f50 */
 79 #define K46  -412967263  /* 0xe7629ea1 */
 80 #define K47  -825934525  /* 0xcec53d43 */
 81 #define K48 -1651869049  /* 0x9d8a7a87 */
 82 #define K49   991229199  /* 0x3b14f50f */
 83 #define K50  1982458398  /* 0x7629ea1e */
 84 #define K51  -330050500  /* 0xec53d43c */
 85 #define K52  -660100999  /* 0xd8a7a879 */
 86 #define K53 -1320201997  /* 0xb14f50f3 */
 87 #define K54  1654563303  /* 0x629ea1e7 */
 88 #define K55  -985840690  /* 0xc53d43ce */
 89 #define K56 -1971681379  /* 0x8a7a879d */
 90 #define K57   351604539  /* 0x14f50f3b */
 91 #define K58   703209078  /* 0x29ea1e76 */
 92 #define K59  1406418156  /* 0x53d43cec */
 93 #define K60 -1482130984  /* 0xa7a879d8 */
 94 #define K61  1330705329  /* 0x4f50f3b1 */
 95 #define K62 -1633556638  /* 0x9ea1e762 */
 96 #define K63  1027854021  /* 0x3d43cec5 */
 97                                                   
 98 /* Register macros */
    /*
     * RSTATE, RDATA and RNBLKS name the three argument registers described in
     * the function's input comment (%rdi: ctx, %rsi: data, %rdx: nblocks).
     *
     * t0-t2 are 32-bit scratch registers for the round macros. a-h are the
     * eight 32-bit working state words, loaded from state_h0..state_h7 at
     * function entry and written back after each block.
     *
     * %rbx (t1) and %r12-%r15 (e-h) are the registers the function stores to
     * its STACK_REG_SAVE area before using them.
     *
     * W0-W5 and XTMP0-XTMP6 are XMM registers used by the input-load and
     * message-schedule macros. BSWAP_REG is loaded from .Lbe32mask and used
     * as the vpshufb control when loading input words.
     */
 99
100 #define RSTATE %rdi
101 #define RDATA  %rsi
102 #define RNBLKS %rdx
103
104 #define t0 %eax
105 #define t1 %ebx
106 #define t2 %ecx
107
108 #define a %r8d
109 #define b %r9d
110 #define c %r10d
111 #define d %r11d
112 #define e %r12d
113 #define f %r13d
114 #define g %r14d
115 #define h %r15d
116
117 #define W0 %xmm0
118 #define W1 %xmm1
119 #define W2 %xmm2
120 #define W3 %xmm3
121 #define W4 %xmm4
122 #define W5 %xmm5
123
124 #define XTMP0 %xmm6
125 #define XTMP1 %xmm7
126 #define XTMP2 %xmm8
127 #define XTMP3 %xmm9
128 #define XTMP4 %xmm10
129 #define XTMP5 %xmm11
130 #define XTMP6 %xmm12
131
132 #define BSWAP_REG %xmm15
133                                                   
134 /* Stack structure */                             
135                                                   
136 #define STACK_W_SIZE        (32 * 2 * 3)          
137 #define STACK_REG_SAVE_SIZE (64)                  
138                                                   
139 #define STACK_W             (0)                   
140 #define STACK_REG_SAVE      (STACK_W + STACK_W    
141 #define STACK_SIZE          (STACK_REG_SAVE +     
142                                                   
143 /* Instruction helpers. */
    /* All helpers use AT&T operand order: source first, destination last. */
144
    /* roll2: rotate reg left by the constant v, in place. */
145 #define roll2(v, reg)           \
146         roll $(v), reg;
147
    /* roll3mov: copy src into dst, then rotate dst left by v; src is unchanged. */
148 #define roll3mov(v, src, dst)   \
149         movl src, dst;          \
150         roll $(v), dst;
151
    /*
     * roll3: rotate src right by (32 - v) bits into dst with a single rorxl,
     * which is the same as rotating left by v. src is not modified.
     */
152 #define roll3(v, src, dst)      \
153         rorxl $(32-(v)), src, dst;
154
    /*
     * addl2: out = a + out, computed with leal (an address calculation)
     * instead of addl, so the flags are left untouched.
     */
155 #define addl2(a, out)           \
156         leal (a, out), out;
157                                                   
158 /* Round function macros. */
    /*
     * Common signature (x, y, z, o, t): x, y, z are inputs, o receives the
     * result, and t is a scratch register that may be overwritten.
     */
159
    /* GG1: o = x ^ y ^ z. The scratch argument t is not used. */
160 #define GG1(x, y, z, o, t)      \
161         movl x, o;              \
162         xorl y, o;              \
163         xorl z, o;
164
    /* FF1 is the same three-way XOR as GG1. */
165 #define FF1(x, y, z, o, t) GG1(x, y, z, o, t)
166
    /*
     * GG2: o = (x & y) + (~x & z). andnl produces ~x & z, and t holds x & y.
     * The two terms never have a set bit in the same position, so the
     * addition done by addl2 gives the same result as a bitwise OR.
     */
167 #define GG2(x, y, z, o, t)      \
168         andnl z, x, o;          \
169         movl y, t;              \
170         andl x, t;              \
171         addl2(t, o);
172
    /*
     * FF2: o = ((x ^ y) & z) ^ (x & y), i.e. the bitwise majority of
     * x, y and z. t holds x & y.
     */
173 #define FF2(x, y, z, o, t)      \
174         movl y, o;              \
175         xorl x, o;              \
176         movl y, t;              \
177         andl x, t;              \
178         andl z, o;              \
179         xorl t, o;
180                                                   
181 #define R(i, a, b, c, d, e, f, g, h, round, wi    
182         /* rol(a, 12) => t0 */                    
183         roll3mov(12, a, t0); /* rorxl here wou    
184         /* rol (t0 + e + t), 7) => t1 */          
185         leal K##round(t0, e, 1), t1;              
186         roll2(7, t1);                             
187         /* h + w1 => h */                         
188         addl wtype##_W1_ADDR(round, widx), h;     
189         /* h + t1 => h */                         
190         addl2(t1, h);                             
191         /* t1 ^ t0 => t0 */                       
192         xorl t1, t0;                              
193         /* w1w2 + d => d */                       
194         addl wtype##_W1W2_ADDR(round, widx), d    
195         /* FF##i(a,b,c) => t1 */                  
196         FF##i(a, b, c, t1, t2);                   
197         /* d + t1 => d */                         
198         addl2(t1, d);                             
199         /* GG#i(e,f,g) => t2 */                   
200         GG##i(e, f, g, t2, t1);                   
201         /* h + t2 => h */                         
202         addl2(t2, h);                             
203         /* rol (f, 19) => f */                    
204         roll2(19, f);                             
205         /* d + t0 => d */                         
206         addl2(t0, d);                             
207         /* rol (b, 9) => b */                     
208         roll2(9, b);                              
209         /* P0(h) => h */                          
210         roll3(9, h, t2);                          
211         roll3(17, h, t1);                         
212         xorl t2, h;                               
213         xorl t1, h;                               
214                                                   
215 #define R1(a, b, c, d, e, f, g, h, round, widx    
216         R(1, a, b, c, d, e, f, g, h, round, wi    
217                                                   
218 #define R2(a, b, c, d, e, f, g, h, round, widx    
219         R(2, a, b, c, d, e, f, g, h, round, wi    
220                                                   
221 /* Input expansion macros. */                     
222                                                   
223 /* Byte-swapped input address. */                 
224 #define IW_W_ADDR(round, widx, offs) \            
225         (STACK_W + ((round) / 4) * 64 + (offs)    
226                                                   
227 /* Expanded input address. */                     
228 #define XW_W_ADDR(round, widx, offs) \            
229         (STACK_W + ((((round) / 3) - 4) % 2) *    
230                                                   
231 /* Rounds 1-12, byte-swapped input block addre    
232 #define IW_W1_ADDR(round, widx)   IW_W_ADDR(ro    
233 #define IW_W1W2_ADDR(round, widx) IW_W_ADDR(ro    
234                                                   
235 /* Rounds 12-63 (0-based), expanded input block addresses. */
236 #define XW_W1_ADDR(round, widx)   XW_W_ADDR(ro    
237 #define XW_W1W2_ADDR(round, widx) XW_W_ADDR(ro    
238                                                   
239 /* Input block loading. */                        
240 #define LOAD_W_XMM_1()                            
241         vmovdqu 0*16(RDATA), XTMP0; /* XTMP0:     
242         vmovdqu 1*16(RDATA), XTMP1; /* XTMP1:     
243         vmovdqu 2*16(RDATA), XTMP2; /* XTMP2:     
244         vmovdqu 3*16(RDATA), XTMP3; /* XTMP3:     
245         vpshufb BSWAP_REG, XTMP0, XTMP0;          
246         vpshufb BSWAP_REG, XTMP1, XTMP1;          
247         vpshufb BSWAP_REG, XTMP2, XTMP2;          
248         vpshufb BSWAP_REG, XTMP3, XTMP3;          
249         vpxor XTMP0, XTMP1, XTMP4;                
250         vpxor XTMP1, XTMP2, XTMP5;                
251         vpxor XTMP2, XTMP3, XTMP6;                
252         leaq 64(RDATA), RDATA;                    
253         vmovdqa XTMP0, IW_W1_ADDR(0, 0);          
254         vmovdqa XTMP4, IW_W1W2_ADDR(0, 0);        
255         vmovdqa XTMP1, IW_W1_ADDR(4, 0);          
256         vmovdqa XTMP5, IW_W1W2_ADDR(4, 0);        
257                                                   
258 #define LOAD_W_XMM_2()                            
259         vmovdqa XTMP2, IW_W1_ADDR(8, 0);          
260         vmovdqa XTMP6, IW_W1W2_ADDR(8, 0);        
261                                                   
262 #define LOAD_W_XMM_3()                            
263         vpshufd $0b00000000, XTMP0, W0; /* W0:    
264         vpshufd $0b11111001, XTMP0, W1; /* W1:    
265         vmovdqa XTMP1, W2;              /* W2:    
266         vpalignr $12, XTMP1, XTMP2, W3; /* W3:    
267         vpalignr $8, XTMP2, XTMP3, W4;  /* W4:    
268         vpshufd $0b11111001, XTMP3, W5; /* W5:    
269                                                   
270 /* Message scheduling. Note: 3 words per XMM r    
271 #define SCHED_W_0(round, w0, w1, w2, w3, w4, w    
272         /* Load (w[i - 16]) => XTMP0 */           
273         vpshufd $0b10111111, w0, XTMP0;           
274         vpalignr $12, XTMP0, w1, XTMP0; /* XTM    
275         /* Load (w[i - 13]) => XTMP1 */           
276         vpshufd $0b10111111, w1, XTMP1;           
277         vpalignr $12, XTMP1, w2, XTMP1;           
278         /* w[i - 9] == w3 */                      
279         /* XMM3 ^ XTMP0 => XTMP0 */               
280         vpxor w3, XTMP0, XTMP0;                   
281                                                   
282 #define SCHED_W_1(round, w0, w1, w2, w3, w4, w    
283         /* w[i - 3] == w5 */                      
284         /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */      
285         vpslld $15, w5, XTMP2;                    
286         vpsrld $(32-15), w5, XTMP3;               
287         vpxor XTMP2, XTMP3, XTMP3;                
288         vpxor XTMP3, XTMP0, XTMP0;                
289         /* rol(XTMP1, 7) => XTMP1 */              
290         vpslld $7, XTMP1, XTMP5;                  
291         vpsrld $(32-7), XTMP1, XTMP1;             
292         vpxor XTMP5, XTMP1, XTMP1;                
293         /* XMM4 ^ XTMP1 => XTMP1 */               
294         vpxor w4, XTMP1, XTMP1;                   
295         /* w[i - 6] == XMM4 */                    
296         /* P1(XTMP0) ^ XTMP1 => XMM0 */           
297         vpslld $15, XTMP0, XTMP5;                 
298         vpsrld $(32-15), XTMP0, XTMP6;            
299         vpslld $23, XTMP0, XTMP2;                 
300         vpsrld $(32-23), XTMP0, XTMP3;            
301         vpxor XTMP0, XTMP1, XTMP1;                
302         vpxor XTMP6, XTMP5, XTMP5;                
303         vpxor XTMP3, XTMP2, XTMP2;                
304         vpxor XTMP2, XTMP5, XTMP5;                
305         vpxor XTMP5, XTMP1, w0;                   
306                                                   
307 #define SCHED_W_2(round, w0, w1, w2, w3, w4, w    
308         /* W1 in XMM12 */                         
309         vpshufd $0b10111111, w4, XTMP4;           
310         vpalignr $12, XTMP4, w5, XTMP4;           
311         vmovdqa XTMP4, XW_W1_ADDR((round), 0);    
312         /* W1 ^ W2 => XTMP1 */                    
313         vpxor w0, XTMP4, XTMP1;                   
314         vmovdqa XTMP1, XW_W1W2_ADDR((round), 0    
315                                                   
316                                                   
317 .section        .rodata.cst16, "aM", @progbits    
318 .align 16                                         
319                                                   
320 .Lbe32mask:                                       
321         .long 0x00010203, 0x04050607, 0x08090a    
322                                                   
323 .text                                             
324                                                   
325 /*                                                
326  * Transform nblocks*64 bytes (nblocks*16 32-b    
327  *                                                
328  * void sm3_transform_avx(struct sm3_state *st    
329  *                        const u8 *data, int     
330  */                                               
331 SYM_TYPED_FUNC_START(sm3_transform_avx)
332         /* input:
333          *      %rdi: ctx, CTX
334          *      %rsi: data (64*nblks bytes)
335          *      %rdx: nblocks
336          */
337         vzeroupper;
338
            /* Frame pointer setup; %rbp keeps the caller's %rsp for the epilogue. */
339         pushq %rbp;
340         movq %rsp, %rbp;
341
            /* RNBLKS is defined as %rdx, so this move leaves the register unchanged. */
342         movq %rdx, RNBLKS;
343
            /* Reserve STACK_SIZE bytes, then round %rsp down to a 64-byte boundary. */
344         subq $STACK_SIZE, %rsp;
345         andq $(~63), %rsp;
346
            /* Save %rbx and %r12-%r15 (used as t1 and e-h) in the register save area. */
347         movq %rbx, (STACK_REG_SAVE + 0 * 8)(%r
348         movq %r15, (STACK_REG_SAVE + 1 * 8)(%r
349         movq %r14, (STACK_REG_SAVE + 2 * 8)(%r
350         movq %r13, (STACK_REG_SAVE + 3 * 8)(%r
351         movq %r12, (STACK_REG_SAVE + 4 * 8)(%r
352
            /* Load the byte-shuffle mask that LOAD_W_XMM_1 uses with vpshufb. */
353         vmovdqa .Lbe32mask (%rip), BSWAP_REG;
354
355         /* Get the values of the chaining vari
356         movl state_h0(RSTATE), a;
357         movl state_h1(RSTATE), b;
358         movl state_h2(RSTATE), c;
359         movl state_h3(RSTATE), d;
360         movl state_h4(RSTATE), e;
361         movl state_h5(RSTATE), f;
362         movl state_h6(RSTATE), g;
363         movl state_h7(RSTATE), h;
364
365 .align 16
366 .Loop:
367         /* Load data part1. */
368         LOAD_W_XMM_1();
369
            /* One block consumed: decrement the counter (leaq leaves the flags alone). */
370         leaq -1(RNBLKS), RNBLKS;
371
            /*
             * Each R1/R2 invocation below permutes the register names
             * (a,b,c,d -> d,a,b,c and e,f,g,h -> h,e,f,g), so the state rotates
             * by renaming rather than by register moves; after four rounds the
             * names are back in their starting order.
             * Rounds 0-11 use the IW (byte-swapped input) slots and rounds 12-63
             * use the XW (expanded) slots. Rounds 0-15 use R1 and rounds 16-63
             * use R2.
             */
372         /* Transform 0-3 + Load data part2. */
373         R1(a, b, c, d, e, f, g, h, 0, 0, IW);
374         R1(d, a, b, c, h, e, f, g, 1, 1, IW);
375         R1(c, d, a, b, g, h, e, f, 2, 2, IW);
376         R1(b, c, d, a, f, g, h, e, 3, 3, IW);
377
378         /* Transform 4-7 + Precalc 12-14. */
379         R1(a, b, c, d, e, f, g, h, 4, 0, IW);
380         R1(d, a, b, c, h, e, f, g, 5, 1, IW);
381         R1(c, d, a, b, g, h, e, f, 6, 2, IW);
382         R1(b, c, d, a, f, g, h, e, 7, 3, IW);
383
384         /* Transform 8-11 + Precalc 12-17. */
385         R1(a, b, c, d, e, f, g, h, 8, 0, IW);
386         R1(d, a, b, c, h, e, f, g, 9, 1, IW);
387         R1(c, d, a, b, g, h, e, f, 10, 2, IW);
388         R1(b, c, d, a, f, g, h, e, 11, 3, IW);
389
390         /* Transform 12-14 + Precalc 18-20 */
391         R1(a, b, c, d, e, f, g, h, 12, 0, XW);
392         R1(d, a, b, c, h, e, f, g, 13, 1, XW);
393         R1(c, d, a, b, g, h, e, f, 14, 2, XW);
394
395         /* Transform 15-17 + Precalc 21-23 */
396         R1(b, c, d, a, f, g, h, e, 15, 0, XW);
397         R2(a, b, c, d, e, f, g, h, 16, 1, XW);
398         R2(d, a, b, c, h, e, f, g, 17, 2, XW);
399
400         /* Transform 18-20 + Precalc 24-26 */
401         R2(c, d, a, b, g, h, e, f, 18, 0, XW);
402         R2(b, c, d, a, f, g, h, e, 19, 1, XW);
403         R2(a, b, c, d, e, f, g, h, 20, 2, XW);
404
405         /* Transform 21-23 + Precalc 27-29 */
406         R2(d, a, b, c, h, e, f, g, 21, 0, XW);
407         R2(c, d, a, b, g, h, e, f, 22, 1, XW);
408         R2(b, c, d, a, f, g, h, e, 23, 2, XW);
409
410         /* Transform 24-26 + Precalc 30-32 */
411         R2(a, b, c, d, e, f, g, h, 24, 0, XW);
412         R2(d, a, b, c, h, e, f, g, 25, 1, XW);
413         R2(c, d, a, b, g, h, e, f, 26, 2, XW);
414
415         /* Transform 27-29 + Precalc 33-35 */
416         R2(b, c, d, a, f, g, h, e, 27, 0, XW);
417         R2(a, b, c, d, e, f, g, h, 28, 1, XW);
418         R2(d, a, b, c, h, e, f, g, 29, 2, XW);
419
420         /* Transform 30-32 + Precalc 36-38 */
421         R2(c, d, a, b, g, h, e, f, 30, 0, XW);
422         R2(b, c, d, a, f, g, h, e, 31, 1, XW);
423         R2(a, b, c, d, e, f, g, h, 32, 2, XW);
424
425         /* Transform 33-35 + Precalc 39-41 */
426         R2(d, a, b, c, h, e, f, g, 33, 0, XW);
427         R2(c, d, a, b, g, h, e, f, 34, 1, XW);
428         R2(b, c, d, a, f, g, h, e, 35, 2, XW);
429
430         /* Transform 36-38 + Precalc 42-44 */
431         R2(a, b, c, d, e, f, g, h, 36, 0, XW);
432         R2(d, a, b, c, h, e, f, g, 37, 1, XW);
433         R2(c, d, a, b, g, h, e, f, 38, 2, XW);
434
435         /* Transform 39-41 + Precalc 45-47 */
436         R2(b, c, d, a, f, g, h, e, 39, 0, XW);
437         R2(a, b, c, d, e, f, g, h, 40, 1, XW);
438         R2(d, a, b, c, h, e, f, g, 41, 2, XW);
439
440         /* Transform 42-44 + Precalc 48-50 */
441         R2(c, d, a, b, g, h, e, f, 42, 0, XW);
442         R2(b, c, d, a, f, g, h, e, 43, 1, XW);
443         R2(a, b, c, d, e, f, g, h, 44, 2, XW);
444
445         /* Transform 45-47 + Precalc 51-53 */
446         R2(d, a, b, c, h, e, f, g, 45, 0, XW);
447         R2(c, d, a, b, g, h, e, f, 46, 1, XW);
448         R2(b, c, d, a, f, g, h, e, 47, 2, XW);
449
450         /* Transform 48-50 + Precalc 54-56 */
451         R2(a, b, c, d, e, f, g, h, 48, 0, XW);
452         R2(d, a, b, c, h, e, f, g, 49, 1, XW);
453         R2(c, d, a, b, g, h, e, f, 50, 2, XW);
454
455         /* Transform 51-53 + Precalc 57-59 */
456         R2(b, c, d, a, f, g, h, e, 51, 0, XW);
457         R2(a, b, c, d, e, f, g, h, 52, 1, XW);
458         R2(d, a, b, c, h, e, f, g, 53, 2, XW);
459
460         /* Transform 54-56 + Precalc 60-62 */
461         R2(c, d, a, b, g, h, e, f, 54, 0, XW);
462         R2(b, c, d, a, f, g, h, e, 55, 1, XW);
463         R2(a, b, c, d, e, f, g, h, 56, 2, XW);
464
465         /* Transform 57-59 + Precalc 63 */
466         R2(d, a, b, c, h, e, f, g, 57, 0, XW);
467         R2(c, d, a, b, g, h, e, f, 58, 1, XW);
468         R2(b, c, d, a, f, g, h, e, 59, 2, XW);
469
470         /* Transform 60-62 + Precalc 63 */
471         R2(a, b, c, d, e, f, g, h, 60, 0, XW);
472         R2(d, a, b, c, h, e, f, g, 61, 1, XW);
473         R2(c, d, a, b, g, h, e, f, 62, 2, XW);
474
475         /* Transform 63 */
476         R2(b, c, d, a, f, g, h, e, 63, 0, XW);
477
478         /* Update the chaining variables. */
            /* New state = working registers XOR previous state, stored back in place. */
479         xorl state_h0(RSTATE), a;
480         xorl state_h1(RSTATE), b;
481         xorl state_h2(RSTATE), c;
482         xorl state_h3(RSTATE), d;
483         movl a, state_h0(RSTATE);
484         movl b, state_h1(RSTATE);
485         movl c, state_h2(RSTATE);
486         movl d, state_h3(RSTATE);
487         xorl state_h4(RSTATE), e;
488         xorl state_h5(RSTATE), f;
489         xorl state_h6(RSTATE), g;
490         xorl state_h7(RSTATE), h;
491         movl e, state_h4(RSTATE);
492         movl f, state_h5(RSTATE);
493         movl g, state_h6(RSTATE);
494         movl h, state_h7(RSTATE);
495
            /*
             * Repeat until the block counter reaches zero.
             * NOTE(review): the counter is decremented before it is first
             * tested and there is no zero check on entry, so this appears to
             * require nblocks >= 1. It is also compared as a 64-bit value -
             * confirm the callers' contract and the argument's declared width.
             */
496         cmpq $0, RNBLKS;
497         jne .Loop;
498
            /* Clear all vector registers, which held message-derived data. */
499         vzeroall;
500
            /* Read back the five save-area slots written in the prologue. */
501         movq (STACK_REG_SAVE + 0 * 8)(%rsp), %
502         movq (STACK_REG_SAVE + 1 * 8)(%rsp), %
503         movq (STACK_REG_SAVE + 2 * 8)(%rsp), %
504         movq (STACK_REG_SAVE + 3 * 8)(%rsp), %
505         movq (STACK_REG_SAVE + 4 * 8)(%rsp), %
506
            /*
             * %xmm0 is zero after vzeroall, so these stores overwrite the stack
             * slots that LOAD_W_XMM_1/LOAD_W_XMM_2 filled with message words
             * (the IW slots for rounds 0, 4 and 8) - presumably so that no
             * input data is left behind on the stack.
             */
507         vmovdqa %xmm0, IW_W1_ADDR(0, 0);
508         vmovdqa %xmm0, IW_W1W2_ADDR(0, 0);
509         vmovdqa %xmm0, IW_W1_ADDR(4, 0);
510         vmovdqa %xmm0, IW_W1W2_ADDR(4, 0);
511         vmovdqa %xmm0, IW_W1_ADDR(8, 0);
512         vmovdqa %xmm0, IW_W1W2_ADDR(8, 0);
513
            /* Drop the aligned frame by restoring the caller's %rsp from %rbp. */
514         movq %rbp, %rsp;
515         popq %rbp;
516         RET;
517 SYM_FUNC_END(sm3_transform_avx)
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php