Linux/arch/x86/crypto/sm4-aesni-avx2-asm_64.S

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
 * as specified in
 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
 *
 * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
 * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
 *  https://github.com/mjosaarinen/sm4ni
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/frame.h>

#define rRIP         (%rip)

/* vector registers */
#define RX0          %ymm0
#define RX1          %ymm1
#define MASK_4BIT    %ymm2
#define RTMP0        %ymm3
#define RTMP1        %ymm4
#define RTMP2        %ymm5
#define RTMP3        %ymm6
#define RTMP4        %ymm7

#define RA0          %ymm8
#define RA1          %ymm9
#define RA2          %ymm10
#define RA3          %ymm11

#define RB0          %ymm12
#define RB1          %ymm13
#define RB2          %ymm14
#define RB3          %ymm15

#define RNOT         %ymm0
#define RBSWAP       %ymm1

#define RX0x         %xmm0
#define RX1x         %xmm1
#define MASK_4BITx   %xmm2

#define RNOTx        %xmm0
#define RBSWAPx      %xmm1

#define RTMP0x       %xmm3
#define RTMP1x       %xmm4
#define RTMP2x       %xmm5
#define RTMP3x       %xmm6
#define RTMP4x       %xmm7


/* helper macros */

/* Transpose four 32-bit words between 128-bit vector lanes. */
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
        vpunpckhdq x1, x0, t2;                \
        vpunpckldq x1, x0, x0;                \
                                              \
        vpunpckldq x3, x2, t1;                \
        vpunpckhdq x3, x2, x2;                \
                                              \
        vpunpckhqdq t1, x0, x1;               \
        vpunpcklqdq t1, x0, x0;               \
                                              \
        vpunpckhqdq x2, t2, x3;               \
        vpunpcklqdq x2, t2, x2;

/* pre-SubByte transform. */
#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
        vpand x, mask4bit, tmp0;                     \
        vpandn x, mask4bit, x;                       \
        vpsrld $4, x, x;                             \
                                                     \
        vpshufb tmp0, lo_t, tmp0;                    \
        vpshufb x, hi_t, x;                          \
        vpxor tmp0, x, x;

/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
 * 'vaesenclast' instruction. */
#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
        vpandn mask4bit, x, tmp0;                      \
        vpsrld $4, x, x;                               \
        vpand x, mask4bit, x;                          \
                                                       \
        vpshufb tmp0, lo_t, tmp0;                      \
        vpshufb x, hi_t, x;                            \
        vpxor tmp0, x, x;

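/*
 * transform_pre/transform_post apply a byte-wise affine transform with
 * a pair of vpshufb look-ups: each byte is split into its low and high
 * nibble, each nibble indexes a 16-entry table (lo_t for the low
 * nibble, hi_t for the high nibble), and the two results are XORed.
 * The tables below encode the SM4 <-> AES field conversions.
 */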

.section        .rodata.cst16, "aM", @progbits, 16
.align 16

/*
 * Following four affine transform look-up tables are from work by
 * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
 *
 * These allow exposing SM4 S-Box from AES SubByte.
 */

/* pre-SubByte affine transform, from SM4 field to AES field. */
.Lpre_tf_lo_s:
        .quad 0x9197E2E474720701, 0xC7C1B4B222245157
.Lpre_tf_hi_s:
        .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012

/* post-SubByte affine transform, from AES field to SM4 field. */
.Lpost_tf_lo_s:
        .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
.Lpost_tf_hi_s:
        .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF

/* For isolating SubBytes from AESENCLAST, inverse shift row */
.Linv_shift_row:
        .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
        .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
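
/*
 * vaesenclast performs ShiftRows, SubBytes and AddRoundKey.  Applying
 * the inverse ShiftRows permutation above (via vpshufb) to its output
 * undoes the ShiftRows step, leaving only the AES S-box.  Using
 * MASK_4BIT as the round key turns AddRoundKey into an XOR with 0x0f
 * in every byte, which transform_post compensates for.
 */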

/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_8:
        .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
        .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06

/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_16:
        .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
        .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09

/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_24:
        .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
        .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c

/* For CTR-mode IV byteswap */
.Lbswap128_mask:
        .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

/* For input word byte-swap */
.Lbswap32_mask:
        .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12

.align 4
/* 4-bit mask */
.L0f0f0f0f:
        .long 0x0f0f0f0f

/* 12 bytes, only for padding */
.Lpadding_deadbeef:
        .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef

.text
SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
        /* input:
         *      %rdi: round key array, CTX
         *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
         *                                              plaintext blocks
         * output:
         *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
         *                                              ciphertext blocks
         */
        FRAME_BEGIN

        vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
        vpshufb RTMP2, RA0, RA0;
        vpshufb RTMP2, RA1, RA1;
        vpshufb RTMP2, RA2, RA2;
        vpshufb RTMP2, RA3, RA3;
        vpshufb RTMP2, RB0, RB0;
        vpshufb RTMP2, RB1, RB1;
        vpshufb RTMP2, RB2, RB2;
        vpshufb RTMP2, RB3, RB3;

        vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
        transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
        transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);

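        /*
         * Each ymm register holds two 128-bit blocks, so RA0..RA3 and
         * RB0..RB3 together carry 16 blocks.  The .Lbswap32_mask
         * shuffle converts the big-endian input words to the CPU's
         * little-endian order, and the transposes regroup the data so
         * that RA0/RB0 hold word 0 of their blocks, RA1/RB1 word 1,
         * and so on.
         */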
#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3)          \
        vpbroadcastd (4*(round))(%rdi), RX0;                  \
        vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4;             \
        vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1;             \
        vmovdqa RX0, RX1;                                     \
        vpxor s1, RX0, RX0;                                   \
        vpxor s2, RX0, RX0;                                   \
        vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */           \
        vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2;            \
        vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3;            \
        vpxor r1, RX1, RX1;                                   \
        vpxor r2, RX1, RX1;                                   \
        vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */           \
                                                              \
        /* sbox, non-linear part */                           \
        transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0);   \
        transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0);   \
        vextracti128 $1, RX0, RTMP4x;                         \
        vextracti128 $1, RX1, RTMP0x;                         \
        vaesenclast MASK_4BITx, RX0x, RX0x;                   \
        vaesenclast MASK_4BITx, RTMP4x, RTMP4x;               \
        vaesenclast MASK_4BITx, RX1x, RX1x;                   \
        vaesenclast MASK_4BITx, RTMP0x, RTMP0x;               \
        vinserti128 $1, RTMP4x, RX0, RX0;                     \
        vbroadcasti128 .Linv_shift_row rRIP, RTMP4;           \
        vinserti128 $1, RTMP0x, RX1, RX1;                     \
        transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0);  \
        transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0);  \
                                                              \
        /* linear part */                                     \
        vpshufb RTMP4, RX0, RTMP0;                            \
        vpxor RTMP0, s0, s0; /* s0 ^ x */                     \
        vpshufb RTMP4, RX1, RTMP2;                            \
        vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4;     \
        vpxor RTMP2, r0, r0; /* r0 ^ x */                     \
        vpshufb RTMP4, RX0, RTMP1;                            \
        vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */         \
        vpshufb RTMP4, RX1, RTMP3;                            \
        vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4;    \
        vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */         \
        vpshufb RTMP4, RX0, RTMP1;                            \
        vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
        vpshufb RTMP4, RX1, RTMP3;                            \
        vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4;    \
        vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
        vpshufb RTMP4, RX0, RTMP1;                            \
        vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */         \
        vpslld $2, RTMP0, RTMP1;                              \
        vpsrld $30, RTMP0, RTMP0;                             \
        vpxor RTMP0, s0, s0;                                  \
        /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
        vpxor RTMP1, s0, s0;                                  \
        vpshufb RTMP4, RX1, RTMP3;                            \
        vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */         \
        vpslld $2, RTMP2, RTMP3;                              \
        vpsrld $30, RTMP2, RTMP2;                             \
        vpxor RTMP2, r0, r0;                                  \
        /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
        vpxor RTMP3, r0, r0;

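        /*
         * One ROUND computes, for each of the 16 blocks,
         *   s0 ^= L(tau(s1 ^ s2 ^ s3 ^ rk))
         * where tau is the SM4 S-box (built from AES SubBytes above)
         * and L(x) = x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24)
         * is the SM4 linear transform.  The rol(x,8/16/24) terms are
         * folded into the .Linv_shift_row_rol_* shuffle masks and the
         * remaining rotate-by-2 is done with vpslld/vpsrld.  The loop
         * below runs the macro four times per iteration for eight
         * iterations, i.e. the full 32 SM4 rounds, with %rax marking
         * the end of the 32-entry round key array.
         */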
        leaq (32*4)(%rdi), %rax;
.align 16
.Lroundloop_blk8:
        ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
        ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
        ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
        ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
        leaq (4*4)(%rdi), %rdi;
        cmpq %rax, %rdi;
        jne .Lroundloop_blk8;

#undef ROUND

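        /*
         * Undo the word-slicing and convert back to big-endian bytes.
         * The single .Lbswap128_mask shuffle both restores the byte
         * order and reverses the word order, which implements the
         * final reverse transform R of SM4.
         */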
        vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;

        transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
        transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
        vpshufb RTMP2, RA0, RA0;
        vpshufb RTMP2, RA1, RA1;
        vpshufb RTMP2, RA2, RA2;
        vpshufb RTMP2, RA3, RA3;
        vpshufb RTMP2, RB0, RB0;
        vpshufb RTMP2, RB1, RB1;
        vpshufb RTMP2, RB2, RB2;
        vpshufb RTMP2, RB3, RB3;

        FRAME_END
        RET;
SYM_FUNC_END(__sm4_crypt_blk16)

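/*
 * inc_le128 increments a 128-bit little-endian counter held in x.
 * vpcmpeqq flags a low qword that equals minus_one (i.e. is about to
 * wrap), vpsubq minus_one adds 1 to the low qword, and the flag,
 * shifted into the high qword position, is then subtracted from the
 * high qword to propagate the carry.
 */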
#define inc_le128(x, minus_one, tmp) \
        vpcmpeqq minus_one, x, tmp;  \
        vpsubq minus_one, x, x;      \
        vpslldq $8, tmp, tmp;        \
        vpsubq tmp, x, x;

/*
 * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
 *                                   const u8 *src, u8 *iv)
 */
SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
        /* input:
         *      %rdi: round key array, CTX
         *      %rsi: dst (16 blocks)
         *      %rdx: src (16 blocks)
         *      %rcx: iv (big endian, 128bit)
         */
        FRAME_BEGIN

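        /* Keep the low 64 bits of the big-endian counter in %rax so
         * the code below can detect an impending 64-bit overflow. */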
        movq 8(%rcx), %rax;
        bswapq %rax;

        vzeroupper;

        vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
        vpcmpeqd RNOT, RNOT, RNOT;
        vpsrldq $8, RNOT, RNOT;   /* ab: -1:0 ; cd: -1:0 */
        vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */

        /* load IV and byteswap */
        vmovdqu (%rcx), RTMP4x;
        vpshufb RTMP3x, RTMP4x, RTMP4x;
        vmovdqa RTMP4x, RTMP0x;
        inc_le128(RTMP4x, RNOTx, RTMP1x);
        vinserti128 $1, RTMP4x, RTMP0, RTMP0;
        vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */

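        /*
         * RA0 already holds counters +0 and +1.  If adding 16 cannot
         * overflow the low 64 bits of the counter, the remaining IVs
         * are built on the fast path below by repeatedly subtracting
         * the -2 constant in RTMP2 (i.e. adding 2 to the low qword of
         * each 128-bit lane).  Otherwise .Lhandle_ctr_carry increments
         * the counters one step at a time with inc_le128 so the carry
         * propagates into the high qword.
         */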
        /* check need for handling 64-bit overflow and carry */
        cmpq $(0xffffffffffffffff - 16), %rax;
        ja .Lhandle_ctr_carry;

        /* construct IVs */
        vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
        vpshufb RTMP3, RTMP0, RA1;
        vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
        vpshufb RTMP3, RTMP0, RA2;
        vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
        vpshufb RTMP3, RTMP0, RA3;
        vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
        vpshufb RTMP3, RTMP0, RB0;
        vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
        vpshufb RTMP3, RTMP0, RB1;
        vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
        vpshufb RTMP3, RTMP0, RB2;
        vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
        vpshufb RTMP3, RTMP0, RB3;
        vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
        vpshufb RTMP3x, RTMP0x, RTMP0x;

        jmp .Lctr_carry_done;

.Lhandle_ctr_carry:
        /* construct IVs */
        inc_le128(RTMP0, RNOT, RTMP1);
        inc_le128(RTMP0, RNOT, RTMP1);
        vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
        inc_le128(RTMP0, RNOT, RTMP1);
        inc_le128(RTMP0, RNOT, RTMP1);
        vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
        inc_le128(RTMP0, RNOT, RTMP1);
        inc_le128(RTMP0, RNOT, RTMP1);
        vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
        inc_le128(RTMP0, RNOT, RTMP1);
        inc_le128(RTMP0, RNOT, RTMP1);
        vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
        inc_le128(RTMP0, RNOT, RTMP1);
        inc_le128(RTMP0, RNOT, RTMP1);
        vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
        inc_le128(RTMP0, RNOT, RTMP1);
        inc_le128(RTMP0, RNOT, RTMP1);
        vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
        inc_le128(RTMP0, RNOT, RTMP1);
        inc_le128(RTMP0, RNOT, RTMP1);
        vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
        inc_le128(RTMP0, RNOT, RTMP1);
        vextracti128 $1, RTMP0, RTMP0x;
        vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */

.align 4
.Lctr_carry_done:
        /* store new IV */
        vmovdqu RTMP0x, (%rcx);

        call __sm4_crypt_blk16;

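        /* XOR the encrypted counter blocks (the keystream) with the
         * source blocks and write out the ciphertext. */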
        vpxor (0 * 32)(%rdx), RA0, RA0;
        vpxor (1 * 32)(%rdx), RA1, RA1;
        vpxor (2 * 32)(%rdx), RA2, RA2;
        vpxor (3 * 32)(%rdx), RA3, RA3;
        vpxor (4 * 32)(%rdx), RB0, RB0;
        vpxor (5 * 32)(%rdx), RB1, RB1;
        vpxor (6 * 32)(%rdx), RB2, RB2;
        vpxor (7 * 32)(%rdx), RB3, RB3;

        vmovdqu RA0, (0 * 32)(%rsi);
        vmovdqu RA1, (1 * 32)(%rsi);
        vmovdqu RA2, (2 * 32)(%rsi);
        vmovdqu RA3, (3 * 32)(%rsi);
        vmovdqu RB0, (4 * 32)(%rsi);
        vmovdqu RB1, (5 * 32)(%rsi);
        vmovdqu RB2, (6 * 32)(%rsi);
        vmovdqu RB3, (7 * 32)(%rsi);

        vzeroall;
        FRAME_END
        RET;
SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)

/*
 * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
 *                                   const u8 *src, u8 *iv)
 */
SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
        /* input:
         *      %rdi: round key array, CTX
         *      %rsi: dst (16 blocks)
         *      %rdx: src (16 blocks)
         *      %rcx: iv
         */
        FRAME_BEGIN

        vzeroupper;

        vmovdqu (0 * 32)(%rdx), RA0;
        vmovdqu (1 * 32)(%rdx), RA1;
        vmovdqu (2 * 32)(%rdx), RA2;
        vmovdqu (3 * 32)(%rdx), RA3;
        vmovdqu (4 * 32)(%rdx), RB0;
        vmovdqu (5 * 32)(%rdx), RB1;
        vmovdqu (6 * 32)(%rdx), RB2;
        vmovdqu (7 * 32)(%rdx), RB3;

        call __sm4_crypt_blk16;

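        /* CBC decryption: each plaintext block is the decrypted block
         * XORed with the previous ciphertext block.  RNOT is loaded
         * with the IV in its low lane and the first ciphertext block
         * in its high lane; the remaining registers reuse the source
         * ciphertext at a 16-byte offset.  The last ciphertext block
         * is saved as the next IV. */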
        vmovdqu (%rcx), RNOTx;
        vinserti128 $1, (%rdx), RNOT, RNOT;
        vpxor RNOT, RA0, RA0;
        vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
        vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
        vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
        vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
        vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
        vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
        vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
        vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
        vmovdqu RNOTx, (%rcx); /* store new IV */

        vmovdqu RA0, (0 * 32)(%rsi);
        vmovdqu RA1, (1 * 32)(%rsi);
        vmovdqu RA2, (2 * 32)(%rsi);
        vmovdqu RA3, (3 * 32)(%rsi);
        vmovdqu RB0, (4 * 32)(%rsi);
        vmovdqu RB1, (5 * 32)(%rsi);
        vmovdqu RB2, (6 * 32)(%rsi);
        vmovdqu RB3, (7 * 32)(%rsi);

        vzeroall;
        FRAME_END
        RET;
SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
                                                      
