~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/crypto/sm4-ce-core.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm64/crypto/sm4-ce-core.S (Architecture i386) and /arch/sparc64/crypto/sm4-ce-core.S (Architecture sparc64)


  1 /* SPDX-License-Identifier: GPL-2.0-or-later *    
  2 /*                                                
  3  * SM4 Cipher Algorithm for ARMv8 with Crypto     
  4  * as specified in                                
  5  * https://tools.ietf.org/id/draft-ribose-cfrg    
  6  *                                                
  7  * Copyright (C) 2022, Alibaba Group.             
  8  * Copyright (C) 2022 Tianjia Zhang <tianjia.zh    
  9  */                                               
 10                                                   
 11 #include <linux/linkage.h>                        
 12 #include <asm/assembler.h>                        
 13 #include "sm4-ce-asm.h"                           
 14                                                   
 15 .arch   armv8-a+crypto                            
 16                                                   
 17 .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,     
 18                 20, 24, 25, 26, 27, 28, 29, 30    
 19         .set .Lv\b\().4s, \b                      
 20 .endr                                             
 21                                                   
 22 .macro sm4e, vd, vn                               
 23         .inst 0xcec08400 | (.L\vn << 5) | .L\v    
 24 .endm                                             
 25                                                   
 26 .macro sm4ekey, vd, vn, vm                        
 27         .inst 0xce60c800 | (.L\vm << 16) | (.L    
 28 .endm                                             
 29                                                   
 30 /* Register macros */                             
 31                                                   
 32 #define RTMP0   v16                               
 33 #define RTMP1   v17                               
 34 #define RTMP2   v18                               
 35 #define RTMP3   v19                               
 36                                                   
 37 #define RIV     v20                               
 38 #define RMAC    v20                               
 39 #define RMASK   v21                               
 40                                                   
 41                                                   
 42 .align 3                                          
 43 SYM_FUNC_START(sm4_ce_expand_key)                 
 44         /* input:                                 
 45          *   x0: 128-bit key                      
 46          *   x1: rkey_enc                         
 47          *   x2: rkey_dec                         
 48          *   x3: fk array                         
 49          *   x4: ck array                         
 50          */                                       
 51         ld1             {v0.16b}, [x0];           
 52         rev32           v0.16b, v0.16b;           
 53         ld1             {v1.16b}, [x3];           
 54         /* load ck */                             
 55         ld1             {v24.16b-v27.16b}, [x4    
 56         ld1             {v28.16b-v31.16b}, [x4    
 57                                                   
 58         /* input ^ fk */                          
 59         eor             v0.16b, v0.16b, v1.16b    
 60                                                   
 61         sm4ekey         v0.4s, v0.4s, v24.4s;     
 62         sm4ekey         v1.4s, v0.4s, v25.4s;     
 63         sm4ekey         v2.4s, v1.4s, v26.4s;     
 64         sm4ekey         v3.4s, v2.4s, v27.4s;     
 65         sm4ekey         v4.4s, v3.4s, v28.4s;     
 66         sm4ekey         v5.4s, v4.4s, v29.4s;     
 67         sm4ekey         v6.4s, v5.4s, v30.4s;     
 68         sm4ekey         v7.4s, v6.4s, v31.4s;     
 69                                                   
 70         adr_l           x5, .Lbswap128_mask       
 71         ld1             {v24.16b}, [x5]           
 72                                                   
 73         st1             {v0.16b-v3.16b}, [x1],    
 74         st1             {v4.16b-v7.16b}, [x1];    
 75                                                   
 76         tbl             v16.16b, {v7.16b}, v24    
 77         tbl             v17.16b, {v6.16b}, v24    
 78         tbl             v18.16b, {v5.16b}, v24    
 79         tbl             v19.16b, {v4.16b}, v24    
 80         tbl             v20.16b, {v3.16b}, v24    
 81         tbl             v21.16b, {v2.16b}, v24    
 82         tbl             v22.16b, {v1.16b}, v24    
 83         tbl             v23.16b, {v0.16b}, v24    
 84                                                   
 85         st1             {v16.16b-v19.16b}, [x2    
 86         st1             {v20.16b-v23.16b}, [x2    
 87                                                   
 88         ret;                                      
 89 SYM_FUNC_END(sm4_ce_expand_key)                   
 90                                                   
 91 .align 3                                          
 92 SYM_FUNC_START(sm4_ce_crypt_block)                
 93         /* input:                                 
 94          *   x0: round key array, CTX             
 95          *   x1: dst                              
 96          *   x2: src                              
 97          */                                       
 98         SM4_PREPARE(x0)                           
 99                                                   
100         ld1             {v0.16b}, [x2];           
101         SM4_CRYPT_BLK(v0);                        
102         st1             {v0.16b}, [x1];           
103                                                   
104         ret;                                      
105 SYM_FUNC_END(sm4_ce_crypt_block)                  
106                                                   
107 .align 3                                          
108 SYM_FUNC_START(sm4_ce_crypt)                      
109         /* input:                                 
110          *   x0: round key array, CTX             
111          *   x1: dst                              
112          *   x2: src                              
113          *   w3: nblocks                          
114          */                                       
115         SM4_PREPARE(x0)                           
116                                                   
117 .Lcrypt_loop_blk:                                 
118         sub             w3, w3, #8;               
119         tbnz            w3, #31, .Lcrypt_tail8    
120                                                   
121         ld1             {v0.16b-v3.16b}, [x2],    
122         ld1             {v4.16b-v7.16b}, [x2],    
123                                                   
124         SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5,    
125                                                   
126         st1             {v0.16b-v3.16b}, [x1],    
127         st1             {v4.16b-v7.16b}, [x1],    
128                                                   
129         cbz             w3, .Lcrypt_end;          
130         b               .Lcrypt_loop_blk;         
131                                                   
132 .Lcrypt_tail8:                                    
133         add             w3, w3, #8;               
134         cmp             w3, #4;                   
135         blt             .Lcrypt_tail4;            
136                                                   
137         sub             w3, w3, #4;               
138                                                   
139         ld1             {v0.16b-v3.16b}, [x2],    
140         SM4_CRYPT_BLK4(v0, v1, v2, v3);           
141         st1             {v0.16b-v3.16b}, [x1],    
142                                                   
143         cbz             w3, .Lcrypt_end;          
144                                                   
145 .Lcrypt_tail4:                                    
146         sub             w3, w3, #1;               
147                                                   
148         ld1             {v0.16b}, [x2], #16;      
149         SM4_CRYPT_BLK(v0);                        
150         st1             {v0.16b}, [x1], #16;      
151                                                   
152         cbnz            w3, .Lcrypt_tail4;        
153                                                   
154 .Lcrypt_end:                                      
155         ret;                                      
156 SYM_FUNC_END(sm4_ce_crypt)                        
157                                                   
158 .align 3                                          
159 SYM_FUNC_START(sm4_ce_cbc_enc)                    
160         /* input:                                 
161          *   x0: round key array, CTX             
162          *   x1: dst                              
163          *   x2: src                              
164          *   x3: iv (big endian, 128 bit)         
165          *   w4: nblocks                          
166          */                                       
167         SM4_PREPARE(x0)                           
168                                                   
169         ld1             {RIV.16b}, [x3]           
170                                                   
171 .Lcbc_enc_loop_4x:                                
172         cmp             w4, #4                    
173         blt             .Lcbc_enc_loop_1x         
174                                                   
175         sub             w4, w4, #4                
176                                                   
177         ld1             {v0.16b-v3.16b}, [x2],    
178                                                   
179         eor             v0.16b, v0.16b, RIV.16    
180         SM4_CRYPT_BLK(v0)                         
181         eor             v1.16b, v1.16b, v0.16b    
182         SM4_CRYPT_BLK(v1)                         
183         eor             v2.16b, v2.16b, v1.16b    
184         SM4_CRYPT_BLK(v2)                         
185         eor             v3.16b, v3.16b, v2.16b    
186         SM4_CRYPT_BLK(v3)                         
187                                                   
188         st1             {v0.16b-v3.16b}, [x1],    
189         mov             RIV.16b, v3.16b           
190                                                   
191         cbz             w4, .Lcbc_enc_end         
192         b               .Lcbc_enc_loop_4x         
193                                                   
194 .Lcbc_enc_loop_1x:                                
195         sub             w4, w4, #1                
196                                                   
197         ld1             {v0.16b}, [x2], #16       
198                                                   
199         eor             RIV.16b, RIV.16b, v0.1    
200         SM4_CRYPT_BLK(RIV)                        
201                                                   
202         st1             {RIV.16b}, [x1], #16      
203                                                   
204         cbnz            w4, .Lcbc_enc_loop_1x     
205                                                   
206 .Lcbc_enc_end:                                    
207         /* store new IV */                        
208         st1             {RIV.16b}, [x3]           
209                                                   
210         ret                                       
211 SYM_FUNC_END(sm4_ce_cbc_enc)                      
212                                                   
213 .align 3                                          
214 SYM_FUNC_START(sm4_ce_cbc_dec)                    
215         /* input:                                 
216          *   x0: round key array, CTX             
217          *   x1: dst                              
218          *   x2: src                              
219          *   x3: iv (big endian, 128 bit)         
220          *   w4: nblocks                          
221          */                                       
222         SM4_PREPARE(x0)                           
223                                                   
224         ld1             {RIV.16b}, [x3]           
225                                                   
226 .Lcbc_dec_loop_8x:                                
227         sub             w4, w4, #8                
228         tbnz            w4, #31, .Lcbc_dec_4x     
229                                                   
230         ld1             {v0.16b-v3.16b}, [x2],    
231         ld1             {v4.16b-v7.16b}, [x2],    
232                                                   
233         rev32           v8.16b, v0.16b            
234         rev32           v9.16b, v1.16b            
235         rev32           v10.16b, v2.16b           
236         rev32           v11.16b, v3.16b           
237         rev32           v12.16b, v4.16b           
238         rev32           v13.16b, v5.16b           
239         rev32           v14.16b, v6.16b           
240         rev32           v15.16b, v7.16b           
241                                                   
242         SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v1    
243                                                   
244         eor             v8.16b, v8.16b, RIV.16    
245         eor             v9.16b, v9.16b, v0.16b    
246         eor             v10.16b, v10.16b, v1.1    
247         eor             v11.16b, v11.16b, v2.1    
248         eor             v12.16b, v12.16b, v3.1    
249         eor             v13.16b, v13.16b, v4.1    
250         eor             v14.16b, v14.16b, v5.1    
251         eor             v15.16b, v15.16b, v6.1    
252                                                   
253         st1             {v8.16b-v11.16b}, [x1]    
254         st1             {v12.16b-v15.16b}, [x1    
255                                                   
256         mov             RIV.16b, v7.16b           
257                                                   
258         cbz             w4, .Lcbc_dec_end         
259         b               .Lcbc_dec_loop_8x         
260                                                   
261 .Lcbc_dec_4x:                                     
262         add             w4, w4, #8                
263         cmp             w4, #4                    
264         blt             .Lcbc_dec_loop_1x         
265                                                   
266         sub             w4, w4, #4                
267                                                   
268         ld1             {v0.16b-v3.16b}, [x2],    
269                                                   
270         rev32           v8.16b, v0.16b            
271         rev32           v9.16b, v1.16b            
272         rev32           v10.16b, v2.16b           
273         rev32           v11.16b, v3.16b           
274                                                   
275         SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)       
276                                                   
277         eor             v8.16b, v8.16b, RIV.16    
278         eor             v9.16b, v9.16b, v0.16b    
279         eor             v10.16b, v10.16b, v1.1    
280         eor             v11.16b, v11.16b, v2.1    
281                                                   
282         st1             {v8.16b-v11.16b}, [x1]    
283                                                   
284         mov             RIV.16b, v3.16b           
285                                                   
286         cbz             w4, .Lcbc_dec_end         
287                                                   
288 .Lcbc_dec_loop_1x:                                
289         sub             w4, w4, #1                
290                                                   
291         ld1             {v0.16b}, [x2], #16       
292                                                   
293         rev32           v8.16b, v0.16b            
294                                                   
295         SM4_CRYPT_BLK_BE(v8)                      
296                                                   
297         eor             v8.16b, v8.16b, RIV.16    
298         st1             {v8.16b}, [x1], #16       
299                                                   
300         mov             RIV.16b, v0.16b           
301                                                   
302         cbnz            w4, .Lcbc_dec_loop_1x     
303                                                   
304 .Lcbc_dec_end:                                    
305         /* store new IV */                        
306         st1             {RIV.16b}, [x3]           
307                                                   
308         ret                                       
309 SYM_FUNC_END(sm4_ce_cbc_dec)                      
310                                                   
311 .align 3                                          
312 SYM_FUNC_START(sm4_ce_cbc_cts_enc)                
313         /* input:                                 
314          *   x0: round key array, CTX             
315          *   x1: dst                              
316          *   x2: src                              
317          *   x3: iv (big endian, 128 bit)         
318          *   w4: nbytes                           
319          */                                       
320         SM4_PREPARE(x0)                           
321                                                   
322         sub             w5, w4, #16               
323         uxtw            x5, w5                    
324                                                   
325         ld1             {RIV.16b}, [x3]           
326                                                   
327         ld1             {v0.16b}, [x2]            
328         eor             RIV.16b, RIV.16b, v0.1    
329         SM4_CRYPT_BLK(RIV)                        
330                                                   
331         /* load permute table */                  
332         adr_l           x6, .Lcts_permute_tabl    
333         add             x7, x6, #32               
334         add             x6, x6, x5                
335         sub             x7, x7, x5                
336         ld1             {v3.16b}, [x6]            
337         ld1             {v4.16b}, [x7]            
338                                                   
339         /* overlapping loads */                   
340         add             x2, x2, x5                
341         ld1             {v1.16b}, [x2]            
342                                                   
343         /* create Cn from En-1 */                 
344         tbl             v0.16b, {RIV.16b}, v3.    
345         /* padding Pn with zeros */               
346         tbl             v1.16b, {v1.16b}, v4.1    
347                                                   
348         eor             v1.16b, v1.16b, RIV.16    
349         SM4_CRYPT_BLK(v1)                         
350                                                   
351         /* overlapping stores */                  
352         add             x5, x1, x5                
353         st1             {v0.16b}, [x5]            
354         st1             {v1.16b}, [x1]            
355                                                   
356         ret                                       
357 SYM_FUNC_END(sm4_ce_cbc_cts_enc)                  
358                                                   
359 .align 3                                          
360 SYM_FUNC_START(sm4_ce_cbc_cts_dec)                
361         /* input:                                 
362          *   x0: round key array, CTX             
363          *   x1: dst                              
364          *   x2: src                              
365          *   x3: iv (big endian, 128 bit)         
366          *   w4: nbytes                           
367          */                                       
368         SM4_PREPARE(x0)                           
369                                                   
370         sub             w5, w4, #16               
371         uxtw            x5, w5                    
372                                                   
373         ld1             {RIV.16b}, [x3]           
374                                                   
375         /* load permute table */                  
376         adr_l           x6, .Lcts_permute_tabl    
377         add             x7, x6, #32               
378         add             x6, x6, x5                
379         sub             x7, x7, x5                
380         ld1             {v3.16b}, [x6]            
381         ld1             {v4.16b}, [x7]            
382                                                   
383         /* overlapping loads */                   
384         ld1             {v0.16b}, [x2], x5        
385         ld1             {v1.16b}, [x2]            
386                                                   
387         SM4_CRYPT_BLK(v0)                         
388         /* select the first Ln bytes of Xn to     
389         tbl             v2.16b, {v0.16b}, v3.1    
390         eor             v2.16b, v2.16b, v1.16b    
391                                                   
392         /* overwrite the first Ln bytes with C    
393         tbx             v0.16b, {v1.16b}, v4.1    
394         SM4_CRYPT_BLK(v0)                         
395         eor             v0.16b, v0.16b, RIV.16    
396                                                   
397         /* overlapping stores */                  
398         add             x5, x1, x5                
399         st1             {v2.16b}, [x5]            
400         st1             {v0.16b}, [x1]            
401                                                   
402         ret                                       
403 SYM_FUNC_END(sm4_ce_cbc_cts_dec)                  
404                                                   
405 .align 3                                          
406 SYM_FUNC_START(sm4_ce_ctr_enc)                    
407         /* input:                                 
408          *   x0: round key array, CTX             
409          *   x1: dst                              
410          *   x2: src                              
411          *   x3: ctr (big endian, 128 bit)        
412          *   w4: nblocks                          
413          */                                       
414         SM4_PREPARE(x0)                           
415                                                   
416         ldp             x7, x8, [x3]              
417         rev             x7, x7                    
418         rev             x8, x8                    
419                                                   
420 .Lctr_loop_8x:                                    
421         sub             w4, w4, #8                
422         tbnz            w4, #31, .Lctr_4x         
423                                                   
424 #define inc_le128(vctr)                           
425                 mov             vctr.d[1], x8;    
426                 mov             vctr.d[0], x7;    
427                 adds            x8, x8, #1;       
428                 rev64           vctr.16b, vctr    
429                 adc             x7, x7, xzr;      
430                                                   
431         /* construct CTRs */                      
432         inc_le128(v0)                   /* +0     
433         inc_le128(v1)                   /* +1     
434         inc_le128(v2)                   /* +2     
435         inc_le128(v3)                   /* +3     
436         inc_le128(v4)                   /* +4     
437         inc_le128(v5)                   /* +5     
438         inc_le128(v6)                   /* +6     
439         inc_le128(v7)                   /* +7     
440                                                   
441         ld1             {v8.16b-v11.16b}, [x2]    
442         ld1             {v12.16b-v15.16b}, [x2    
443                                                   
444         SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5,    
445                                                   
446         eor             v0.16b, v0.16b, v8.16b    
447         eor             v1.16b, v1.16b, v9.16b    
448         eor             v2.16b, v2.16b, v10.16    
449         eor             v3.16b, v3.16b, v11.16    
450         eor             v4.16b, v4.16b, v12.16    
451         eor             v5.16b, v5.16b, v13.16    
452         eor             v6.16b, v6.16b, v14.16    
453         eor             v7.16b, v7.16b, v15.16    
454                                                   
455         st1             {v0.16b-v3.16b}, [x1],    
456         st1             {v4.16b-v7.16b}, [x1],    
457                                                   
458         cbz             w4, .Lctr_end             
459         b               .Lctr_loop_8x             
460                                                   
461 .Lctr_4x:                                         
462         add             w4, w4, #8                
463         cmp             w4, #4                    
464         blt             .Lctr_loop_1x             
465                                                   
466         sub             w4, w4, #4                
467                                                   
468         /* construct CTRs */                      
469         inc_le128(v0)                   /* +0     
470         inc_le128(v1)                   /* +1     
471         inc_le128(v2)                   /* +2     
472         inc_le128(v3)                   /* +3     
473                                                   
474         ld1             {v8.16b-v11.16b}, [x2]    
475                                                   
476         SM4_CRYPT_BLK4(v0, v1, v2, v3)            
477                                                   
478         eor             v0.16b, v0.16b, v8.16b    
479         eor             v1.16b, v1.16b, v9.16b    
480         eor             v2.16b, v2.16b, v10.16    
481         eor             v3.16b, v3.16b, v11.16    
482                                                   
483         st1             {v0.16b-v3.16b}, [x1],    
484                                                   
485         cbz             w4, .Lctr_end             
486                                                   
487 .Lctr_loop_1x:                                    
488         sub             w4, w4, #1                
489                                                   
490         /* construct CTRs */                      
491         inc_le128(v0)                             
492                                                   
493         ld1             {v8.16b}, [x2], #16       
494                                                   
495         SM4_CRYPT_BLK(v0)                         
496                                                   
497         eor             v0.16b, v0.16b, v8.16b    
498         st1             {v0.16b}, [x1], #16       
499                                                   
500         cbnz            w4, .Lctr_loop_1x         
501                                                   
502 .Lctr_end:                                        
503         /* store new CTR */                       
504         rev             x7, x7                    
505         rev             x8, x8                    
506         stp             x7, x8, [x3]              
507                                                   
508         ret                                       
509 SYM_FUNC_END(sm4_ce_ctr_enc)                      
510                                                   
511                                                   
512 #define tweak_next(vt, vin, RTMP)                 
513                 sshr            RTMP.2d, vin.2    
514                 and             RTMP.16b, RTMP    
515                 add             vt.2d, vin.2d,    
516                 ext             RTMP.16b, RTMP    
517                 eor             vt.16b, vt.16b    
518                                                   
519 .align 3                                          
520 SYM_FUNC_START(sm4_ce_xts_enc)
521         /* input:
522          *   x0: round key array, CTX
523          *   x1: dst
524          *   x2: src
525          *   x3: tweak (big endian, 128 bit)
526          *   w4: nbytes
527          *   x5: round key array for IV
528          */
            /*
             * XTS encryption: every 16-byte block is XORed with the
             * current tweak (v8), run through SM4_CRYPT_BLK*, and XORed
             * with the same tweak again.  Blocks are handled 8 at a
             * time, then 4, then singly; a trailing partial block
             * (nbytes & 15 != 0) is handled by cipher text stealing.
             *
             * Register use after setup:
             *   w4      number of full blocks left for the bulk loops
             *   x5      number of tail bytes (0..15); the incoming x5
             *           pointer is no longer needed by then
             *   v8      current tweak, v9-v15 the following tweaks
             *
             * The tweak is written back to [x3] only on the path
             * without a partial tail (.Lxts_enc_end).
             *
             * NOTE(review): several lines in this listing are cut at a
             * fixed column, so some operands are incomplete here; the
             * comments describe only what is visible.
             * NOTE(review): v8-v15 are written without being saved in
             * this routine; presumably the caller makes that safe -
             * confirm.
             */
529         ld1             {v8.16b}, [x3]
530
            /* x5 == NULL: the tweak in v8 is used as loaded */
531         cbz             x5, .Lxts_enc_nofirst
532
533         SM4_PREPARE(x5)
534
535         /* Generate first tweak */
536         SM4_CRYPT_BLK(v8)
537
538 .Lxts_enc_nofirst:
539         SM4_PREPARE(x0)
540
            /*
             * w5 = nbytes % 16 (sets the flags used by csel below)
             * w4 = nbytes / 16, reduced by one when there is a tail so
             *      that the last full block is left for the stealing
             *      code at .Lxts_enc_cts
             */
541         ands            w5, w4, #15
542         lsr             w4, w4, #4
543         sub             w6, w4, #1
544         csel            w4, w4, w6, eq
545         uxtw            x5, w5
546
            /*
             * Constant consumed by tweak_next (macro defined outside
             * this chunk), built from 0x1 and 0x87; presumably the
             * carry / reduction mask for multiplying the tweak by x -
             * confirm against the macro.
             */
547         movi            RMASK.2s, #0x1
548         movi            RTMP0.2s, #0x87
549         uzp1            RMASK.4s, RMASK.4s, RT
550
551         cbz             w4, .Lxts_enc_cts
552
553 .Lxts_enc_loop_8x:
            /* fewer than 8 blocks left: w4 goes negative, bit 31 set */
554         sub             w4, w4, #8
555         tbnz            w4, #31, .Lxts_enc_4x
556
            /* v9..v15 = the seven tweaks following v8 */
557         tweak_next( v9,  v8, RTMP0)
558         tweak_next(v10,  v9, RTMP1)
559         tweak_next(v11, v10, RTMP2)
560         tweak_next(v12, v11, RTMP3)
561         tweak_next(v13, v12, RTMP0)
562         tweak_next(v14, v13, RTMP1)
563         tweak_next(v15, v14, RTMP2)
564
565         ld1             {v0.16b-v3.16b}, [x2],
566         ld1             {v4.16b-v7.16b}, [x2],
567         eor             v0.16b, v0.16b,  v8.16
568         eor             v1.16b, v1.16b,  v9.16
569         eor             v2.16b, v2.16b, v10.16
570         eor             v3.16b, v3.16b, v11.16
571         eor             v4.16b, v4.16b, v12.16
572         eor             v5.16b, v5.16b, v13.16
573         eor             v6.16b, v6.16b, v14.16
574         eor             v7.16b, v7.16b, v15.16
575
576         SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5,
577
578         eor             v0.16b, v0.16b,  v8.16
579         eor             v1.16b, v1.16b,  v9.16
580         eor             v2.16b, v2.16b, v10.16
581         eor             v3.16b, v3.16b, v11.16
582         eor             v4.16b, v4.16b, v12.16
583         eor             v5.16b, v5.16b, v13.16
584         eor             v6.16b, v6.16b, v14.16
585         eor             v7.16b, v7.16b, v15.16
586         st1             {v0.16b-v3.16b}, [x1],
587         st1             {v4.16b-v7.16b}, [x1],
588
            /* v8 = tweak for the block after these eight */
589         tweak_next(v8, v15, RTMP3)
590
591         cbz             w4, .Lxts_enc_cts
592         b               .Lxts_enc_loop_8x
593
594 .Lxts_enc_4x:
            /*
             * Undo the subtraction done at the loop head.  w4 is 1..7
             * here: both ways into the 8x loop test w4 for zero first.
             */
595         add             w4, w4, #8
596         cmp             w4, #4
597         blt             .Lxts_enc_loop_1x
598
599         sub             w4, w4, #4
600
601         tweak_next( v9,  v8, RTMP0)
602         tweak_next(v10,  v9, RTMP1)
603         tweak_next(v11, v10, RTMP2)
604
605         ld1             {v0.16b-v3.16b}, [x2],
606         eor             v0.16b, v0.16b,  v8.16
607         eor             v1.16b, v1.16b,  v9.16
608         eor             v2.16b, v2.16b, v10.16
609         eor             v3.16b, v3.16b, v11.16
610
611         SM4_CRYPT_BLK4(v0, v1, v2, v3)
612
613         eor             v0.16b, v0.16b,  v8.16
614         eor             v1.16b, v1.16b,  v9.16
615         eor             v2.16b, v2.16b, v10.16
616         eor             v3.16b, v3.16b, v11.16
617         st1             {v0.16b-v3.16b}, [x1],
618
619         tweak_next(v8, v11, RTMP3)
620
621         cbz             w4, .Lxts_enc_cts
622
623 .Lxts_enc_loop_1x:
            /* one block per iteration; entered only with w4 != 0 */
624         sub             w4, w4, #1
625
626         ld1             {v0.16b}, [x2], #16
627         eor             v0.16b, v0.16b, v8.16b
628
629         SM4_CRYPT_BLK(v0)
630
631         eor             v0.16b, v0.16b, v8.16b
632         st1             {v0.16b}, [x1], #16
633
            /* advance the tweak in place */
634         tweak_next(v8, v8, RTMP0)
635
636         cbnz            w4, .Lxts_enc_loop_1x
637
638 .Lxts_enc_cts:
            /* no partial tail: just store the updated tweak */
639         cbz             x5, .Lxts_enc_end
640
641         /* cipher text stealing */
642
            /*
             * The last full block is processed with tweak v8; the
             * block assembled from the tail is processed with the
             * following tweak v9.
             */
643         tweak_next(v9, v8, RTMP0)
644         ld1             {v0.16b}, [x2]
645         eor             v0.16b, v0.16b, v8.16b
646         SM4_CRYPT_BLK(v0)
647         eor             v0.16b, v0.16b, v8.16b
648
649         /* load permute table */
            /* v3 = 16 bytes at table + tail, v4 = 16 bytes at table + 32 - tail */
650         adr_l           x6, .Lcts_permute_tabl
651         add             x7, x6, #32
652         add             x6, x6, x5
653         sub             x7, x7, x5
654         ld1             {v3.16b}, [x6]
655         ld1             {v4.16b}, [x7]
656
657         /* overlapping loads */
            /* 16 bytes starting at src + tail, i.e. ending at the end of the tail */
658         add             x2, x2, x5
659         ld1             {v1.16b}, [x2]
660
661         /* create Cn from En-1 */
662         tbl             v2.16b, {v0.16b}, v3.1
663         /* padding Pn with En-1 at the end */
664         tbx             v0.16b, {v1.16b}, v4.1
665
666         eor             v0.16b, v0.16b, v9.16b
667         SM4_CRYPT_BLK(v0)
668         eor             v0.16b, v0.16b, v9.16b
669
670
671         /* overlapping stores */
            /*
             * v2 goes to dst + tail first, then v0 to dst; the two
             * 16-byte stores overlap and the second one wins in the
             * shared bytes, so the order matters.
             */
672         add             x5, x1, x5
673         st1             {v2.16b}, [x5]
674         st1             {v0.16b}, [x1]
675
            /* the tweak is not written back on this path */
676         b               .Lxts_enc_ret
677
678 .Lxts_enc_end:
679         /* store new tweak */
680         st1             {v8.16b}, [x3]
681
682 .Lxts_enc_ret:
683         ret
684 SYM_FUNC_END(sm4_ce_xts_enc)
685                                                   
686 .align 3
687 SYM_FUNC_START(sm4_ce_xts_dec)
688         /* input:
689          *   x0: round key array, CTX
690          *   x1: dst
691          *   x2: src
692          *   x3: tweak (big endian, 128 bit)
693          *   w4: nbytes
694          *   x5: round key array for IV
695          */
            /*
             * XTS decryption.  Same structure as the encrypt routine:
             * each 16-byte block is XORed with the current tweak (v8),
             * run through SM4_CRYPT_BLK*, and XORed with the tweak
             * again; blocks go 8 at a time, then 4, then singly.
             * Whether the block operation decrypts is decided by the
             * round keys passed in x0, which this routine does not
             * inspect.
             *
             * The one difference visible here is the stealing step:
             * the last full block uses the later tweak (v9) and the
             * assembled final block uses v8.
             *
             * Register use after setup:
             *   w4      number of full blocks left for the bulk loops
             *   x5      number of tail bytes (0..15)
             *   v8      current tweak, v9-v15 the following tweaks
             *
             * NOTE(review): several lines in this listing are cut at a
             * fixed column, so some operands are incomplete here.
             * NOTE(review): v8-v15 are written without being saved in
             * this routine; presumably the caller makes that safe -
             * confirm.
             */
696         ld1             {v8.16b}, [x3]
697
            /* x5 == NULL: the tweak in v8 is used as loaded */
698         cbz             x5, .Lxts_dec_nofirst
699
700         SM4_PREPARE(x5)
701
702         /* Generate first tweak */
703         SM4_CRYPT_BLK(v8)
704
705 .Lxts_dec_nofirst:
706         SM4_PREPARE(x0)
707
            /*
             * w5 = nbytes % 16 (sets the flags used by csel below)
             * w4 = nbytes / 16, reduced by one when there is a tail so
             *      that the last full block is left for the stealing
             *      code at .Lxts_dec_cts
             */
708         ands            w5, w4, #15
709         lsr             w4, w4, #4
710         sub             w6, w4, #1
711         csel            w4, w4, w6, eq
712         uxtw            x5, w5
713
            /*
             * Constant consumed by tweak_next (macro defined outside
             * this chunk), built from 0x1 and 0x87 - see the macro for
             * the exact layout.
             */
714         movi            RMASK.2s, #0x1
715         movi            RTMP0.2s, #0x87
716         uzp1            RMASK.4s, RMASK.4s, RT
717
718         cbz             w4, .Lxts_dec_cts
719
720 .Lxts_dec_loop_8x:
            /* fewer than 8 blocks left: w4 goes negative, bit 31 set */
721         sub             w4, w4, #8
722         tbnz            w4, #31, .Lxts_dec_4x
723
            /* v9..v15 = the seven tweaks following v8 */
724         tweak_next( v9,  v8, RTMP0)
725         tweak_next(v10,  v9, RTMP1)
726         tweak_next(v11, v10, RTMP2)
727         tweak_next(v12, v11, RTMP3)
728         tweak_next(v13, v12, RTMP0)
729         tweak_next(v14, v13, RTMP1)
730         tweak_next(v15, v14, RTMP2)
731
732         ld1             {v0.16b-v3.16b}, [x2],
733         ld1             {v4.16b-v7.16b}, [x2],
734         eor             v0.16b, v0.16b,  v8.16
735         eor             v1.16b, v1.16b,  v9.16
736         eor             v2.16b, v2.16b, v10.16
737         eor             v3.16b, v3.16b, v11.16
738         eor             v4.16b, v4.16b, v12.16
739         eor             v5.16b, v5.16b, v13.16
740         eor             v6.16b, v6.16b, v14.16
741         eor             v7.16b, v7.16b, v15.16
742
743         SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5,
744
745         eor             v0.16b, v0.16b,  v8.16
746         eor             v1.16b, v1.16b,  v9.16
747         eor             v2.16b, v2.16b, v10.16
748         eor             v3.16b, v3.16b, v11.16
749         eor             v4.16b, v4.16b, v12.16
750         eor             v5.16b, v5.16b, v13.16
751         eor             v6.16b, v6.16b, v14.16
752         eor             v7.16b, v7.16b, v15.16
753         st1             {v0.16b-v3.16b}, [x1],
754         st1             {v4.16b-v7.16b}, [x1],
755
            /* v8 = tweak for the block after these eight */
756         tweak_next(v8, v15, RTMP3)
757
758         cbz             w4, .Lxts_dec_cts
759         b               .Lxts_dec_loop_8x
760
761 .Lxts_dec_4x:
            /*
             * Undo the subtraction done at the loop head.  w4 is 1..7
             * here: both ways into the 8x loop test w4 for zero first.
             */
762         add             w4, w4, #8
763         cmp             w4, #4
764         blt             .Lxts_dec_loop_1x
765
766         sub             w4, w4, #4
767
768         tweak_next( v9,  v8, RTMP0)
769         tweak_next(v10,  v9, RTMP1)
770         tweak_next(v11, v10, RTMP2)
771
772         ld1             {v0.16b-v3.16b}, [x2],
773         eor             v0.16b, v0.16b,  v8.16
774         eor             v1.16b, v1.16b,  v9.16
775         eor             v2.16b, v2.16b, v10.16
776         eor             v3.16b, v3.16b, v11.16
777
778         SM4_CRYPT_BLK4(v0, v1, v2, v3)
779
780         eor             v0.16b, v0.16b,  v8.16
781         eor             v1.16b, v1.16b,  v9.16
782         eor             v2.16b, v2.16b, v10.16
783         eor             v3.16b, v3.16b, v11.16
784         st1             {v0.16b-v3.16b}, [x1],
785
786         tweak_next(v8, v11, RTMP3)
787
788         cbz             w4, .Lxts_dec_cts
789
790 .Lxts_dec_loop_1x:
            /* one block per iteration; entered only with w4 != 0 */
791         sub             w4, w4, #1
792
793         ld1             {v0.16b}, [x2], #16
794         eor             v0.16b, v0.16b, v8.16b
795
796         SM4_CRYPT_BLK(v0)
797
798         eor             v0.16b, v0.16b, v8.16b
799         st1             {v0.16b}, [x1], #16
800
            /* advance the tweak in place */
801         tweak_next(v8, v8, RTMP0)
802
803         cbnz            w4, .Lxts_dec_loop_1x
804
805 .Lxts_dec_cts:
            /* no partial tail: just store the updated tweak */
806         cbz             x5, .Lxts_dec_end
807
808         /* cipher text stealing */
809
            /*
             * Tweak order is the reverse of the encrypt routine: the
             * last full input block is processed with v9 (the tweak
             * after v8), the assembled final block with v8.
             */
810         tweak_next(v9, v8, RTMP0)
811         ld1             {v0.16b}, [x2]
812         eor             v0.16b, v0.16b, v9.16b
813         SM4_CRYPT_BLK(v0)
814         eor             v0.16b, v0.16b, v9.16b
815
816         /* load permute table */
            /* v3 = 16 bytes at table + tail, v4 = 16 bytes at table + 32 - tail */
817         adr_l           x6, .Lcts_permute_tabl
818         add             x7, x6, #32
819         add             x6, x6, x5
820         sub             x7, x7, x5
821         ld1             {v3.16b}, [x6]
822         ld1             {v4.16b}, [x7]
823
824         /* overlapping loads */
            /* 16 bytes starting at src + tail, i.e. ending at the end of the tail */
825         add             x2, x2, x5
826         ld1             {v1.16b}, [x2]
827
            /*
             * NOTE(review): the two comments below use the same wording
             * as the encrypt routine; on this path v0 holds the result
             * of the block operation on the last full input block and
             * v1 holds input (presumably cipher text) bytes - confirm
             * the Cn/Pn/En-1 naming is still what is meant here.
             */
828         /* create Cn from En-1 */
829         tbl             v2.16b, {v0.16b}, v3.1
830         /* padding Pn with En-1 at the end */
831         tbx             v0.16b, {v1.16b}, v4.1
832
833         eor             v0.16b, v0.16b, v8.16b
834         SM4_CRYPT_BLK(v0)
835         eor             v0.16b, v0.16b, v8.16b
836
837
838         /* overlapping stores */
            /*
             * v2 goes to dst + tail first, then v0 to dst; the two
             * 16-byte stores overlap and the second one wins in the
             * shared bytes, so the order matters.
             */
839         add             x5, x1, x5
840         st1             {v2.16b}, [x5]
841         st1             {v0.16b}, [x1]
842
            /* the tweak is not written back on this path */
843         b               .Lxts_dec_ret
844
845 .Lxts_dec_end:
846         /* store new tweak */
847         st1             {v8.16b}, [x3]
848
849 .Lxts_dec_ret:
850         ret
851 SYM_FUNC_END(sm4_ce_xts_dec)
852                                                   
853 .align 3
854 SYM_FUNC_START(sm4_ce_mac_update)
855         /* input:
856          *   x0: round key array, CTX
857          *   x1: digest
858          *   x2: src
859          *   w3: nblocks
860          *   w4: enc_before
861          *   w5: enc_after
862          */
            /*
             * Chained MAC update: the running value RMAC is loaded
             * from [x1], each 16-byte source block is XORed into it
             * and RMAC is then passed through SM4_CRYPT_BLK, and the
             * result is stored back to [x1].
             *
             *   enc_before != 0: RMAC is run through the cipher once
             *                    before any input is absorbed.
             *   enc_after  == 0: the last input block is only XORed
             *                    into RMAC and not run through the
             *                    cipher (see .Lmac_end).
             *
             * With nblocks == 0 only the optional enc_before step is
             * performed.
             *
             * NOTE(review): several lines in this listing are cut at a
             * fixed column, so some operands are incomplete here.
             */
863         SM4_PREPARE(x0)
864
865         ld1             {RMAC.16b}, [x1]
866
867         cbz             w4, .Lmac_update
868
869         SM4_CRYPT_BLK(RMAC)
870
871 .Lmac_update:
872         cbz             w3, .Lmac_ret
873
            /*
             * w3 = number of blocks that get XOR + cipher:
             *      nblocks when enc_after != 0, else nblocks - 1
             *      (the held-back block is handled at .Lmac_end)
             */
874         sub             w6, w3, #1
875         cmp             w5, wzr
876         csel            w3, w3, w6, ne
877
878         cbz             w3, .Lmac_end
879
880 .Lmac_loop_4x:
881         cmp             w3, #4
882         blt             .Lmac_loop_1x
883
884         sub             w3, w3, #4
885
886         ld1             {v0.16b-v3.16b}, [x2],
887
            /*
             * The four blocks are loaded together but absorbed one
             * after another: each step uses the RMAC produced by the
             * previous one.
             */
888         eor             RMAC.16b, RMAC.16b, v0
889         SM4_CRYPT_BLK(RMAC)
890         eor             RMAC.16b, RMAC.16b, v1
891         SM4_CRYPT_BLK(RMAC)
892         eor             RMAC.16b, RMAC.16b, v2
893         SM4_CRYPT_BLK(RMAC)
894         eor             RMAC.16b, RMAC.16b, v3
895         SM4_CRYPT_BLK(RMAC)
896
897         cbz             w3, .Lmac_end
898         b               .Lmac_loop_4x
899
900 .Lmac_loop_1x:
            /* reached with 1..3 blocks left */
901         sub             w3, w3, #1
902
903         ld1             {v0.16b}, [x2], #16
904
905         eor             RMAC.16b, RMAC.16b, v0
906         SM4_CRYPT_BLK(RMAC)
907
908         cbnz            w3, .Lmac_loop_1x
909
910
911 .Lmac_end:
            /* enc_after != 0: every block has already been processed */
912         cbnz            w5, .Lmac_ret
913
            /* enc_after == 0: absorb the held-back block, XOR only */
914         ld1             {v0.16b}, [x2], #16
915         eor             RMAC.16b, RMAC.16b, v0
916
917 .Lmac_ret:
918         st1             {RMAC.16b}, [x1]
919         ret
920 SYM_FUNC_END(sm4_ce_mac_update)
921                                                   
922                                                   
923         .section        ".rodata", "a"
924         .align 4
            /*
             * Byte-index constant.  The name suggests a 128-bit byte
             * swap mask for a table lookup; its user is not in this
             * part of the file - confirm.  The byte lists below are cut
             * at a fixed column in this listing.
             */
925 .Lbswap128_mask:
926         .byte           0x0c, 0x0d, 0x0e, 0x0f
927         .byte           0x04, 0x05, 0x06, 0x07
928
            /*
             * Index table for the XTS cipher text stealing code, which
             * loads one 16-byte vector from this label + tail and one
             * from this label + 32 - tail (tail = nbytes % 16) and uses
             * them as tbl/tbx indices.  The visible rows are 0xff
             * filler, then ascending indices, then 0xff filler;
             * presumably 16 bytes each (48 in total), but the rows are
             * cut in this listing - confirm.
             */
929 .Lcts_permute_table:
930         .byte           0xff, 0xff, 0xff, 0xff
931         .byte           0xff, 0xff, 0xff, 0xff
932         .byte            0x0,  0x1,  0x2,  0x3
933         .byte            0x8,  0x9,  0xa,  0xb
934         .byte           0xff, 0xff, 0xff, 0xff
935         .byte           0xff, 0xff, 0xff, 0xff
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php