TOMOYO Linux Cross Reference
Linux/arch/arm64/crypto/sm4-ce-gcm-core.S


  1 /* SPDX-License-Identifier: GPL-2.0-or-later */
  2 /*
  3  * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
  4  * as specified in rfc8998
  5  * https://datatracker.ietf.org/doc/html/rfc8998
  6  *
  7  * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  8  * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
  9  */
 10                                                   
 11 #include <linux/linkage.h>                        
 12 #include <linux/cfi_types.h>                      
 13 #include <asm/assembler.h>                        
 14 #include "sm4-ce-asm.h"                           
 15                                                   
 16 .arch   armv8-a+crypto                            
 17                                                   
 18 .irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31
 19         .set .Lv\b\().4s, \b                      
 20 .endr                                             
 21                                                   
 22 .macro sm4e, vd, vn                               
 23         .inst 0xcec08400 | (.L\vn << 5) | .L\vd
 24 .endm                                             
 25                                                   
 26 /* Register macros */                             
 27                                                   
 28 /* Used for both encryption and decryption */     
 29 #define RHASH   v21                               
 30 #define RRCONST v22                               
 31 #define RZERO   v23                               
 32                                                   
 33 /* Helper macros. */                              
 34                                                   
 35 /*                                                
 36  * input: m0, m1                                  
 37  * output: r0:r1 (low 128-bits in r0, high in r1)
 38  */                                               
 39 #define PMUL_128x128(r0, r1, m0, m1, T0, T1)                    \
 40                 ext             T0.16b, m1.16b, m1.16b, #8;     \
 41                 pmull           r0.1q, m0.1d, m1.1d;            \
 42                 pmull           T1.1q, m0.1d, T0.1d;            \
 43                 pmull2          T0.1q, m0.2d, T0.2d;            \
 44                 pmull2          r1.1q, m0.2d, m1.2d;            \
 45                 eor             T0.16b, T0.16b, T1.16b;         \
 46                 ext             T1.16b, RZERO.16b, T0.16b, #8;  \
 47                 ext             T0.16b, T0.16b, RZERO.16b, #8;  \
 48                 eor             r0.16b, r0.16b, T1.16b;         \
 49                 eor             r1.16b, r1.16b, T0.16b;
 50                                                   
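The PMUL_128x128 macro above is a schoolbook 128x128-bit carry-less multiply: four 64x64 PMULL/PMULL2 products, with the two cross products XORed together and folded into the 256-bit result via EXT against RZERO. A minimal portable C sketch of the same arithmetic follows; clmul64() and clmul128() are illustrative helpers (not kernel APIs), and NEON register byte ordering is ignored.

#include <stdint.h>

/* Carry-less (GF(2)[x]) multiply of two 64-bit polynomials -> 128 bits. */
static void clmul64(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
{
	uint64_t rl = 0, rh = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if ((b >> i) & 1) {
			rl ^= a << i;
			rh ^= i ? a >> (64 - i) : 0;
		}
	}
	*lo = rl;
	*hi = rh;
}

/* 128x128 -> 256-bit carry-less multiply, schoolbook, as in PMUL_128x128:
 * r = m0 * m1, 64-bit limbs least-significant first. */
static void clmul128(const uint64_t m0[2], const uint64_t m1[2], uint64_t r[4])
{
	uint64_t ll0, ll1, lh0, lh1, hl0, hl1, hh0, hh1;

	clmul64(m0[0], m1[0], &ll0, &ll1);	/* lo * lo  (pmull  r0) */
	clmul64(m0[0], m1[1], &lh0, &lh1);	/* lo * hi  (pmull  T1) */
	clmul64(m0[1], m1[0], &hl0, &hl1);	/* hi * lo  (pmull2 T0) */
	clmul64(m0[1], m1[1], &hh0, &hh1);	/* hi * hi  (pmull2 r1) */

	r[0] = ll0;
	r[1] = ll1 ^ lh0 ^ hl0;		/* cross terms land on the seam */
	r[2] = hh0 ^ lh1 ^ hl1;
	r[3] = hh1;
}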
 51 #define PMUL_128x128_4x(r0, r1, m0, m1, T0, T1    
 52                         r2, r3, m2, m3, T2, T3    
 53                         r4, r5, m4, m5, T4, T5    
 54                         r6, r7, m6, m7, T6, T7    
 55                 ext             T0.16b, m1.16b    
 56                 ext             T2.16b, m3.16b    
 57                 ext             T4.16b, m5.16b    
 58                 ext             T6.16b, m7.16b    
 59                 pmull           r0.1q, m0.1d,     
 60                 pmull           r2.1q, m2.1d,     
 61                 pmull           r4.1q, m4.1d,     
 62                 pmull           r6.1q, m6.1d,     
 63                 pmull           T1.1q, m0.1d,     
 64                 pmull           T3.1q, m2.1d,     
 65                 pmull           T5.1q, m4.1d,     
 66                 pmull           T7.1q, m6.1d,     
 67                 pmull2          T0.1q, m0.2d,     
 68                 pmull2          T2.1q, m2.2d,     
 69                 pmull2          T4.1q, m4.2d,     
 70                 pmull2          T6.1q, m6.2d,     
 71                 pmull2          r1.1q, m0.2d,     
 72                 pmull2          r3.1q, m2.2d,     
 73                 pmull2          r5.1q, m4.2d,     
 74                 pmull2          r7.1q, m6.2d,     
 75                 eor             T0.16b, T0.16b    
 76                 eor             T2.16b, T2.16b    
 77                 eor             T4.16b, T4.16b    
 78                 eor             T6.16b, T6.16b    
 79                 ext             T1.16b, RZERO.    
 80                 ext             T3.16b, RZERO.    
 81                 ext             T5.16b, RZERO.    
 82                 ext             T7.16b, RZERO.    
 83                 ext             T0.16b, T0.16b    
 84                 ext             T2.16b, T2.16b    
 85                 ext             T4.16b, T4.16b    
 86                 ext             T6.16b, T6.16b    
 87                 eor             r0.16b, r0.16b    
 88                 eor             r2.16b, r2.16b    
 89                 eor             r4.16b, r4.16b    
 90                 eor             r6.16b, r6.16b    
 91                 eor             r1.16b, r1.16b    
 92                 eor             r3.16b, r3.16b    
 93                 eor             r5.16b, r5.16b    
 94                 eor             r7.16b, r7.16b    
 95                                                   
 96 /*                                                
 97  * input: r0:r1 (low 128-bits in r0, high in r1)
 98  * output: a                                      
 99  */                                               
100 #define REDUCTION(a, r0, r1, rconst, T0, T1)                    \
101                 pmull2          T0.1q, r1.2d, rconst.2d;        \
102                 ext             T1.16b, T0.16b, RZERO.16b, #8;  \
103                 ext             T0.16b, RZERO.16b, T0.16b, #8;  \
104                 eor             r1.16b, r1.16b, T1.16b;         \
105                 eor             r0.16b, r0.16b, T0.16b;         \
106                 pmull           T0.1q, r1.1d, rconst.1d;        \
107                 eor             a.16b, r0.16b, T0.16b;
108                                                   
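The REDUCTION macro folds such a 256-bit product back to 128 bits using the constant 0x87 from .Lghash_rconst, i.e. the low terms x^7 + x^2 + x + 1 of the GHASH polynomial x^128 + x^7 + x^2 + x + 1. Below is a hedged portable sketch of the same two-stage fold, reusing clmul64() from the sketch above; it illustrates the algebra only and does not reproduce the RBIT/byte-order handling done elsewhere in this file.

/* Reduce r[0..3] (a 256-bit carry-less product, least-significant limb
 * first) modulo x^128 + x^7 + x^2 + x + 1.  Since x^128 == 0x87 in this
 * representation, the high 128 bits are multiplied by 0x87 and folded
 * down; the small overflow of that fold is pre-folded so it gets reduced
 * a second time for free. */
static void ghash_reduce(uint64_t out[2], const uint64_t r[4])
{
	uint64_t lo0 = r[0], lo1 = r[1], hi0 = r[2], hi1 = r[3];
	uint64_t t0, t1;

	clmul64(hi1, 0x87, &t0, &t1);	/* t1 is at most 7 bits wide */
	hi0 ^= t1;			/* fold the overflow back first */
	lo1 ^= t0;
	clmul64(hi0, 0x87, &t0, &t1);
	out[0] = lo0 ^ t0;
	out[1] = lo1 ^ t1;
}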
109 #define SM4_CRYPT_PMUL_128x128_BLK(b0, r0, r1,    
110         rev32                   b0.16b, b0.16b    
111                 ext             T0.16b, m1.16b    
112         sm4e                    b0.4s, v24.4s;    
113                 pmull           r0.1q, m0.1d,     
114         sm4e                    b0.4s, v25.4s;    
115                 pmull           T1.1q, m0.1d,     
116         sm4e                    b0.4s, v26.4s;    
117                 pmull2          T0.1q, m0.2d,     
118         sm4e                    b0.4s, v27.4s;    
119                 pmull2          r1.1q, m0.2d,     
120         sm4e                    b0.4s, v28.4s;    
121                 eor             T0.16b, T0.16b    
122         sm4e                    b0.4s, v29.4s;    
123                 ext             T1.16b, RZERO.    
124         sm4e                    b0.4s, v30.4s;    
125                 ext             T0.16b, T0.16b    
126         sm4e                    b0.4s, v31.4s;    
127                 eor             r0.16b, r0.16b    
128         rev64                   b0.4s, b0.4s;     
129                 eor             r1.16b, r1.16b    
130         ext                     b0.16b, b0.16b    
131         rev32                   b0.16b, b0.16b    
132                                                   
133 #define SM4_CRYPT_PMUL_128x128_BLK3(b0, b1, b2    
134                                     r0, r1, m0    
135                                     r2, r3, m2    
136                                     r4, r5, m4    
137         rev32                   b0.16b, b0.16b    
138         rev32                   b1.16b, b1.16b    
139         rev32                   b2.16b, b2.16b    
140                 ext             T0.16b, m1.16b    
141                 ext             T2.16b, m3.16b    
142                 ext             T4.16b, m5.16b    
143         sm4e                    b0.4s, v24.4s;    
144         sm4e                    b1.4s, v24.4s;    
145         sm4e                    b2.4s, v24.4s;    
146                 pmull           r0.1q, m0.1d,     
147                 pmull           r2.1q, m2.1d,     
148                 pmull           r4.1q, m4.1d,     
149         sm4e                    b0.4s, v25.4s;    
150         sm4e                    b1.4s, v25.4s;    
151         sm4e                    b2.4s, v25.4s;    
152                 pmull           T1.1q, m0.1d,     
153                 pmull           T3.1q, m2.1d,     
154                 pmull           T5.1q, m4.1d,     
155         sm4e                    b0.4s, v26.4s;    
156         sm4e                    b1.4s, v26.4s;    
157         sm4e                    b2.4s, v26.4s;    
158                 pmull2          T0.1q, m0.2d,     
159                 pmull2          T2.1q, m2.2d,     
160                 pmull2          T4.1q, m4.2d,     
161         sm4e                    b0.4s, v27.4s;    
162         sm4e                    b1.4s, v27.4s;    
163         sm4e                    b2.4s, v27.4s;    
164                 pmull2          r1.1q, m0.2d,     
165                 pmull2          r3.1q, m2.2d,     
166                 pmull2          r5.1q, m4.2d,     
167         sm4e                    b0.4s, v28.4s;    
168         sm4e                    b1.4s, v28.4s;    
169         sm4e                    b2.4s, v28.4s;    
170                 eor             T0.16b, T0.16b    
171                 eor             T2.16b, T2.16b    
172                 eor             T4.16b, T4.16b    
173         sm4e                    b0.4s, v29.4s;    
174         sm4e                    b1.4s, v29.4s;    
175         sm4e                    b2.4s, v29.4s;    
176                 ext             T1.16b, RZERO.    
177                 ext             T3.16b, RZERO.    
178                 ext             T5.16b, RZERO.    
179         sm4e                    b0.4s, v30.4s;    
180         sm4e                    b1.4s, v30.4s;    
181         sm4e                    b2.4s, v30.4s;    
182                 ext             T0.16b, T0.16b    
183                 ext             T2.16b, T2.16b    
184                 ext             T4.16b, T4.16b    
185         sm4e                    b0.4s, v31.4s;    
186         sm4e                    b1.4s, v31.4s;    
187         sm4e                    b2.4s, v31.4s;    
188                 eor             r0.16b, r0.16b    
189                 eor             r2.16b, r2.16b    
190                 eor             r4.16b, r4.16b    
191         rev64                   b0.4s, b0.4s;     
192         rev64                   b1.4s, b1.4s;     
193         rev64                   b2.4s, b2.4s;     
194                 eor             r1.16b, r1.16b    
195                 eor             r3.16b, r3.16b    
196                 eor             r5.16b, r5.16b    
197         ext                     b0.16b, b0.16b    
198         ext                     b1.16b, b1.16b    
199         ext                     b2.16b, b2.16b    
200                 eor             r0.16b, r0.16b    
201                 eor             r1.16b, r1.16b    
202         rev32                   b0.16b, b0.16b    
203         rev32                   b1.16b, b1.16b    
204         rev32                   b2.16b, b2.16b    
205                 eor             r0.16b, r0.16b    
206                 eor             r1.16b, r1.16b    
207                                                   
208 #define inc32_le128(vctr)                                       \
209                 mov             vctr.d[1], x9;                  \
210                 add             w6, w9, #1;                     \
211                 mov             vctr.d[0], x8;                  \
212                 bfi             x9, x6, #0, #32;                \
213                 rev64           vctr.16b, vctr.16b;
214                                                   
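inc32_le128 implements GCM's inc32() on a counter kept in x8/x9: only the low 32 bits of the 128-bit big-endian counter block change from block to block, and rev64 produces the vector-register layout the cipher rounds expect. For reference, a byte-oriented C sketch of inc32 on a 16-byte big-endian counter block (gcm_inc32() is illustrative, not a kernel function):

#include <stdint.h>

/* GCM inc32(): increment only the last 32 bits of the big-endian
 * counter block, wrapping mod 2^32; the upper 96 bits are untouched. */
static void gcm_inc32(uint8_t ctr[16])
{
	uint32_t c = ((uint32_t)ctr[12] << 24) | ((uint32_t)ctr[13] << 16) |
		     ((uint32_t)ctr[14] << 8)  |  (uint32_t)ctr[15];

	c++;
	ctr[12] = (uint8_t)(c >> 24);
	ctr[13] = (uint8_t)(c >> 16);
	ctr[14] = (uint8_t)(c >> 8);
	ctr[15] = (uint8_t)c;
}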
215 #define GTAG_HASH_LENGTHS(vctr0, vlen)            
216                 ld1             {vlen.16b}, [x    
217                 /* construct CTR0 */              
218                 /* the lower 32-bits of initia    
219                 mov             x6, #0x1;         
220                 bfi             x9, x6, #0, #3    
221                 mov             vctr0.d[0], x8    
222                 mov             vctr0.d[1], x9    
223                 rbit            vlen.16b, vlen    
224                 rev64           vctr0.16b, vct    
225                 /* authtag = GCTR(CTR0, GHASH)    
226                 eor             RHASH.16b, RHA    
227                 SM4_CRYPT_PMUL_128x128_BLK(vct    
228                                            RTM    
229                 REDUCTION(RHASH, RR0, RR1, RRC    
230                 rbit            RHASH.16b, RHA    
231                 eor             RHASH.16b, RHA    
232                                                   
233                                                   
234 /* Register macros for encrypt and ghash */       
235                                                   
236 /* can be the same as input v0-v3 */              
237 #define RR1     v0                                
238 #define RR3     v1                                
239 #define RR5     v2                                
240 #define RR7     v3                                
241                                                   
242 #define RR0     v4                                
243 #define RR2     v5                                
244 #define RR4     v6                                
245 #define RR6     v7                                
246                                                   
247 #define RTMP0   v8                                
248 #define RTMP1   v9                                
249 #define RTMP2   v10                               
250 #define RTMP3   v11                               
251 #define RTMP4   v12                               
252 #define RTMP5   v13                               
253 #define RTMP6   v14                               
254 #define RTMP7   v15                               
255                                                   
256 #define RH1     v16                               
257 #define RH2     v17                               
258 #define RH3     v18                               
259 #define RH4     v19                               
260                                                   
261 .align 3                                          
262 SYM_FUNC_START(sm4_ce_pmull_ghash_setup)          
263         /* input:                                 
264          *   x0: round key array, CTX             
265          *   x1: ghash table                      
266          */                                       
267         SM4_PREPARE(x0)                           
268                                                   
269         adr_l           x2, .Lghash_rconst        
270         ld1r            {RRCONST.2d}, [x2]        
271                                                   
272         eor             RZERO.16b, RZERO.16b, RZERO.16b
273                                                   
274         /* H = E(K, 0^128) */                     
275         rev32           v0.16b, RZERO.16b         
276         SM4_CRYPT_BLK_BE(v0)                      
277                                                   
278         /* H ^ 1 */                               
279         rbit            RH1.16b, v0.16b           
280                                                   
281         /* H ^ 2 */                               
282         PMUL_128x128(RR0, RR1, RH1, RH1, RTMP0    
283         REDUCTION(RH2, RR0, RR1, RRCONST, RTMP    
284                                                   
285         /* H ^ 3 */                               
286         PMUL_128x128(RR0, RR1, RH2, RH1, RTMP0    
287         REDUCTION(RH3, RR0, RR1, RRCONST, RTMP    
288                                                   
289         /* H ^ 4 */                               
290         PMUL_128x128(RR0, RR1, RH2, RH2, RTMP0    
291         REDUCTION(RH4, RR0, RR1, RRCONST, RTMP    
292                                                   
293         st1             {RH1.16b-RH4.16b}, [x1]
294                                                   
295         ret                                       
296 SYM_FUNC_END(sm4_ce_pmull_ghash_setup)            
297                                                   
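sm4_ce_pmull_ghash_setup derives H = E_K(0^128), bit-reflects it, and stores the powers H^1..H^4 consumed by the 4-way loops below. A sketch of the same precomputation on top of the clmul128()/ghash_reduce() helpers above; gf128_mul() is an illustrative composition, not a kernel API, and the bit-reflected representation is assumed to be already in place.

/* One GF(2^128) multiplication: carry-less product, then reduction. */
static void gf128_mul(uint64_t r[2], const uint64_t a[2], const uint64_t b[2])
{
	uint64_t t[4];

	clmul128(a, b, t);
	ghash_reduce(r, t);
}

/* Power table as stored by the setup routine: H^1, H^2, H^3, H^4. */
static void ghash_table_setup(uint64_t Hpow[4][2], const uint64_t H[2])
{
	Hpow[0][0] = H[0];
	Hpow[0][1] = H[1];			/* H^1             */
	gf128_mul(Hpow[1], Hpow[0], Hpow[0]);	/* H^2 = H * H     */
	gf128_mul(Hpow[2], Hpow[1], Hpow[0]);	/* H^3 = H^2 * H   */
	gf128_mul(Hpow[3], Hpow[1], Hpow[1]);	/* H^4 = H^2 * H^2 */
}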
298 .align 3                                          
299 SYM_FUNC_START(pmull_ghash_update)                
300         /* input:                                 
301          *   x0: ghash table                      
302          *   x1: ghash result                     
303          *   x2: src                              
304          *   w3: nblocks                          
305          */                                       
306         ld1             {RH1.16b-RH4.16b}, [x0]
307                                                   
308         ld1             {RHASH.16b}, [x1]         
309         rbit            RHASH.16b, RHASH.16b      
310                                                   
311         adr_l           x4, .Lghash_rconst        
312         ld1r            {RRCONST.2d}, [x4]        
313                                                   
314         eor             RZERO.16b, RZERO.16b, RZERO.16b
315                                                   
316 .Lghash_loop_4x:                                  
317         cmp             w3, #4                    
318         blt             .Lghash_loop_1x           
319                                                   
320         sub             w3, w3, #4                
321                                                   
322         ld1             {v0.16b-v3.16b}, [x2], #64
323                                                   
324         rbit            v0.16b, v0.16b            
325         rbit            v1.16b, v1.16b            
326         rbit            v2.16b, v2.16b            
327         rbit            v3.16b, v3.16b            
328                                                   
329         /*                                        
330          * (in0 ^ HASH) * H^4 => rr0:rr1          
331          * (in1)        * H^3 => rr2:rr3          
332          * (in2)        * H^2 => rr4:rr5          
333          * (in3)        * H^1 => rr6:rr7          
334          */                                       
335         eor             RHASH.16b, RHASH.16b, v0.16b
336                                                   
337         PMUL_128x128_4x(RR0, RR1, RHASH, RH4,     
338                         RR2, RR3, v1, RH3, RTM    
339                         RR4, RR5, v2, RH2, RTM    
340                         RR6, RR7, v3, RH1, RTM    
341                                                   
342         eor             RR0.16b, RR0.16b, RR2.16b
343         eor             RR1.16b, RR1.16b, RR3.16b
344         eor             RR0.16b, RR0.16b, RR4.16b
345         eor             RR1.16b, RR1.16b, RR5.16b
346         eor             RR0.16b, RR0.16b, RR6.16b
347         eor             RR1.16b, RR1.16b, RR7.16b
348                                                   
349         REDUCTION(RHASH, RR0, RR1, RRCONST, RT    
350                                                   
351         cbz             w3, .Lghash_end           
352         b               .Lghash_loop_4x           
353                                                   
354 .Lghash_loop_1x:                                  
355         sub             w3, w3, #1                
356                                                   
357         ld1             {v0.16b}, [x2], #16       
358         rbit            v0.16b, v0.16b            
359         eor             RHASH.16b, RHASH.16b, v0.16b
360                                                   
361         PMUL_128x128(RR0, RR1, RHASH, RH1, RTM    
362         REDUCTION(RHASH, RR0, RR1, RRCONST, RT    
363                                                   
364         cbnz            w3, .Lghash_loop_1x       
365                                                   
366 .Lghash_end:                                      
367         rbit            RHASH.16b, RHASH.16b      
368         st1             {RHASH.2d}, [x1]          
369                                                   
370         ret                                       
371 SYM_FUNC_END(pmull_ghash_update)                  
372                                                   
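pmull_ghash_update processes four blocks per iteration in the aggregated form spelled out by the in-line comment: X = ((X ^ C0)*H^4 ^ C1*H^3 ^ C2*H^2 ^ C3*H) mod P, with a one-block fallback loop for the remainder. A sketch of one aggregated step built on the earlier helpers (Hpow[] as produced by the ghash_table_setup() sketch above):

/* One 4-block aggregated GHASH step, mirroring .Lghash_loop_4x.
 * Hpow[0] = H^1 ... Hpow[3] = H^4; C[0..3] are the four input blocks. */
static void ghash_4x(uint64_t X[2], const uint64_t Hpow[4][2],
		     const uint64_t C[4][2])
{
	uint64_t acc[4] = { 0, 0, 0, 0 };
	uint64_t blk[2], t[4];
	int i, j;

	for (i = 0; i < 4; i++) {
		blk[0] = C[i][0] ^ (i == 0 ? X[0] : 0);	/* X folds into C0 only */
		blk[1] = C[i][1] ^ (i == 0 ? X[1] : 0);
		clmul128(blk, Hpow[3 - i], t);		/* C0*H^4, C1*H^3, ... */
		for (j = 0; j < 4; j++)
			acc[j] ^= t[j];			/* sum before reducing */
	}
	ghash_reduce(X, acc);	/* a single reduction per four blocks */
}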
373 .align 3                                          
374 SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_enc)        
375         /* input:                                 
376          *   x0: round key array, CTX             
377          *   x1: dst                              
378          *   x2: src                              
379          *   x3: ctr (big endian, 128 bit)        
380          *   w4: nbytes                           
381          *   x5: ghash result                     
382          *   x6: ghash table                      
383          *   x7: lengths (only for last block)    
384          */                                       
385         SM4_PREPARE(x0)                           
386                                                   
387         ldp             x8, x9, [x3]              
388         rev             x8, x8                    
389         rev             x9, x9                    
390                                                   
391         ld1             {RH1.16b-RH4.16b}, [x6]
392                                                   
393         ld1             {RHASH.16b}, [x5]         
394         rbit            RHASH.16b, RHASH.16b      
395                                                   
396         adr_l           x6, .Lghash_rconst        
397         ld1r            {RRCONST.2d}, [x6]        
398                                                   
399         eor             RZERO.16b, RZERO.16b, RZERO.16b
400                                                   
401         cbz             w4, .Lgcm_enc_hash_len    
402                                                   
403 .Lgcm_enc_loop_4x:                                
404         cmp             w4, #(4 * 16)             
405         blt             .Lgcm_enc_loop_1x         
406                                                   
407         sub             w4, w4, #(4 * 16)         
408                                                   
409         /* construct CTRs */                      
410         inc32_le128(v0)                 /* +0 */
411         inc32_le128(v1)                 /* +1 */
412         inc32_le128(v2)                 /* +2 */
413         inc32_le128(v3)                 /* +3 */
414                                                   
415         ld1             {RTMP0.16b-RTMP3.16b}, [x2], #64
416                                                   
417         SM4_CRYPT_BLK4(v0, v1, v2, v3)            
418                                                   
419         eor             v0.16b, v0.16b, RTMP0.16b
420         eor             v1.16b, v1.16b, RTMP1.16b
421         eor             v2.16b, v2.16b, RTMP2.16b
422         eor             v3.16b, v3.16b, RTMP3.16b
423         st1             {v0.16b-v3.16b}, [x1], #64
424                                                   
425         /* ghash update */                        
426                                                   
427         rbit            v0.16b, v0.16b            
428         rbit            v1.16b, v1.16b            
429         rbit            v2.16b, v2.16b            
430         rbit            v3.16b, v3.16b            
431                                                   
432         /*                                        
433          * (in0 ^ HASH) * H^4 => rr0:rr1          
434          * (in1)        * H^3 => rr2:rr3          
435          * (in2)        * H^2 => rr4:rr5          
436          * (in3)        * H^1 => rr6:rr7          
437          */                                       
438         eor             RHASH.16b, RHASH.16b, v0.16b
439                                                   
440         PMUL_128x128_4x(RR0, RR1, RHASH, RH4,     
441                         RR2, RR3, v1, RH3, RTM    
442                         RR4, RR5, v2, RH2, RTM    
443                         RR6, RR7, v3, RH1, RTM    
444                                                   
445         eor             RR0.16b, RR0.16b, RR2.16b
446         eor             RR1.16b, RR1.16b, RR3.16b
447         eor             RR0.16b, RR0.16b, RR4.16b
448         eor             RR1.16b, RR1.16b, RR5.16b
449         eor             RR0.16b, RR0.16b, RR6.16b
450         eor             RR1.16b, RR1.16b, RR7.16b
451                                                   
452         REDUCTION(RHASH, RR0, RR1, RRCONST, RT    
453                                                   
454         cbz             w4, .Lgcm_enc_hash_len    
455         b               .Lgcm_enc_loop_4x         
456                                                   
457 .Lgcm_enc_loop_1x:                                
458         cmp             w4, #16                   
459         blt             .Lgcm_enc_tail            
460                                                   
461         sub             w4, w4, #16               
462                                                   
463         /* construct CTRs */                      
464         inc32_le128(v0)                           
465                                                   
466         ld1             {RTMP0.16b}, [x2], #16    
467                                                   
468         SM4_CRYPT_BLK(v0)                         
469                                                   
470         eor             v0.16b, v0.16b, RTMP0.16b
471         st1             {v0.16b}, [x1], #16       
472                                                   
473         /* ghash update */                        
474         rbit            v0.16b, v0.16b            
475         eor             RHASH.16b, RHASH.16b, v0.16b
476         PMUL_128x128(RR0, RR1, RHASH, RH1, RTM    
477         REDUCTION(RHASH, RR0, RR1, RRCONST, RT    
478                                                   
479         cbz             w4, .Lgcm_enc_hash_len    
480         b               .Lgcm_enc_loop_1x         
481                                                   
482 .Lgcm_enc_tail:                                   
483         /* construct CTRs */                      
484         inc32_le128(v0)                           
485         SM4_CRYPT_BLK(v0)                         
486                                                   
487         /* load permute table */                  
488         adr_l           x0, .Lcts_permute_table
489         add             x0, x0, #32               
490         sub             x0, x0, w4, uxtw          
491         ld1             {v3.16b}, [x0]            
492                                                   
493 .Lgcm_enc_tail_loop:                              
494         /* do encrypt */                          
495         ldrb            w0, [x2], #1    /* get    
496         umov            w6, v0.b[0]     /* get    
497         eor             w6, w6, w0      /* w6     
498         strb            w6, [x1], #1    /* sto    
499                                                   
500         /* shift right out one byte */            
501         ext             v0.16b, v0.16b, v0.16b, #1
502         /* the last ciphertext is placed in hi    
503         ins             v0.b[15], w6              
504                                                   
505         subs            w4, w4, #1                
506         bne             .Lgcm_enc_tail_loop       
507                                                   
508         /* padding last block with zeros */       
509         tbl             v0.16b, {v0.16b}, v3.16b
510                                                   
511         /* ghash update */                        
512         rbit            v0.16b, v0.16b            
513         eor             RHASH.16b, RHASH.16b, v0.16b
514         PMUL_128x128(RR0, RR1, RHASH, RH1, RTM    
515         REDUCTION(RHASH, RR0, RR1, RRCONST, RT    
516                                                   
517 .Lgcm_enc_hash_len:                               
518         cbz             x7, .Lgcm_enc_end         
519                                                   
520         GTAG_HASH_LENGTHS(v1, v3)                 
521                                                   
522         b               .Lgcm_enc_ret             
523                                                   
524 .Lgcm_enc_end:                                    
525         /* store new CTR */                       
526         rev             x8, x8                    
527         rev             x9, x9                    
528         stp             x8, x9, [x3]              
529                                                   
530         rbit            RHASH.16b, RHASH.16b      
531                                                   
532 .Lgcm_enc_ret:                                    
533         /* store new MAC */                       
534         st1             {RHASH.2d}, [x5]          
535                                                   
536         ret                                       
537 SYM_FUNC_END(sm4_ce_pmull_gcm_enc)                
538                                                   
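The .Lgcm_enc_tail path above handles a final partial block byte by byte: each plaintext byte is XORed with one keystream byte, the produced ciphertext is kept in the vector, and the permute table zero-pads it to a full block before the last GHASH update. A short illustrative C sketch of that tail handling (names are placeholders, not kernel APIs):

/* Partial-block CTR tail, as in .Lgcm_enc_tail: XOR keystream bytes one
 * at a time, and build the zero-padded ciphertext block that is fed to
 * GHASH afterwards. */
static void ctr_tail_enc(uint8_t *dst, const uint8_t *src,
			 const uint8_t keystream[16], unsigned int nbytes,
			 uint8_t padded[16])
{
	unsigned int i;

	for (i = 0; i < 16; i++)
		padded[i] = 0;
	for (i = 0; i < nbytes; i++) {
		dst[i] = src[i] ^ keystream[i];
		padded[i] = dst[i];	/* ciphertext is what gets hashed */
	}
}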
539 #undef  RR1                                       
540 #undef  RR3                                       
541 #undef  RR5                                       
542 #undef  RR7                                       
543 #undef  RR0                                       
544 #undef  RR2                                       
545 #undef  RR4                                       
546 #undef  RR6                                       
547 #undef RTMP0                                      
548 #undef RTMP1                                      
549 #undef RTMP2                                      
550 #undef RTMP3                                      
551 #undef RTMP4                                      
552 #undef RTMP5                                      
553 #undef RTMP6                                      
554 #undef RTMP7                                      
555 #undef  RH1                                       
556 #undef  RH2                                       
557 #undef  RH3                                       
558 #undef  RH4                                       
559                                                   
560                                                   
561 /* Register macros for decrypt */                 
562                                                   
563 /* v0-v2 for building CTRs, v3-v5 for saving i    
564                                                   
565 #define RR1     v6                                
566 #define RR3     v7                                
567 #define RR5     v8                                
568                                                   
569 #define RR0     v9                                
570 #define RR2     v10                               
571 #define RR4     v11                               
572                                                   
573 #define RTMP0   v12                               
574 #define RTMP1   v13                               
575 #define RTMP2   v14                               
576 #define RTMP3   v15                               
577 #define RTMP4   v16                               
578 #define RTMP5   v17                               
579                                                   
580 #define RH1     v18                               
581 #define RH2     v19                               
582 #define RH3     v20                               
583                                                   
584 .align 3                                          
585 SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_dec)        
586         /* input:                                 
587          *   x0: round key array, CTX             
588          *   x1: dst                              
589          *   x2: src                              
590          *   x3: ctr (big endian, 128 bit)        
591          *   w4: nbytes                           
592          *   x5: ghash result                     
593          *   x6: ghash table                      
594          *   x7: lengths (only for last block)    
595          */                                       
596         SM4_PREPARE(x0)                           
597                                                   
598         ldp             x8, x9, [x3]              
599         rev             x8, x8                    
600         rev             x9, x9                    
601                                                   
602         ld1             {RH1.16b-RH3.16b}, [x6]
603                                                   
604         ld1             {RHASH.16b}, [x5]         
605         rbit            RHASH.16b, RHASH.16b      
606                                                   
607         adr_l           x6, .Lghash_rconst        
608         ld1r            {RRCONST.2d}, [x6]        
609                                                   
610         eor             RZERO.16b, RZERO.16b, RZERO.16b
611                                                   
612         cbz             w4, .Lgcm_dec_hash_len    
613                                                   
614 .Lgcm_dec_loop_3x:                                
615         cmp             w4, #(3 * 16)             
616         blt             .Lgcm_dec_loop_1x         
617                                                   
618         sub             w4, w4, #(3 * 16)         
619                                                   
620         ld1             {v3.16b-v5.16b}, [x2], #48
621                                                   
622         /* construct CTRs */                      
623         inc32_le128(v0)                 /* +0 */
624         rbit            v6.16b, v3.16b
625         inc32_le128(v1)                 /* +1 */
626         rbit            v7.16b, v4.16b
627         inc32_le128(v2)                 /* +2 */
628         rbit            v8.16b, v5.16b
629                                                   
630         eor             RHASH.16b, RHASH.16b, v6.16b
631                                                   
632         /* decrypt & ghash update */              
633         SM4_CRYPT_PMUL_128x128_BLK3(v0, v1, v2    
634                                     RR0, RR1,     
635                                     RR2, RR3,     
636                                     RR4, RR5,     
637                                                   
638         eor             v0.16b, v0.16b, v3.16b    
639         eor             v1.16b, v1.16b, v4.16b    
640         eor             v2.16b, v2.16b, v5.16b    
641                                                   
642         REDUCTION(RHASH, RR0, RR1, RRCONST, RT    
643                                                   
644         st1             {v0.16b-v2.16b}, [x1], #48
645                                                   
646         cbz             w4, .Lgcm_dec_hash_len    
647         b               .Lgcm_dec_loop_3x         
648                                                   
649 .Lgcm_dec_loop_1x:                                
650         cmp             w4, #16                   
651         blt             .Lgcm_dec_tail            
652                                                   
653         sub             w4, w4, #16               
654                                                   
655         ld1             {v3.16b}, [x2], #16       
656                                                   
657         /* construct CTRs */                      
658         inc32_le128(v0)                           
659         rbit            v6.16b, v3.16b            
660                                                   
661         eor             RHASH.16b, RHASH.16b, v6.16b
662                                                   
663         SM4_CRYPT_PMUL_128x128_BLK(v0, RR0, RR    
664                                                   
665         eor             v0.16b, v0.16b, v3.16b    
666                                                   
667         REDUCTION(RHASH, RR0, RR1, RRCONST, RT    
668                                                   
669         st1             {v0.16b}, [x1], #16       
670                                                   
671         cbz             w4, .Lgcm_dec_hash_len    
672         b               .Lgcm_dec_loop_1x         
673                                                   
674 .Lgcm_dec_tail:                                   
675         /* construct CTRs */                      
676         inc32_le128(v0)                           
677         SM4_CRYPT_BLK(v0)                         
678                                                   
679         /* load permute table */                  
680         adr_l           x0, .Lcts_permute_table
681         add             x0, x0, #32               
682         sub             x0, x0, w4, uxtw          
683         ld1             {v3.16b}, [x0]            
684                                                   
685 .Lgcm_dec_tail_loop:                              
686         /* do decrypt */                          
687         ldrb            w0, [x2], #1    /* get    
688         umov            w6, v0.b[0]     /* get    
689         eor             w6, w6, w0      /* w6     
690         strb            w6, [x1], #1    /* sto    
691                                                   
692         /* shift right out one byte */            
693         ext             v0.16b, v0.16b, v0.16b, #1
694         /* the last ciphertext is placed in hi    
695         ins             v0.b[15], w0              
696                                                   
697         subs            w4, w4, #1                
698         bne             .Lgcm_dec_tail_loop       
699                                                   
700         /* padding last block with zeros */       
701         tbl             v0.16b, {v0.16b}, v3.16b
702                                                   
703         /* ghash update */                        
704         rbit            v0.16b, v0.16b            
705         eor             RHASH.16b, RHASH.16b, v0.16b
706         PMUL_128x128(RR0, RR1, RHASH, RH1, RTM    
707         REDUCTION(RHASH, RR0, RR1, RRCONST, RT    
708                                                   
709 .Lgcm_dec_hash_len:                               
710         cbz             x7, .Lgcm_dec_end         
711                                                   
712         GTAG_HASH_LENGTHS(v1, v3)                 
713                                                   
714         b               .Lgcm_dec_ret             
715                                                   
716 .Lgcm_dec_end:                                    
717         /* store new CTR */                       
718         rev             x8, x8                    
719         rev             x9, x9                    
720         stp             x8, x9, [x3]              
721                                                   
722         rbit            RHASH.16b, RHASH.16b      
723                                                   
724 .Lgcm_dec_ret:                                    
725         /* store new MAC */                       
726         st1             {RHASH.2d}, [x5]          
727                                                   
728         ret                                       
729 SYM_FUNC_END(sm4_ce_pmull_gcm_dec)                
730                                                   
731         .section        ".rodata", "a"            
732         .align 4                                  
733 .Lcts_permute_table:                              
734         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
735         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
736         .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
737         .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
738         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
739         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
740                                                   
741 .Lghash_rconst:                                   
742         .quad           0x87                      
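For reference, C-level declarations matching the register comments in this file (x0..x7 in the function headers above). These spellings are hypothetical, derived only from those comments; the authoritative prototypes live in the arm64 SM4 glue code.

void sm4_ce_pmull_ghash_setup(const unsigned int *rkey_enc,
			      unsigned char *ghash_table);
void pmull_ghash_update(const unsigned char *ghash_table,
			unsigned char *ghash, const unsigned char *src,
			unsigned int nblocks);
void sm4_ce_pmull_gcm_enc(const unsigned int *rkey_enc, unsigned char *dst,
			  const unsigned char *src, unsigned char *ctr,
			  unsigned int nbytes, unsigned char *ghash,
			  const unsigned char *ghash_table,
			  const unsigned char *lengths);
void sm4_ce_pmull_gcm_dec(const unsigned int *rkey_enc, unsigned char *dst,
			  const unsigned char *src, unsigned char *ctr,
			  unsigned int nbytes, unsigned char *ghash,
			  const unsigned char *ghash_table,
			  const unsigned char *lengths);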
                                                      
