~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/crypto/aes-modes.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm64/crypto/aes-modes.S (Version linux-6.12-rc7) and /arch/i386/crypto/aes-modes.S (Version linux-4.12.14)


  1 /* SPDX-License-Identifier: GPL-2.0-only */       
  2 /*                                                
  3  * linux/arch/arm64/crypto/aes-modes.S - chain    
  4  *                                                
  5  * Copyright (C) 2013 - 2017 Linaro Ltd <ard.bi    
  6  */                                               
  7                                                   
  8 /* included by aes-ce.S and aes-neon.S */         
  9                                                   
 10         .text                                     
 11         .align          4                         
 12                                                   
 13 #ifndef MAX_STRIDE                                
 14 #define MAX_STRIDE      4                         
 15 #endif                                            
 16                                                   
 17 #if MAX_STRIDE == 4                               
 18 #define ST4(x...) x                               
 19 #define ST5(x...)                                 
 20 #else                                             
 21 #define ST4(x...)                                 
 22 #define ST5(x...) x                               
 23 #endif                                            
 24                                                   
 25 SYM_FUNC_START_LOCAL(aes_encrypt_block4x)         
 26         encrypt_block4x v0, v1, v2, v3, w3, x2    
 27         ret                                       
 28 SYM_FUNC_END(aes_encrypt_block4x)                 
 29                                                   
 30 SYM_FUNC_START_LOCAL(aes_decrypt_block4x)         
 31         decrypt_block4x v0, v1, v2, v3, w3, x2    
 32         ret                                       
 33 SYM_FUNC_END(aes_decrypt_block4x)                 
 34                                                   
 35 #if MAX_STRIDE == 5                               
 36 SYM_FUNC_START_LOCAL(aes_encrypt_block5x)         
 37         encrypt_block5x v0, v1, v2, v3, v4, w3    
 38         ret                                       
 39 SYM_FUNC_END(aes_encrypt_block5x)                 
 40                                                   
 41 SYM_FUNC_START_LOCAL(aes_decrypt_block5x)         
 42         decrypt_block5x v0, v1, v2, v3, v4, w3    
 43         ret                                       
 44 SYM_FUNC_END(aes_decrypt_block5x)                 
 45 #endif                                            
 46                                                   
 47         /*                                        
 48          * aes_ecb_encrypt(u8 out[], u8 const     
 49          *                 int blocks)            
 50          * aes_ecb_decrypt(u8 out[], u8 const     
 51          *                 int blocks)            
 52          */                                       
 53                                                   
 54 AES_FUNC_START(aes_ecb_encrypt)                   
 55         frame_push      0                         
 56                                                   
 57         enc_prepare     w3, x2, x5                
 58                                                   
 59 .LecbencloopNx:                                   
 60         subs            w4, w4, #MAX_STRIDE       
 61         bmi             .Lecbenc1x                
 62         ld1             {v0.16b-v3.16b}, [x1],    
 63 ST4(    bl              aes_encrypt_block4x       
 64 ST5(    ld1             {v4.16b}, [x1], #16       
 65 ST5(    bl              aes_encrypt_block5x       
 66         st1             {v0.16b-v3.16b}, [x0],    
 67 ST5(    st1             {v4.16b}, [x0], #16       
 68         b               .LecbencloopNx            
 69 .Lecbenc1x:                                       
 70         adds            w4, w4, #MAX_STRIDE       
 71         beq             .Lecbencout               
 72 .Lecbencloop:                                     
 73         ld1             {v0.16b}, [x1], #16       
 74         encrypt_block   v0, w3, x2, x5, w6        
 75         st1             {v0.16b}, [x0], #16       
 76         subs            w4, w4, #1                
 77         bne             .Lecbencloop              
 78 .Lecbencout:                                      
 79         frame_pop                                 
 80         ret                                       
 81 AES_FUNC_END(aes_ecb_encrypt)                     
 82                                                   
 83                                                   
 84 AES_FUNC_START(aes_ecb_decrypt)                   
 85         frame_push      0                         
 86                                                   
 87         dec_prepare     w3, x2, x5                
 88                                                   
 89 .LecbdecloopNx:                                   
 90         subs            w4, w4, #MAX_STRIDE       
 91         bmi             .Lecbdec1x                
 92         ld1             {v0.16b-v3.16b}, [x1],    
 93 ST4(    bl              aes_decrypt_block4x       
 94 ST5(    ld1             {v4.16b}, [x1], #16       
 95 ST5(    bl              aes_decrypt_block5x       
 96         st1             {v0.16b-v3.16b}, [x0],    
 97 ST5(    st1             {v4.16b}, [x0], #16       
 98         b               .LecbdecloopNx            
 99 .Lecbdec1x:                                       
100         adds            w4, w4, #MAX_STRIDE       
101         beq             .Lecbdecout               
102 .Lecbdecloop:                                     
103         ld1             {v0.16b}, [x1], #16       
104         decrypt_block   v0, w3, x2, x5, w6        
105         st1             {v0.16b}, [x0], #16       
106         subs            w4, w4, #1                
107         bne             .Lecbdecloop              
108 .Lecbdecout:                                      
109         frame_pop                                 
110         ret                                       
111 AES_FUNC_END(aes_ecb_decrypt)                     
112                                                   
113                                                   
114         /*                                        
115          * aes_cbc_encrypt(u8 out[], u8 const     
116          *                 int blocks, u8 iv[]    
117          * aes_cbc_decrypt(u8 out[], u8 const     
118          *                 int blocks, u8 iv[]    
119          * aes_essiv_cbc_encrypt(u8 out[], u8     
120          *                       int rounds, i    
121          *                       u32 const rk2    
122          * aes_essiv_cbc_decrypt(u8 out[], u8     
123          *                       int rounds, i    
124          *                       u32 const rk2    
125          */                                       
126                                                   
127 AES_FUNC_START(aes_essiv_cbc_encrypt)             
128         ld1             {v4.16b}, [x5]            
129                                                   
130         mov             w8, #14                   
131         enc_prepare     w8, x6, x7                
132         encrypt_block   v4, w8, x6, x7, w9        
133         enc_switch_key  w3, x2, x6                
134         b               .Lcbcencloop4x            
135                                                   
136 AES_FUNC_START(aes_cbc_encrypt)                   
137         ld1             {v4.16b}, [x5]            
138         enc_prepare     w3, x2, x6                
139                                                   
140 .Lcbcencloop4x:                                   
141         subs            w4, w4, #4                
142         bmi             .Lcbcenc1x                
143         ld1             {v0.16b-v3.16b}, [x1],    
144         eor             v0.16b, v0.16b, v4.16b    
145         encrypt_block   v0, w3, x2, x6, w7        
146         eor             v1.16b, v1.16b, v0.16b    
147         encrypt_block   v1, w3, x2, x6, w7        
148         eor             v2.16b, v2.16b, v1.16b    
149         encrypt_block   v2, w3, x2, x6, w7        
150         eor             v3.16b, v3.16b, v2.16b    
151         encrypt_block   v3, w3, x2, x6, w7        
152         st1             {v0.16b-v3.16b}, [x0],    
153         mov             v4.16b, v3.16b            
154         b               .Lcbcencloop4x            
155 .Lcbcenc1x:                                       
156         adds            w4, w4, #4                
157         beq             .Lcbcencout               
158 .Lcbcencloop:                                     
159         ld1             {v0.16b}, [x1], #16       
160         eor             v4.16b, v4.16b, v0.16b    
161         encrypt_block   v4, w3, x2, x6, w7        
162         st1             {v4.16b}, [x0], #16       
163         subs            w4, w4, #1                
164         bne             .Lcbcencloop              
165 .Lcbcencout:                                      
166         st1             {v4.16b}, [x5]            
167         ret                                       
168 AES_FUNC_END(aes_cbc_encrypt)                     
169 AES_FUNC_END(aes_essiv_cbc_encrypt)               
170                                                   
171 AES_FUNC_START(aes_essiv_cbc_decrypt)             
172         ld1             {cbciv.16b}, [x5]         
173                                                   
174         mov             w8, #14                   
175         enc_prepare     w8, x6, x7                
176         encrypt_block   cbciv, w8, x6, x7, w9     
177         b               .Lessivcbcdecstart        
178                                                   
179 AES_FUNC_START(aes_cbc_decrypt)                   
180         ld1             {cbciv.16b}, [x5]         
181 .Lessivcbcdecstart:                               
182         frame_push      0                         
183         dec_prepare     w3, x2, x6                
184                                                   
185 .LcbcdecloopNx:                                   
186         subs            w4, w4, #MAX_STRIDE       
187         bmi             .Lcbcdec1x                
188         ld1             {v0.16b-v3.16b}, [x1],    
189 #if MAX_STRIDE == 5                               
190         ld1             {v4.16b}, [x1], #16       
191         mov             v5.16b, v0.16b            
192         mov             v6.16b, v1.16b            
193         mov             v7.16b, v2.16b            
194         bl              aes_decrypt_block5x       
195         sub             x1, x1, #32               
196         eor             v0.16b, v0.16b, cbciv.    
197         eor             v1.16b, v1.16b, v5.16b    
198         ld1             {v5.16b}, [x1], #16       
199         ld1             {cbciv.16b}, [x1], #16    
200         eor             v2.16b, v2.16b, v6.16b    
201         eor             v3.16b, v3.16b, v7.16b    
202         eor             v4.16b, v4.16b, v5.16b    
203 #else                                             
204         mov             v4.16b, v0.16b            
205         mov             v5.16b, v1.16b            
206         mov             v6.16b, v2.16b            
207         bl              aes_decrypt_block4x       
208         sub             x1, x1, #16               
209         eor             v0.16b, v0.16b, cbciv.    
210         eor             v1.16b, v1.16b, v4.16b    
211         ld1             {cbciv.16b}, [x1], #16    
212         eor             v2.16b, v2.16b, v5.16b    
213         eor             v3.16b, v3.16b, v6.16b    
214 #endif                                            
215         st1             {v0.16b-v3.16b}, [x0],    
216 ST5(    st1             {v4.16b}, [x0], #16       
217         b               .LcbcdecloopNx            
218 .Lcbcdec1x:                                       
219         adds            w4, w4, #MAX_STRIDE       
220         beq             .Lcbcdecout               
221 .Lcbcdecloop:                                     
222         ld1             {v1.16b}, [x1], #16       
223         mov             v0.16b, v1.16b            
224         decrypt_block   v0, w3, x2, x6, w7        
225         eor             v0.16b, v0.16b, cbciv.    
226         mov             cbciv.16b, v1.16b         
227         st1             {v0.16b}, [x0], #16       
228         subs            w4, w4, #1                
229         bne             .Lcbcdecloop              
230 .Lcbcdecout:                                      
231         st1             {cbciv.16b}, [x5]         
232         frame_pop                                 
233         ret                                       
234 AES_FUNC_END(aes_cbc_decrypt)                     
235 AES_FUNC_END(aes_essiv_cbc_decrypt)               
236                                                   
237                                                   
238         /*                                        
239          * aes_cbc_cts_encrypt(u8 out[], u8 co    
240          *                     int rounds, int    
241          * aes_cbc_cts_decrypt(u8 out[], u8 co    
242          *                     int rounds, int    
243          */                                       
244                                                   
245 AES_FUNC_START(aes_cbc_cts_encrypt)               
246         adr_l           x8, .Lcts_permute_tabl    
247         sub             x4, x4, #16               
248         add             x9, x8, #32               
249         add             x8, x8, x4                
250         sub             x9, x9, x4                
251         ld1             {v3.16b}, [x8]            
252         ld1             {v4.16b}, [x9]            
253                                                   
254         ld1             {v0.16b}, [x1], x4        
255         ld1             {v1.16b}, [x1]            
256                                                   
257         ld1             {v5.16b}, [x5]            
258         enc_prepare     w3, x2, x6                
259                                                   
260         eor             v0.16b, v0.16b, v5.16b    
261         tbl             v1.16b, {v1.16b}, v4.1    
262         encrypt_block   v0, w3, x2, x6, w7        
263                                                   
264         eor             v1.16b, v1.16b, v0.16b    
265         tbl             v0.16b, {v0.16b}, v3.1    
266         encrypt_block   v1, w3, x2, x6, w7        
267                                                   
268         add             x4, x0, x4                
269         st1             {v0.16b}, [x4]            
270         st1             {v1.16b}, [x0]            
271         ret                                       
272 AES_FUNC_END(aes_cbc_cts_encrypt)                 
273                                                   
274 AES_FUNC_START(aes_cbc_cts_decrypt)               
275         adr_l           x8, .Lcts_permute_tabl    
276         sub             x4, x4, #16               
277         add             x9, x8, #32               
278         add             x8, x8, x4                
279         sub             x9, x9, x4                
280         ld1             {v3.16b}, [x8]            
281         ld1             {v4.16b}, [x9]            
282                                                   
283         ld1             {v0.16b}, [x1], x4        
284         ld1             {v1.16b}, [x1]            
285                                                   
286         ld1             {v5.16b}, [x5]            
287         dec_prepare     w3, x2, x6                
288                                                   
289         decrypt_block   v0, w3, x2, x6, w7        
290         tbl             v2.16b, {v0.16b}, v3.1    
291         eor             v2.16b, v2.16b, v1.16b    
292                                                   
293         tbx             v0.16b, {v1.16b}, v4.1    
294         decrypt_block   v0, w3, x2, x6, w7        
295         eor             v0.16b, v0.16b, v5.16b    
296                                                   
297         add             x4, x0, x4                
298         st1             {v2.16b}, [x4]            
299         st1             {v0.16b}, [x0]            
300         ret                                       
301 AES_FUNC_END(aes_cbc_cts_decrypt)                 
302                                                   
303         .section        ".rodata", "a"            
304         .align          6                         
305 .Lcts_permute_table:                              
306         .byte           0xff, 0xff, 0xff, 0xff    
307         .byte           0xff, 0xff, 0xff, 0xff    
308         .byte            0x0,  0x1,  0x2,  0x3    
309         .byte            0x8,  0x9,  0xa,  0xb    
310         .byte           0xff, 0xff, 0xff, 0xff    
311         .byte           0xff, 0xff, 0xff, 0xff    
312         .previous                                 
313                                                   
314         /*                                        
315          * This macro generates the code for C    
316          */                                       
317 .macro ctr_encrypt xctr                           
318         // Arguments                              
319         OUT             .req x0                   
320         IN              .req x1                   
321         KEY             .req x2                   
322         ROUNDS_W        .req w3                   
323         BYTES_W         .req w4                   
324         IV              .req x5                   
325         BYTE_CTR_W      .req w6         // XCT    
326         // Intermediate values                    
327         CTR_W           .req w11        // XCT    
328         CTR             .req x11        // XCT    
329         IV_PART         .req x12                  
330         BLOCKS          .req x13                  
331         BLOCKS_W        .req w13                  
332                                                   
333         frame_push      0                         
334                                                   
335         enc_prepare     ROUNDS_W, KEY, IV_PART    
336         ld1             {vctr.16b}, [IV]          
337                                                   
338         /*                                        
339          * Keep 64 bits of the IV in a registe    
340          * easily increment the IV.  For XCTR     
341          * the 64-bit counter with the IV.        
342          */                                       
343         .if \xctr                                 
344                 umov            IV_PART, vctr.    
345                 lsr             CTR_W, BYTE_CT    
346         .else                                     
347                 umov            IV_PART, vctr.    
348                 rev             IV_PART, IV_PA    
349         .endif                                    
350                                                   
351 .LctrloopNx\xctr:                                 
352         add             BLOCKS_W, BYTES_W, #15    
353         sub             BYTES_W, BYTES_W, #MAX    
354         lsr             BLOCKS_W, BLOCKS_W, #4    
355         mov             w8, #MAX_STRIDE           
356         cmp             BLOCKS_W, w8              
357         csel            BLOCKS_W, BLOCKS_W, w8    
358                                                   
359         /*                                        
360          * Set up the counter values in v0-v{M    
361          *                                        
362          * If we are encrypting less than MAX_    
363          * handling code expects the last keys    
364          * v{MAX_STRIDE-1}.  For example: if e    
365          * MAX_STRIDE=5, then v3 and v4 should    
366          */                                       
367         .if \xctr                                 
368                 add             CTR, CTR, BLOC    
369         .else                                     
370                 adds            IV_PART, IV_PA    
371         .endif                                    
372         mov             v0.16b, vctr.16b          
373         mov             v1.16b, vctr.16b          
374         mov             v2.16b, vctr.16b          
375         mov             v3.16b, vctr.16b          
376 ST5(    mov             v4.16b, vctr.16b          
377         .if \xctr                                 
378                 sub             x6, CTR, #MAX_    
379                 sub             x7, CTR, #MAX_    
380                 sub             x8, CTR, #MAX_    
381                 sub             x9, CTR, #MAX_    
382 ST5(            sub             x10, CTR, #MAX    
383                 eor             x6, x6, IV_PAR    
384                 eor             x7, x7, IV_PAR    
385                 eor             x8, x8, IV_PAR    
386                 eor             x9, x9, IV_PAR    
387 ST5(            eor             x10, x10, IV_P    
388                 mov             v0.d[0], x6       
389                 mov             v1.d[0], x7       
390                 mov             v2.d[0], x8       
391                 mov             v3.d[0], x9       
392 ST5(            mov             v4.d[0], x10      
393         .else                                     
394                 bcs             0f                
395                 .subsection     1                 
396                 /*                                
397                  * This subsection handles car    
398                  *                                
399                  * Conditional branching here     
400                  * invariance since the branch    
401                  * of the plaintext or key.  T    
402                  * practice anyway.               
403                  */                               
404                                                   
405                 /* Apply carry to outgoing cou    
406 0:              umov            x8, vctr.d[0]     
407                 rev             x8, x8            
408                 add             x8, x8, #1        
409                 rev             x8, x8            
410                 ins             vctr.d[0], x8     
411                                                   
412                 /*                                
413                  * Apply carry to counter bloc    
414                  *                                
415                  * Since the carry flag was se    
416                  * MAX_STRIDE.  Using the valu    
417                  * many counter blocks need to    
418                  */                               
419                 cbz             IV_PART, 2f       
420                 adr             x16, 1f           
421                 sub             x16, x16, IV_P    
422                 br              x16               
423                 bti             c                 
424                 mov             v0.d[0], vctr.    
425                 bti             c                 
426                 mov             v1.d[0], vctr.    
427                 bti             c                 
428                 mov             v2.d[0], vctr.    
429                 bti             c                 
430                 mov             v3.d[0], vctr.    
431 ST5(            bti             c                 
432 ST5(            mov             v4.d[0], vctr.    
433 1:              b               2f                
434                 .previous                         
435                                                   
436 2:              rev             x7, IV_PART       
437                 ins             vctr.d[1], x7     
438                 sub             x7, IV_PART, #    
439                 sub             x8, IV_PART, #    
440                 sub             x9, IV_PART, #    
441                 rev             x7, x7            
442                 rev             x8, x8            
443                 mov             v1.d[1], x7       
444                 rev             x9, x9            
445 ST5(            sub             x10, IV_PART,     
446                 mov             v2.d[1], x8       
447 ST5(            rev             x10, x10          
448                 mov             v3.d[1], x9       
449 ST5(            mov             v4.d[1], x10      
450         .endif                                    
451                                                   
452         /*                                        
453          * If there are at least MAX_STRIDE bl    
454          * keystream and store.  Otherwise jum    
455          */                                       
456         tbnz            BYTES_W, #31, .Lctrtai    
457         ld1             {v5.16b-v7.16b}, [IN],    
458 ST4(    bl              aes_encrypt_block4x       
459 ST5(    bl              aes_encrypt_block5x       
460         eor             v0.16b, v5.16b, v0.16b    
461 ST4(    ld1             {v5.16b}, [IN], #16       
462         eor             v1.16b, v6.16b, v1.16b    
463 ST5(    ld1             {v5.16b-v6.16b}, [IN],    
464         eor             v2.16b, v7.16b, v2.16b    
465         eor             v3.16b, v5.16b, v3.16b    
466 ST5(    eor             v4.16b, v6.16b, v4.16b    
467         st1             {v0.16b-v3.16b}, [OUT]    
468 ST5(    st1             {v4.16b}, [OUT], #16      
469         cbz             BYTES_W, .Lctrout\xctr    
470         b               .LctrloopNx\xctr          
471                                                   
472 .Lctrout\xctr:                                    
473         .if !\xctr                                
474                 st1             {vctr.16b}, [I    
475         .endif                                    
476         frame_pop                                 
477         ret                                       
478                                                   
479 .Lctrtail\xctr:                                   
480         /*                                        
481          * Handle up to MAX_STRIDE * 16 - 1 by    
482          *                                        
483          * This code expects the last keystrea    
484          * For example: if encrypting two bloc    
485          * v4 should have the next two counter    
486          *                                        
487          * This allows us to store the ciphert    
488          * regions of memory.  Any invalid cip    
489          * correctly computed blocks.  This ap    
490          * logic for storing the ciphertext.      
491          */                                       
492         mov             x16, #16                  
493         ands            w7, BYTES_W, #0xf         
494         csel            x13, x7, x16, ne          
495                                                   
496 ST5(    cmp             BYTES_W, #64 - (MAX_ST    
497 ST5(    csel            x14, x16, xzr, gt         
498         cmp             BYTES_W, #48 - (MAX_ST    
499         csel            x15, x16, xzr, gt         
500         cmp             BYTES_W, #32 - (MAX_ST    
501         csel            x16, x16, xzr, gt         
502         cmp             BYTES_W, #16 - (MAX_ST    
503                                                   
504         adr_l           x9, .Lcts_permute_tabl    
505         add             x9, x9, x13               
506         ble             .Lctrtail1x\xctr          
507                                                   
508 ST5(    ld1             {v5.16b}, [IN], x14       
509         ld1             {v6.16b}, [IN], x15       
510         ld1             {v7.16b}, [IN], x16       
511                                                   
512 ST4(    bl              aes_encrypt_block4x       
513 ST5(    bl              aes_encrypt_block5x       
514                                                   
515         ld1             {v8.16b}, [IN], x13       
516         ld1             {v9.16b}, [IN]            
517         ld1             {v10.16b}, [x9]           
518                                                   
519 ST4(    eor             v6.16b, v6.16b, v0.16b    
520 ST4(    eor             v7.16b, v7.16b, v1.16b    
521 ST4(    tbl             v3.16b, {v3.16b}, v10.    
522 ST4(    eor             v8.16b, v8.16b, v2.16b    
523 ST4(    eor             v9.16b, v9.16b, v3.16b    
524                                                   
525 ST5(    eor             v5.16b, v5.16b, v0.16b    
526 ST5(    eor             v6.16b, v6.16b, v1.16b    
527 ST5(    tbl             v4.16b, {v4.16b}, v10.    
528 ST5(    eor             v7.16b, v7.16b, v2.16b    
529 ST5(    eor             v8.16b, v8.16b, v3.16b    
530 ST5(    eor             v9.16b, v9.16b, v4.16b    
531                                                   
532 ST5(    st1             {v5.16b}, [OUT], x14      
533         st1             {v6.16b}, [OUT], x15      
534         st1             {v7.16b}, [OUT], x16      
535         add             x13, x13, OUT             
536         st1             {v9.16b}, [x13]           
537         st1             {v8.16b}, [OUT]           
538         b               .Lctrout\xctr             
539                                                   
540 .Lctrtail1x\xctr:                                 
541         /*                                        
542          * Handle <= 16 bytes of plaintext        
543          *                                        
544          * This code always reads and writes 1    
545          * accesses, XCTR and CTR modes must u    
546          * encrypting/decrypting less than 16     
547          *                                        
548          * This code is unusual in that it loa    
549          * relative to the end of the buffers     
550          * This causes unusual behaviour when     
551          * bytes; the end of the data is expec    
552          * temporary buffer rather than the st    
553          * of the temporary buffer.               
554          */                                       
555         sub             x8, x7, #16               
556         csel            x7, x7, x8, eq            
557         add             IN, IN, x7                
558         add             OUT, OUT, x7              
559         ld1             {v5.16b}, [IN]            
560         ld1             {v6.16b}, [OUT]           
561 ST5(    mov             v3.16b, v4.16b            
562         encrypt_block   v3, ROUNDS_W, KEY, x8,    
563         ld1             {v10.16b-v11.16b}, [x9    
564         tbl             v3.16b, {v3.16b}, v10.    
565         sshr            v11.16b, v11.16b, #7      
566         eor             v5.16b, v5.16b, v3.16b    
567         bif             v5.16b, v6.16b, v11.16    
568         st1             {v5.16b}, [OUT]           
569         b               .Lctrout\xctr             
570                                                   
571         // Arguments                              
572         .unreq OUT                                
573         .unreq IN                                 
574         .unreq KEY                                
575         .unreq ROUNDS_W                           
576         .unreq BYTES_W                            
577         .unreq IV                                 
578         .unreq BYTE_CTR_W       // XCTR only      
579         // Intermediate values                    
580         .unreq CTR_W            // XCTR only      
581         .unreq CTR              // XCTR only      
582         .unreq IV_PART                            
583         .unreq BLOCKS                             
584         .unreq BLOCKS_W                           
585 .endm                                             
586                                                   
587         /*                                        
588          * aes_ctr_encrypt(u8 out[], u8 const     
589          *                 int bytes, u8 ctr[]    
590          *                                        
591          * The input and output buffers must a    
592          * encrypting/decrypting less than 16     
593          * accesses will occur.  The data to b    
594          * to be at the end of this 16-byte te    
595          * start.                                 
596          */                                       
597                                                   
// aes_ctr_encrypt: the entire function body is one expansion of the
// ctr_encrypt macro with its argument set to 0 (aes_xctr_encrypt uses 1).
// NOTE(review): the macro's local labels are suffixed with \xctr, so 0
// presumably selects the plain CTR variant - confirm against the macro header.
598 AES_FUNC_START(aes_ctr_encrypt)                   
599         ctr_encrypt 0                             
600 AES_FUNC_END(aes_ctr_encrypt)                     
601                                                   
602         /*                                        
603          * aes_xctr_encrypt(u8 out[], u8 const    
604          *                 int bytes, u8 const    
605          *                                        
606          * The input and output buffers must a    
607          * encrypting/decrypting less than 16     
608          * accesses will occur.  The data to b    
609          * to be at the end of this 16-byte te    
610          * start.                                 
611          */                                       
612                                                   
// aes_xctr_encrypt: the entire function body is one expansion of the
// ctr_encrypt macro with its argument set to 1 (aes_ctr_encrypt uses 0).
// NOTE(review): the register aliases marked "XCTR only" in the macro are
// presumably the ones that only this expansion uses - confirm.
613 AES_FUNC_START(aes_xctr_encrypt)                  
614         ctr_encrypt 1                             
615 AES_FUNC_END(aes_xctr_encrypt)                    
616                                                   
617                                                   
618         /*                                        
619          * aes_xts_encrypt(u8 out[], u8 const     
620          *                 int bytes, u8 const    
621          * aes_xts_decrypt(u8 out[], u8 const     
622          *                 int bytes, u8 const    
623          */                                       
624                                                   
            // next_tweak: derive the vector register \out from \in, using \tmp
            // as scratch (\tmp is overwritten).  \out and \in may be the same
            // register; callers in this file do that with v4.
            // Visible steps: SSHR on the 64-bit lanes of \in into \tmp, AND on
            // \tmp, a 64-bit-lane ADD from \in into \out, EXT on \tmp, then an
            // EOR into \out.
            // NOTE(review): the trailing operands are cut off in this listing.
            // Presumably this is the XTS "multiply tweak by x" step with
            // xtsmask supplying the reduction constant - confirm.
625         .macro          next_tweak, out, in, t    
626         sshr            \tmp\().2d,  \in\().2d    
627         and             \tmp\().16b, \tmp\().1    
628         add             \out\().2d,  \in\().2d    
629         ext             \tmp\().16b, \tmp\().1    
630         eor             \out\().16b, \out\().1    
631         .endm                                     
632                                                   
            // xts_load_mask: (re)build the constant kept in the register aliased
            // as xtsmask.  \tmp is a scratch vector register and is overwritten.
            // MOVI puts 0x1 in the 32-bit elements of xtsmask.2s and 0x87 in the
            // 32-bit elements of \tmp.2s; UZP1 then writes xtsmask.4s.
            // NOTE(review): the last UZP1 operand is cut off in this listing;
            // presumably it is \tmp, which would interleave the 0x1 and 0x87
            // words - confirm.
633         .macro          xts_load_mask, tmp        
634         movi            xtsmask.2s, #0x1          
635         movi            \tmp\().2s, #0x87         
636         uzp1            xtsmask.4s, xtsmask.4s    
637         .endm                                     
638                                                   
// aes_xts_encrypt: XTS encryption of a byte buffer, four blocks at a time
// where possible, then single blocks, then ciphertext stealing for a tail
// shorter than 16 bytes.
//
// Register roles as used by the body (the prototype comment is cut off in
// this listing, so the roles below are read from the instructions):
//   x0  output pointer (targets of st1)
//   x1  input pointer (sources of ld1)
//   x2  key schedule handed to enc_prepare / encrypt_block for the data
//   w3  round count handed to the same macros
//   w4  number of bytes still to process
//   x5  key schedule used only to encrypt the initial tweak
//   x6  16-byte tweak buffer: loaded into v4 on entry, v4 written back on exit
//   w7  zero selects the .Lxtsencnotfirst path (tweak is not encrypted)
//   v4-v7 hold tweaks; v8 is the scratch register given to next_tweak
639 AES_FUNC_START(aes_xts_encrypt)                   
640         frame_push      0                         
641                                                   
642         ld1             {v4.16b}, [x6]            
643         xts_load_mask   v8                        
644         cbz             w7, .Lxtsencnotfirst      
645                                                   
            // w7 != 0: encrypt the tweak in v4 with the key schedule at x5, then
            // switch to the data key schedule at x2.
            // NOTE(review): xts_cts_skip_tw is defined outside this listing;
            // presumably it may branch to .LxtsencNx to skip the tweak
            // encryption - confirm.
646         enc_prepare     w3, x5, x8                
647         xts_cts_skip_tw w7, .LxtsencNx            
648         encrypt_block   v4, w3, x5, x8, w7        
649         enc_switch_key  w3, x2, x8                
650         b               .LxtsencNx                
651                                                   
            // w7 == 0: no tweak encryption.  Execution falls through into
            // .LxtsencloopNx, so the tweak loaded from [x6] is advanced once
            // before it is applied to the first block.
652 .Lxtsencnotfirst:                                 
653         enc_prepare     w3, x2, x8                
654 .LxtsencloopNx:                                   
655         next_tweak      v4, v4, v8                
656 .LxtsencNx:                                       
            // Fewer than 64 bytes left -> leave the 4-block loop.
657         subs            w4, w4, #64               
658         bmi             .Lxtsenc1x                
            // Four blocks: tweaks v4..v7, XOR in, encrypt, XOR out, store.
659         ld1             {v0.16b-v3.16b}, [x1],    
660         next_tweak      v5, v4, v8                
661         eor             v0.16b, v0.16b, v4.16b    
662         next_tweak      v6, v5, v8                
663         eor             v1.16b, v1.16b, v5.16b    
664         eor             v2.16b, v2.16b, v6.16b    
665         next_tweak      v7, v6, v8                
666         eor             v3.16b, v3.16b, v7.16b    
667         bl              aes_encrypt_block4x       
668         eor             v3.16b, v3.16b, v7.16b    
669         eor             v0.16b, v0.16b, v4.16b    
670         eor             v1.16b, v1.16b, v5.16b    
671         eor             v2.16b, v2.16b, v6.16b    
672         st1             {v0.16b-v3.16b}, [x0],    
            // v4 = last tweak used; if nothing is left it is what gets stored.
673         mov             v4.16b, v7.16b            
674         cbz             w4, .Lxtsencret           
            // NOTE(review): xts_reload_mask is defined outside this listing;
            // presumably it restores v8/xtsmask after the 4x helper - confirm.
675         xts_reload_mask v8                        
676         b               .LxtsencloopNx            
677 .Lxtsenc1x:                                       
            // Undo the speculative subtraction of 64.
678         adds            w4, w4, #64               
679         beq             .Lxtsencout               
            // Fewer than 16 bytes left here -> .LxtsencctsNx, which takes v3 as
            // the block to steal from.
680         subs            w4, w4, #16               
681         bmi             .LxtsencctsNx             
682 .Lxtsencloop:                                     
683         ld1             {v0.16b}, [x1], #16       
684 .Lxtsencctsout:                                   
685         eor             v0.16b, v0.16b, v4.16b    
686         encrypt_block   v0, w3, x2, x8, w7        
687         eor             v0.16b, v0.16b, v4.16b    
            // w4 == 0: that was the final block.
688         cbz             w4, .Lxtsencout           
689         subs            w4, w4, #16               
690         next_tweak      v4, v4, v8                
            // Negative count: a partial block follows, go steal ciphertext.
            // (The branch uses the flags from the subs above.)
691         bmi             .Lxtsenccts               
692         st1             {v0.16b}, [x0], #16       
693         b               .Lxtsencloop              
694 .Lxtsencout:                                      
695         st1             {v0.16b}, [x0]            
696 .Lxtsencret:                                      
            // Write the current tweak back to the caller's buffer.
697         st1             {v4.16b}, [x6]            
698         frame_pop                                 
699         ret                                       
700                                                   
701 .LxtsencctsNx:                                    
            // Reached from .Lxtsenc1x: use v3 as the previous ciphertext block
            // and step the output pointer back over it.
702         mov             v0.16b, v3.16b            
703         sub             x0, x0, #16               
704 .Lxtsenccts:                                      
            // Here w4 = (tail length - 16), a negative value.
705         adr_l           x8, .Lcts_permute_tabl    
706                                                   
            // x1 -> the 16 bytes that end at the end of the input
707         add             x1, x1, w4, sxtw          
            // w4 = tail length
708         add             w4, w4, #16               
            // x8 = table + tail, x9 = table + 32 - tail, x4 = x0 + tail
709         add             x9, x8, #32               
710         add             x8, x8, x4                
711         sub             x9, x9, x4                
712         add             x4, x0, x4                
713                                                   
714         ld1             {v1.16b}, [x1]            
715         ld1             {v2.16b}, [x8]            
716         ld1             {v3.16b}, [x9]            
717                                                   
            // v2 = bytes of the previous ciphertext block (v0) selected by the
            // table; it is stored at x0 + tail.  TBX then overlays input bytes
            // from v1 onto v0, which is encrypted at .Lxtsencctsout and stored
            // at x0.  w4 is zeroed so that pass ends at .Lxtsencout.
718         tbl             v2.16b, {v0.16b}, v2.1    
719         tbx             v0.16b, {v1.16b}, v3.1    
720         st1             {v2.16b}, [x4]            
721         mov             w4, wzr                   
722         b               .Lxtsencctsout            
723 AES_FUNC_END(aes_xts_encrypt)                     
724                                                   
// aes_xts_decrypt: XTS decryption of a byte buffer, four blocks at a time
// where possible, then single blocks, then ciphertext stealing when the
// byte count is not a multiple of 16.
//
// Register roles as used by the body (the prototype comment is cut off in
// this listing, so the roles below are read from the instructions):
//   x0  output pointer (targets of st1)
//   x1  input pointer (sources of ld1)
//   x2  key schedule handed to dec_prepare / decrypt_block for the data
//   w3  round count handed to the prepare / block macros
//   w4  number of bytes still to process
//   x5  key schedule used only to encrypt the initial tweak
//   x6  16-byte tweak buffer: loaded into v4 on entry, v4 written back on exit
//   w7  zero selects the .Lxtsdecnotfirst path (tweak is not encrypted)
//   v4-v7 hold tweaks; v8 is the scratch register given to next_tweak
725 AES_FUNC_START(aes_xts_decrypt)                   
726         frame_push      0                         
727                                                   
            // If w4 is not a multiple of 16, w4 -= 16; otherwise it is unchanged.
728         /* subtract 16 bytes if we are doing C    
729         sub             w8, w4, #0x10             
730         tst             w4, #0xf                  
731         csel            w4, w4, w8, eq            
732                                                   
733         ld1             {v4.16b}, [x6]            
734         xts_load_mask   v8                        
            // NOTE(review): xts_cts_skip_tw is defined outside this listing;
            // presumably it may branch to .Lxtsdecskiptw to skip the tweak
            // encryption - confirm.
735         xts_cts_skip_tw w7, .Lxtsdecskiptw        
736         cbz             w7, .Lxtsdecnotfirst      
737                                                   
            // w7 != 0: the tweak is ENcrypted (encrypt_block) with the key
            // schedule at x5 before the data key at x2 is prepared for decryption.
738         enc_prepare     w3, x5, x8                
739         encrypt_block   v4, w3, x5, x8, w7        
740 .Lxtsdecskiptw:                                   
741         dec_prepare     w3, x2, x8                
742         b               .LxtsdecNx                
743                                                   
            // w7 == 0: no tweak encryption.  Execution falls through into
            // .LxtsdecloopNx, so the tweak loaded from [x6] is advanced once
            // before it is applied to the first block.
744 .Lxtsdecnotfirst:                                 
745         dec_prepare     w3, x2, x8                
746 .LxtsdecloopNx:                                   
747         next_tweak      v4, v4, v8                
748 .LxtsdecNx:                                       
            // Fewer than 64 bytes left -> leave the 4-block loop.
749         subs            w4, w4, #64               
750         bmi             .Lxtsdec1x                
            // Four blocks: tweaks v4..v7, XOR in, decrypt, XOR out, store.
751         ld1             {v0.16b-v3.16b}, [x1],    
752         next_tweak      v5, v4, v8                
753         eor             v0.16b, v0.16b, v4.16b    
754         next_tweak      v6, v5, v8                
755         eor             v1.16b, v1.16b, v5.16b    
756         eor             v2.16b, v2.16b, v6.16b    
757         next_tweak      v7, v6, v8                
758         eor             v3.16b, v3.16b, v7.16b    
759         bl              aes_decrypt_block4x       
760         eor             v3.16b, v3.16b, v7.16b    
761         eor             v0.16b, v0.16b, v4.16b    
762         eor             v1.16b, v1.16b, v5.16b    
763         eor             v2.16b, v2.16b, v6.16b    
764         st1             {v0.16b-v3.16b}, [x0],    
            // v4 = last tweak used; if nothing is left it is what gets stored.
765         mov             v4.16b, v7.16b            
766         cbz             w4, .Lxtsdecout           
            // NOTE(review): xts_reload_mask is defined outside this listing;
            // presumably it restores v8/xtsmask after the 4x helper - confirm.
767         xts_reload_mask v8                        
768         b               .LxtsdecloopNx            
769 .Lxtsdec1x:                                       
            // Undo the speculative subtraction of 64.
770         adds            w4, w4, #64               
771         beq             .Lxtsdecout               
772         subs            w4, w4, #16               
773 .Lxtsdecloop:                                     
            // The bmi below tests the flags of the preceding subs (here or at the
            // bottom of the loop); the ld1 in between does not alter them.
774         ld1             {v0.16b}, [x1], #16       
775         bmi             .Lxtsdeccts               
776 .Lxtsdecctsout:                                   
777         eor             v0.16b, v0.16b, v4.16b    
778         decrypt_block   v0, w3, x2, x8, w7        
779         eor             v0.16b, v0.16b, v4.16b    
780         st1             {v0.16b}, [x0], #16       
            // w4 == 0: that was the final block.
781         cbz             w4, .Lxtsdecout           
782         subs            w4, w4, #16               
783         next_tweak      v4, v4, v8                
784         b               .Lxtsdecloop              
785 .Lxtsdecout:                                      
            // Write the current tweak back to the caller's buffer.
786         st1             {v4.16b}, [x6]            
787         frame_pop                                 
788         ret                                       
789                                                   
790 .Lxtsdeccts:                                      
            // Here w4 = (tail length - 16), a negative value, and v0 holds the
            // last full input block.
791         adr_l           x8, .Lcts_permute_tabl    
792                                                   
            // x1 -> the 16 bytes that end at the end of the input
793         add             x1, x1, w4, sxtw          
            // w4 = tail length
794         add             w4, w4, #16               
            // x8 = table + tail, x9 = table + 32 - tail, x4 = x0 + tail
795         add             x9, x8, #32               
796         add             x8, x8, x4                
797         sub             x9, x9, x4                
798         add             x4, x0, x4                
799                                                   
            // v5 = the tweak after v4.  The last full block is decrypted with v5
            // here; the merged block is decrypted with v4 at .Lxtsdecctsout.
800         next_tweak      v5, v4, v8                
801                                                   
802         ld1             {v1.16b}, [x1]            
803         ld1             {v2.16b}, [x8]            
804         ld1             {v3.16b}, [x9]            
805                                                   
806         eor             v0.16b, v0.16b, v5.16b    
807         decrypt_block   v0, w3, x2, x8, w7        
808         eor             v0.16b, v0.16b, v5.16b    
809                                                   
            // v2 = bytes of the decrypted block (v0) selected by the table; it is
            // stored at x0 + tail.  TBX then overlays input bytes from v1 onto v0.
810         tbl             v2.16b, {v0.16b}, v2.1    
811         tbx             v0.16b, {v1.16b}, v3.1    
812                                                   
813         st1             {v2.16b}, [x4]            
            // w4 = 0 so the pass through .Lxtsdecctsout ends at .Lxtsdecout.
814         mov             w4, wzr                   
815         b               .Lxtsdecctsout            
816 AES_FUNC_END(aes_xts_decrypt)                     
817                                                   
818         /*                                        
819          * aes_mac_update(u8 const in[], u32 c    
820          *                int blocks, u8 dg[],    
821          */                                       
// aes_mac_update: fold input blocks into a 16-byte running digest by
// repeated XOR-then-encrypt, and return the number of blocks not consumed.
//
// Register roles as used by the body (the prototype comment is cut off in
// this listing, so the roles below are read from the instructions):
//   x0  input pointer (sources of ld1)
//   x1  key schedule handed to enc_prepare / encrypt_block
//   w2  round count handed to the same macros
//   w3  number of blocks left; copied to w0 as the return value
//   x4  digest buffer: loaded into v0 on entry, v0 stored back before return
//   w5  non-zero: encrypt the loaded digest before absorbing any input
//   w6  zero: leave the digest unencrypted after the final block is XORed in
//   x7, w8  scratch passed to the macros
822 AES_FUNC_START(aes_mac_update)                    
823         ld1             {v0.16b}, [x4]            
824         enc_prepare     w2, x1, x7                
825         cbz             w5, .Lmacloop4x           
826                                                   
827         encrypt_block   v0, w2, x1, x7, w8        
828                                                   
829 .Lmacloop4x:                                      
            // Fewer than 4 blocks left -> single-block loop.
830         subs            w3, w3, #4                
831         bmi             .Lmac1x                   
832         ld1             {v1.16b-v4.16b}, [x0],    
833         eor             v0.16b, v0.16b, v1.16b    
834         encrypt_block   v0, w2, x1, x7, w8        
835         eor             v0.16b, v0.16b, v2.16b    
836         encrypt_block   v0, w2, x1, x7, w8        
837         eor             v0.16b, v0.16b, v3.16b    
838         encrypt_block   v0, w2, x1, x7, w8        
839         eor             v0.16b, v0.16b, v4.16b    
            // x5 = x6 when no blocks remain (w3 == 0), otherwise all ones.  So
            // the cbz below is taken only after the last block and only if w6
            // is zero; in that case the final encryption is skipped.
840         cmp             w3, wzr                   
841         csinv           x5, x6, xzr, eq           
842         cbz             w5, .Lmacout              
843         encrypt_block   v0, w2, x1, x7, w8        
            // Save the digest so far before possibly leaving early.
            // NOTE(review): cond_yield is defined outside this listing;
            // presumably it branches to .Lmacout when the CPU should be yielded,
            // with w3 telling the caller how many blocks remain - confirm.
844         st1             {v0.16b}, [x4]            
845         cond_yield      .Lmacout, x7, x8          
846         b               .Lmacloop4x               
847 .Lmac1x:                                          
            // Undo the speculative subtraction of 4.
848         add             w3, w3, #4                
849 .Lmacloop:                                        
850         cbz             w3, .Lmacout              
851         ld1             {v1.16b}, [x0], #16       
852         eor             v0.16b, v0.16b, v1.16b    
853                                                   
            // Same last-block test as in the 4x loop; here the flags come from
            // the subs itself.
854         subs            w3, w3, #1                
855         csinv           x5, x6, xzr, eq           
856         cbz             w5, .Lmacout              
857                                                   
858 .Lmacenc:                                         
859         encrypt_block   v0, w2, x1, x7, w8        
860         b               .Lmacloop                 
861                                                   
862 .Lmacout:                                         
            // Store the digest and return the remaining block count in w0.
863         st1             {v0.16b}, [x4]            
864         mov             w0, w3                    
865         ret                                       
866 AES_FUNC_END(aes_mac_update)                      
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php