
TOMOYO Linux Cross Reference
Linux/arch/arm64/crypto/sm4-neon-core.S



/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4 Cipher Algorithm for ARMv8 NEON
 * as specified in
 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
 *
 * Copyright (C) 2022, Alibaba Group.
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Register macros */

#define RTMP0   v8
#define RTMP1   v9
#define RTMP2   v10
#define RTMP3   v11

#define RTMP4   v12
#define RTMP5   v13
#define RTMP6   v14
#define RTMP7   v15

#define RX0     v12
#define RX1     v13
#define RKEY    v14
#define RIV     v15
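
/*
 * Note that RTMP4-RTMP7 alias RX0/RX1/RKEY/RIV (v12-v15), so the
 * eight-register temporaries cannot be live at the same time as the
 * round-key or IV registers; this is why the CBC-decrypt path below
 * avoids rotate_clockwise_4x4_2x() while RIV still holds the chaining
 * value.
 */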

/* Helper macros. */

#define SM4_PREPARE()                                       \
        adr_l           x5, crypto_sm4_sbox;                \
        ld1             {v16.16b-v19.16b}, [x5], #64;       \
        ld1             {v20.16b-v23.16b}, [x5], #64;       \
        ld1             {v24.16b-v27.16b}, [x5], #64;       \
        ld1             {v28.16b-v31.16b}, [x5];
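
/*
 * The 256-byte SM4 S-box is kept resident in v16-v31 so that each
 * byte substitution becomes a four-step tbl/tbx lookup: tbl indexes
 * the first 64 bytes (v16-v19), and each following tbx consumes the
 * next 64 bytes after the index is rebased by subtracting 64.
 */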

#define transpose_4x4(s0, s1, s2, s3)                       \
        zip1            RTMP0.4s, s0.4s, s1.4s;             \
        zip1            RTMP1.4s, s2.4s, s3.4s;             \
        zip2            RTMP2.4s, s0.4s, s1.4s;             \
        zip2            RTMP3.4s, s2.4s, s3.4s;             \
        zip1            s0.2d, RTMP0.2d, RTMP1.2d;          \
        zip2            s1.2d, RTMP0.2d, RTMP1.2d;          \
        zip1            s2.2d, RTMP2.2d, RTMP3.2d;          \
        zip2            s3.2d, RTMP2.2d, RTMP3.2d;
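
/*
 * transpose_4x4() treats s0-s3 as the rows of a 4x4 matrix of 32-bit
 * words and transposes it: afterwards register sN holds word N of all
 * four blocks, which is the word-sliced layout the round macros expect.
 */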

#define transpose_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7)    \
        zip1            RTMP0.4s, s0.4s, s1.4s;             \
        zip1            RTMP1.4s, s2.4s, s3.4s;             \
        zip2            RTMP2.4s, s0.4s, s1.4s;             \
        zip2            RTMP3.4s, s2.4s, s3.4s;             \
        zip1            RTMP4.4s, s4.4s, s5.4s;             \
        zip1            RTMP5.4s, s6.4s, s7.4s;             \
        zip2            RTMP6.4s, s4.4s, s5.4s;             \
        zip2            RTMP7.4s, s6.4s, s7.4s;             \
        zip1            s0.2d, RTMP0.2d, RTMP1.2d;          \
        zip2            s1.2d, RTMP0.2d, RTMP1.2d;          \
        zip1            s2.2d, RTMP2.2d, RTMP3.2d;          \
        zip2            s3.2d, RTMP2.2d, RTMP3.2d;          \
        zip1            s4.2d, RTMP4.2d, RTMP5.2d;          \
        zip2            s5.2d, RTMP4.2d, RTMP5.2d;          \
        zip1            s6.2d, RTMP6.2d, RTMP7.2d;          \
        zip2            s7.2d, RTMP6.2d, RTMP7.2d;

#define rotate_clockwise_4x4(s0, s1, s2, s3)                \
        zip1            RTMP0.4s, s1.4s, s0.4s;             \
        zip2            RTMP1.4s, s1.4s, s0.4s;             \
        zip1            RTMP2.4s, s3.4s, s2.4s;             \
        zip2            RTMP3.4s, s3.4s, s2.4s;             \
        zip1            s0.2d, RTMP2.2d, RTMP0.2d;          \
        zip2            s1.2d, RTMP2.2d, RTMP0.2d;          \
        zip1            s2.2d, RTMP3.2d, RTMP1.2d;          \
        zip2            s3.2d, RTMP3.2d, RTMP1.2d;
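
/*
 * rotate_clockwise_4x4() converts the word-sliced state back to the
 * per-block layout while reversing the word order, i.e. it applies
 * SM4's final reverse transform R(A0, A1, A2, A3) = (A3, A2, A1, A0)
 * at the same time as the inverse transpose.
 */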

#define rotate_clockwise_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \
        zip1            RTMP0.4s, s1.4s, s0.4s;             \
        zip1            RTMP2.4s, s3.4s, s2.4s;             \
        zip2            RTMP1.4s, s1.4s, s0.4s;             \
        zip2            RTMP3.4s, s3.4s, s2.4s;             \
        zip1            RTMP4.4s, s5.4s, s4.4s;             \
        zip1            RTMP6.4s, s7.4s, s6.4s;             \
        zip2            RTMP5.4s, s5.4s, s4.4s;             \
        zip2            RTMP7.4s, s7.4s, s6.4s;             \
        zip1            s0.2d, RTMP2.2d, RTMP0.2d;          \
        zip2            s1.2d, RTMP2.2d, RTMP0.2d;          \
        zip1            s2.2d, RTMP3.2d, RTMP1.2d;          \
        zip2            s3.2d, RTMP3.2d, RTMP1.2d;          \
        zip1            s4.2d, RTMP6.2d, RTMP4.2d;          \
        zip2            s5.2d, RTMP6.2d, RTMP4.2d;          \
        zip1            s6.2d, RTMP7.2d, RTMP5.2d;          \
        zip2            s7.2d, RTMP7.2d, RTMP5.2d;

#define ROUND4(round, s0, s1, s2, s3)                       \
        dup             RX0.4s, RKEY.s[round];              \
        /* rk ^ s1 ^ s2 ^ s3 */                             \
        eor             RTMP1.16b, s2.16b, s3.16b;          \
        eor             RX0.16b, RX0.16b, s1.16b;           \
        eor             RX0.16b, RX0.16b, RTMP1.16b;        \
                                                            \
        /* sbox, non-linear part */                         \
        movi            RTMP3.16b, #64;  /* sizeof(sbox) / 4 */ \
        tbl             RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \
        sub             RX0.16b, RX0.16b, RTMP3.16b;        \
        tbx             RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \
        sub             RX0.16b, RX0.16b, RTMP3.16b;        \
        tbx             RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \
        sub             RX0.16b, RX0.16b, RTMP3.16b;        \
        tbx             RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \
                                                            \
        /* linear part */                                   \
        shl             RTMP1.4s, RTMP0.4s, #8;             \
        shl             RTMP2.4s, RTMP0.4s, #16;            \
        shl             RTMP3.4s, RTMP0.4s, #24;            \
        sri             RTMP1.4s, RTMP0.4s, #(32-8);        \
        sri             RTMP2.4s, RTMP0.4s, #(32-16);       \
        sri             RTMP3.4s, RTMP0.4s, #(32-24);       \
        /* RTMP1 = x ^ rol32(x, 8) ^ rol32(x, 16) */        \
        eor             RTMP1.16b, RTMP1.16b, RTMP0.16b;    \
        eor             RTMP1.16b, RTMP1.16b, RTMP2.16b;    \
        /* RTMP3 = x ^ rol32(x, 24) ^ rol32(RTMP1, 2) */    \
        eor             RTMP3.16b, RTMP3.16b, RTMP0.16b;    \
        shl             RTMP2.4s, RTMP1.4s, 2;              \
        sri             RTMP2.4s, RTMP1.4s, #(32-2);        \
        eor             RTMP3.16b, RTMP3.16b, RTMP2.16b;    \
        /* s0 ^= RTMP3 */                                   \
        eor             s0.16b, s0.16b, RTMP3.16b;
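
/*
 * One ROUND4() invocation computes the standard SM4 round on four
 * word-sliced blocks at once.  A minimal scalar C sketch of the same
 * step (illustrative only; sbox[] and rol32() are not defined in this
 * file):
 *
 *	uint32_t t = rk ^ s1 ^ s2 ^ s3;
 *	t = (uint32_t)sbox[t >> 24] << 24 |
 *	    (uint32_t)sbox[(t >> 16) & 0xff] << 16 |
 *	    (uint32_t)sbox[(t >> 8) & 0xff] << 8 |
 *	    (uint32_t)sbox[t & 0xff];		/* tau: per-byte sbox */
 *	s0 ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^
 *	      rol32(t, 18) ^ rol32(t, 24);	/* L: linear diffusion */
 *
 * The shl/sri pairs above build the rotations: RTMP1 collects
 * x ^ rol32(x, 8) ^ rol32(x, 16), so rol32(RTMP1, 2) contributes the
 * rol32 2/10/18 terms, and x ^ rol32(x, 24) supplies the rest.
 */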

#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3)                   \
        mov             x6, 8;                              \
4:                                                          \
        ld1             {RKEY.4s}, [x0], #16;               \
        subs            x6, x6, #1;                         \
                                                            \
        ROUND4(0, b0, b1, b2, b3);                          \
        ROUND4(1, b1, b2, b3, b0);                          \
        ROUND4(2, b2, b3, b0, b1);                          \
        ROUND4(3, b3, b0, b1, b2);                          \
                                                            \
        bne             4b;                                 \
                                                            \
        rev32           b0.16b, b0.16b;                     \
        rev32           b1.16b, b1.16b;                     \
        rev32           b2.16b, b2.16b;                     \
        rev32           b3.16b, b3.16b;                     \
                                                            \
        rotate_clockwise_4x4(b0, b1, b2, b3);               \
                                                            \
        /* repoint to rkey */                               \
        sub             x0, x0, #128;
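
/*
 * Eight trips around the "4:" loop, four rounds each, give the 32 SM4
 * rounds.  Every iteration loads the next 16 bytes of the round key
 * array (four 32-bit round keys) through RKEY, so the final
 * "sub x0, x0, #128" rewinds x0 to the start of the 32-entry key
 * schedule for the next group of blocks.
 */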

#define SM4_CRYPT_BLK4(b0, b1, b2, b3)                      \
        rev32           b0.16b, b0.16b;                     \
        rev32           b1.16b, b1.16b;                     \
        rev32           b2.16b, b2.16b;                     \
        rev32           b3.16b, b3.16b;                     \
        SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);

#define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3)       \
        /* rk ^ s1 ^ s2 ^ s3 */                             \
        dup             RX0.4s, RKEY.s[round];              \
        eor             RTMP0.16b, s2.16b, s3.16b;          \
        mov             RX1.16b, RX0.16b;                   \
        eor             RTMP1.16b, t2.16b, t3.16b;          \
        eor             RX0.16b, RX0.16b, s1.16b;           \
        eor             RX1.16b, RX1.16b, t1.16b;           \
        eor             RX0.16b, RX0.16b, RTMP0.16b;        \
        eor             RX1.16b, RX1.16b, RTMP1.16b;        \
                                                            \
        /* sbox, non-linear part */                         \
        movi            RTMP3.16b, #64;  /* sizeof(sbox) / 4 */ \
        tbl             RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \
        tbl             RTMP1.16b, {v16.16b-v19.16b}, RX1.16b; \
        sub             RX0.16b, RX0.16b, RTMP3.16b;        \
        sub             RX1.16b, RX1.16b, RTMP3.16b;        \
        tbx             RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \
        tbx             RTMP1.16b, {v20.16b-v23.16b}, RX1.16b; \
        sub             RX0.16b, RX0.16b, RTMP3.16b;        \
        sub             RX1.16b, RX1.16b, RTMP3.16b;        \
        tbx             RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \
        tbx             RTMP1.16b, {v24.16b-v27.16b}, RX1.16b; \
        sub             RX0.16b, RX0.16b, RTMP3.16b;        \
        sub             RX1.16b, RX1.16b, RTMP3.16b;        \
        tbx             RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \
        tbx             RTMP1.16b, {v28.16b-v31.16b}, RX1.16b; \
                                                            \
        /* linear part */                                   \
        shl             RX0.4s, RTMP0.4s, #8;               \
        shl             RX1.4s, RTMP1.4s, #8;               \
        shl             RTMP2.4s, RTMP0.4s, #16;            \
        shl             RTMP3.4s, RTMP1.4s, #16;            \
        sri             RX0.4s, RTMP0.4s, #(32-8);          \
        sri             RX1.4s, RTMP1.4s, #(32-8);          \
        sri             RTMP2.4s, RTMP0.4s, #(32-16);       \
        sri             RTMP3.4s, RTMP1.4s, #(32-16);       \
        /* RX = x ^ rol32(x, 8) ^ rol32(x, 16) */           \
        eor             RX0.16b, RX0.16b, RTMP0.16b;        \
        eor             RX1.16b, RX1.16b, RTMP1.16b;        \
        eor             RX0.16b, RX0.16b, RTMP2.16b;        \
        eor             RX1.16b, RX1.16b, RTMP3.16b;        \
        /* RTMP0/1 ^= x ^ rol32(x, 24) ^ rol32(RX, 2) */    \
        shl             RTMP2.4s, RTMP0.4s, #24;            \
        shl             RTMP3.4s, RTMP1.4s, #24;            \
        sri             RTMP2.4s, RTMP0.4s, #(32-24);       \
        sri             RTMP3.4s, RTMP1.4s, #(32-24);       \
        eor             RTMP0.16b, RTMP0.16b, RTMP2.16b;    \
        eor             RTMP1.16b, RTMP1.16b, RTMP3.16b;    \
        shl             RTMP2.4s, RX0.4s, #2;               \
        shl             RTMP3.4s, RX1.4s, #2;               \
        sri             RTMP2.4s, RX0.4s, #(32-2);          \
        sri             RTMP3.4s, RX1.4s, #(32-2);          \
        eor             RTMP0.16b, RTMP0.16b, RTMP2.16b;    \
        eor             RTMP1.16b, RTMP1.16b, RTMP3.16b;    \
        /* s0/t0 ^= RTMP0/1 */                              \
        eor             s0.16b, s0.16b, RTMP0.16b;          \
        eor             t0.16b, t0.16b, RTMP1.16b;
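
/*
 * ROUND8() is ROUND4() applied to two independent groups of four
 * blocks (s0-s3 and t0-t3) with the instruction streams interleaved,
 * which hides the tbl/tbx and shift latencies behind the second
 * dependency chain.
 */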

#define SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7) \
        rev32           b0.16b, b0.16b;                     \
        rev32           b1.16b, b1.16b;                     \
        rev32           b2.16b, b2.16b;                     \
        rev32           b3.16b, b3.16b;                     \
        rev32           b4.16b, b4.16b;                     \
        rev32           b5.16b, b5.16b;                     \
        rev32           b6.16b, b6.16b;                     \
        rev32           b7.16b, b7.16b;                     \
                                                            \
        mov             x6, 8;                              \
8:                                                          \
        ld1             {RKEY.4s}, [x0], #16;               \
        subs            x6, x6, #1;                         \
                                                            \
        ROUND8(0, b0, b1, b2, b3, b4, b5, b6, b7);          \
        ROUND8(1, b1, b2, b3, b0, b5, b6, b7, b4);          \
        ROUND8(2, b2, b3, b0, b1, b6, b7, b4, b5);          \
        ROUND8(3, b3, b0, b1, b2, b7, b4, b5, b6);          \
                                                            \
        bne             8b;                                 \
                                                            \
        rev32           b0.16b, b0.16b;                     \
        rev32           b1.16b, b1.16b;                     \
        rev32           b2.16b, b2.16b;                     \
        rev32           b3.16b, b3.16b;                     \
        rev32           b4.16b, b4.16b;                     \
        rev32           b5.16b, b5.16b;                     \
        rev32           b6.16b, b6.16b;                     \
        rev32           b7.16b, b7.16b;                     \
                                                            \
        /* repoint to rkey */                               \
        sub             x0, x0, #128;

#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7)      \
        SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7); \
        rotate_clockwise_4x4_2x(b0, b1, b2, b3, b4, b5, b6, b7);
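
/*
 * The _norotate variant is kept separate because
 * rotate_clockwise_4x4_2x() clobbers RTMP4-RTMP7 (v12-v15), which
 * overlap RIV; the CBC-decrypt path therefore rotates the two halves
 * with rotate_clockwise_4x4() instead, as noted below.
 */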


.align 3
SYM_FUNC_START(sm4_neon_crypt)
        /* input:
         *   x0: round key array, CTX
         *   x1: dst
         *   x2: src
         *   w3: nblocks
         */
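        /*
         * The C glue (arch/arm64/crypto/sm4-neon-glue.c) is expected to
         * declare this roughly as follows (an assumption for
         * illustration, not part of this file):
         *
         *	asmlinkage void sm4_neon_crypt(const u32 *rkey, u8 *dst,
         *				       const u8 *src,
         *				       unsigned int nblocks);
         */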
        SM4_PREPARE()

.Lcrypt_loop_8x:
        sub             w3, w3, #8
        tbnz            w3, #31, .Lcrypt_4x
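
        /*
         * The counter is decremented before the test: if fewer than 8
         * blocks remained, the subtraction goes negative and the tbnz
         * on bit 31 (the sign bit) branches to the narrower paths.
         */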

        ld4             {v0.4s-v3.4s}, [x2], #64
        ld4             {v4.4s-v7.4s}, [x2], #64

        SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)

        st1             {v0.16b-v3.16b}, [x1], #64
        st1             {v4.16b-v7.16b}, [x1], #64

        cbz             w3, .Lcrypt_end
        b               .Lcrypt_loop_8x

.Lcrypt_4x:
        add             w3, w3, #8
        cmp             w3, #4
        blt             .Lcrypt_tail

        sub             w3, w3, #4

        ld4             {v0.4s-v3.4s}, [x2], #64

        SM4_CRYPT_BLK4(v0, v1, v2, v3)

        st1             {v0.16b-v3.16b}, [x1], #64

        cbz             w3, .Lcrypt_end

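        /*
         * 1-3 blocks remain.  They are loaded individually (the
         * missing lanes keep stale register contents), encrypted as a
         * full 4-block batch, and only the valid results are stored.
         */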
.Lcrypt_tail:
        cmp             w3, #2
        ld1             {v0.16b}, [x2], #16
        blt             .Lcrypt_tail_load_done
        ld1             {v1.16b}, [x2], #16
        beq             .Lcrypt_tail_load_done
        ld1             {v2.16b}, [x2], #16

.Lcrypt_tail_load_done:
        transpose_4x4(v0, v1, v2, v3)

        SM4_CRYPT_BLK4(v0, v1, v2, v3)

        cmp             w3, #2
        st1             {v0.16b}, [x1], #16
        blt             .Lcrypt_end
        st1             {v1.16b}, [x1], #16
        beq             .Lcrypt_end
        st1             {v2.16b}, [x1], #16

.Lcrypt_end:
        ret
SYM_FUNC_END(sm4_neon_crypt)

.align 3
SYM_FUNC_START(sm4_neon_cbc_dec)
        /* input:
         *   x0: round key array, CTX
         *   x1: dst
         *   x2: src
         *   x3: iv (big endian, 128 bit)
         *   w4: nblocks
         */
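        /*
         * Expected glue-side prototype (an assumption for illustration,
         * mirroring sm4_neon_crypt above):
         *
         *	asmlinkage void sm4_neon_cbc_dec(const u32 *rkey_dec,
         *					 u8 *dst, const u8 *src,
         *					 u8 *iv,
         *					 unsigned int nblocks);
         */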
        SM4_PREPARE()

        ld1             {RIV.16b}, [x3]

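        /*
         * CBC decryption: P[i] = D(C[i]) ^ C[i-1].  Each batch is
         * decrypted first, then XORed with the IV (for the first
         * block) and with the preceding ciphertexts reloaded from the
         * source buffer; the last ciphertext becomes the next IV.
         */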
.Lcbc_dec_loop_8x:
        sub             w4, w4, #8
        tbnz            w4, #31, .Lcbc_dec_4x

        ld4             {v0.4s-v3.4s}, [x2], #64
        ld4             {v4.4s-v7.4s}, [x2]

        SM4_CRYPT_BLK8_norotate(v0, v1, v2, v3, v4, v5, v6, v7)

        /* Avoid overwriting the RIV register */
        rotate_clockwise_4x4(v0, v1, v2, v3)
        rotate_clockwise_4x4(v4, v5, v6, v7)

        sub             x2, x2, #64

        eor             v0.16b, v0.16b, RIV.16b

        ld1             {RTMP0.16b-RTMP3.16b}, [x2], #64
        ld1             {RTMP4.16b-RTMP7.16b}, [x2], #64

        eor             v1.16b, v1.16b, RTMP0.16b
        eor             v2.16b, v2.16b, RTMP1.16b
        eor             v3.16b, v3.16b, RTMP2.16b
        eor             v4.16b, v4.16b, RTMP3.16b
        eor             v5.16b, v5.16b, RTMP4.16b
        eor             v6.16b, v6.16b, RTMP5.16b
        eor             v7.16b, v7.16b, RTMP6.16b

        mov             RIV.16b, RTMP7.16b

        st1             {v0.16b-v3.16b}, [x1], #64
        st1             {v4.16b-v7.16b}, [x1], #64

        cbz             w4, .Lcbc_dec_end
        b               .Lcbc_dec_loop_8x

.Lcbc_dec_4x:
        add             w4, w4, #8
        cmp             w4, #4
        blt             .Lcbc_dec_tail

        sub             w4, w4, #4

        ld1             {v0.16b-v3.16b}, [x2], #64

        rev32           v4.16b, v0.16b
        rev32           v5.16b, v1.16b
        rev32           v6.16b, v2.16b
        rev32           v7.16b, v3.16b

        transpose_4x4(v4, v5, v6, v7)

        SM4_CRYPT_BLK4_BE(v4, v5, v6, v7)

        eor             v4.16b, v4.16b, RIV.16b
        eor             v5.16b, v5.16b, v0.16b
        eor             v6.16b, v6.16b, v1.16b
        eor             v7.16b, v7.16b, v2.16b

        mov             RIV.16b, v3.16b

        st1             {v4.16b-v7.16b}, [x1], #64

        cbz             w4, .Lcbc_dec_end

.Lcbc_dec_tail:
        cmp             w4, #2
        ld1             {v0.16b}, [x2], #16
        blt             .Lcbc_dec_tail_load_done
        ld1             {v1.16b}, [x2], #16
        beq             .Lcbc_dec_tail_load_done
        ld1             {v2.16b}, [x2], #16

.Lcbc_dec_tail_load_done:
        rev32           v4.16b, v0.16b
        rev32           v5.16b, v1.16b
        rev32           v6.16b, v2.16b

        transpose_4x4(v4, v5, v6, v7)

        SM4_CRYPT_BLK4_BE(v4, v5, v6, v7)

        cmp             w4, #2
        eor             v4.16b, v4.16b, RIV.16b
        mov             RIV.16b, v0.16b
        st1             {v4.16b}, [x1], #16
        blt             .Lcbc_dec_end

        eor             v5.16b, v5.16b, v0.16b
        mov             RIV.16b, v1.16b
        st1             {v5.16b}, [x1], #16
        beq             .Lcbc_dec_end

        eor             v6.16b, v6.16b, v1.16b
        mov             RIV.16b, v2.16b
        st1             {v6.16b}, [x1], #16

.Lcbc_dec_end:
        /* store new IV */
        st1             {RIV.16b}, [x3]

        ret
SYM_FUNC_END(sm4_neon_cbc_dec)

.align 3
SYM_FUNC_START(sm4_neon_ctr_crypt)
        /* input:
         *   x0: round key array, CTX
         *   x1: dst
         *   x2: src
         *   x3: ctr (big endian, 128 bit)
         *   w4: nblocks
         */
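        /*
         * Expected glue-side prototype (an assumption for illustration,
         * mirroring the functions above):
         *
         *	asmlinkage void sm4_neon_ctr_crypt(const u32 *rkey,
         *					   u8 *dst, const u8 *src,
         *					   u8 *ctr,
         *					   unsigned int nblocks);
         */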
        SM4_PREPARE()

        ldp             x7, x8, [x3]
        rev             x7, x7
        rev             x8, x8
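
        /*
         * The 128-bit counter is stored big endian; the two rev
         * instructions byte-swap each half into host order so it can
         * be incremented with plain adds/adc, and the swap is undone
         * when the counter is written back at .Lctr_crypt_end.
         */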

.Lctr_crypt_loop_8x:
        sub             w4, w4, #8
        tbnz            w4, #31, .Lctr_crypt_4x

#define inc_le128(vctr)                                     \
                mov             vctr.d[1], x8;              \
                mov             vctr.d[0], x7;              \
                adds            x8, x8, #1;                 \
                rev64           vctr.16b, vctr.16b;         \
                adc             x7, x7, xzr;
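
/*
 * inc_le128() deposits the current counter into vctr (rev64 restores
 * the big-endian byte order within each half) and post-increments the
 * 128-bit value: adds bumps the low half in x8 and adc propagates the
 * carry into x7.  A rough C equivalent (illustrative only;
 * to_be128() is a hypothetical helper):
 *
 *	vctr = to_be128(x7, x8);
 *	x8 += 1;
 *	x7 += (x8 == 0);	// carry into the high half
 */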

        /* construct CTRs */
        inc_le128(v0)                   /* +0 */
        inc_le128(v1)                   /* +1 */
        inc_le128(v2)                   /* +2 */
        inc_le128(v3)                   /* +3 */
        inc_le128(v4)                   /* +4 */
        inc_le128(v5)                   /* +5 */
        inc_le128(v6)                   /* +6 */
        inc_le128(v7)                   /* +7 */

        transpose_4x4_2x(v0, v1, v2, v3, v4, v5, v6, v7)

        SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)

        ld1             {RTMP0.16b-RTMP3.16b}, [x2], #64
        ld1             {RTMP4.16b-RTMP7.16b}, [x2], #64

        eor             v0.16b, v0.16b, RTMP0.16b
        eor             v1.16b, v1.16b, RTMP1.16b
        eor             v2.16b, v2.16b, RTMP2.16b
        eor             v3.16b, v3.16b, RTMP3.16b
        eor             v4.16b, v4.16b, RTMP4.16b
        eor             v5.16b, v5.16b, RTMP5.16b
        eor             v6.16b, v6.16b, RTMP6.16b
        eor             v7.16b, v7.16b, RTMP7.16b

        st1             {v0.16b-v3.16b}, [x1], #64
        st1             {v4.16b-v7.16b}, [x1], #64

        cbz             w4, .Lctr_crypt_end
        b               .Lctr_crypt_loop_8x

.Lctr_crypt_4x:
        add             w4, w4, #8
        cmp             w4, #4
        blt             .Lctr_crypt_tail

        sub             w4, w4, #4

        /* construct CTRs */
        inc_le128(v0)                   /* +0 */
        inc_le128(v1)                   /* +1 */
        inc_le128(v2)                   /* +2 */
        inc_le128(v3)                   /* +3 */

        ld1             {v4.16b-v7.16b}, [x2], #64

        transpose_4x4(v0, v1, v2, v3)

        SM4_CRYPT_BLK4(v0, v1, v2, v3)

        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        eor             v3.16b, v3.16b, v7.16b

        st1             {v0.16b-v3.16b}, [x1], #64

        cbz             w4, .Lctr_crypt_end

.Lctr_crypt_tail:
        /* inc_le128 clobbers the flags (adds/adc), so w4 is re-tested after each use */
        ld1             {v4.16b}, [x2], #16
        inc_le128(v0)
        cmp             w4, #2
        blt             .Lctr_crypt_tail_load_done

        ld1             {v5.16b}, [x2], #16
        inc_le128(v1)
        cmp             w4, #2
        beq             .Lctr_crypt_tail_load_done

        ld1             {v6.16b}, [x2], #16
        inc_le128(v2)

.Lctr_crypt_tail_load_done:
        transpose_4x4(v0, v1, v2, v3)

        SM4_CRYPT_BLK4(v0, v1, v2, v3)

        cmp             w4, #2

        eor             v0.16b, v0.16b, v4.16b
        st1             {v0.16b}, [x1], #16
        blt             .Lctr_crypt_end

        eor             v1.16b, v1.16b, v5.16b
        st1             {v1.16b}, [x1], #16
        beq             .Lctr_crypt_end

        eor             v2.16b, v2.16b, v6.16b
        st1             {v2.16b}, [x1], #16

.Lctr_crypt_end:
        /* store new CTR */
        rev             x7, x7
        rev             x8, x8
        stp             x7, x8, [x3]

        ret
SYM_FUNC_END(sm4_neon_ctr_crypt)
                                                      
