~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm/crypto/chacha-scalar-core.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm/crypto/chacha-scalar-core.S (Architecture i386) and /arch/sparc64/crypto/chacha-scalar-core.S (Architecture sparc64)


  1 /* SPDX-License-Identifier: GPL-2.0 */            
  2 /*                                                
  3  * Copyright (C) 2018 Google, Inc.                
  4  */                                               
  5                                                   
  6 #include <linux/linkage.h>                        
  7 #include <asm/assembler.h>                        
  8                                                   
  9 /*                                                
 10  * Design notes:                                  
 11  *                                                
 12  * 16 registers would be needed to hold the state matrix, but only 14 are
 13  * available because 'sp' and 'pc' cannot be used.  So we spill the elements
 14  * (x8, x9) to the stack and swap them out with (x10, x11).  This adds one
 15  * 'ldrd' and one 'strd' instruction per round.
 16  *                                                
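 17  * All rotates are performed using the implicit rotate operand accepted by the
 18  * 'add' and 'eor' instructions.  This is faster than using explicit rotate
 19  * instructions.  To make this work, we allow the values in the second and last
 20  * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
 21  * wrong rotation amount.  The rotation amount is then fixed up just in time
 22  * when the values are used.  'brot' is the number of bits the values in row 'b'
 23  * need to be rotated right to arrive at the correct values, and 'drot'
 24  * similarly for row 'd'.  (brot, drot) start out as (0, 0) but we make it such
 25  * that they end up as (25, 24) after every round.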
 26  */                                               
 27                                                   
 28         // ChaCha state registers                 
            // Symbolic names for the 16 state words x0-x15.  Only 14 registers are
            // aliased (r0-r12 and r14), so x8/x10 share r8 and x9/x11 share r9; the
            // pair that is not currently in registers lives on the stack.
 29         X0      .req    r0                        
 30         X1      .req    r1                        
 31         X2      .req    r2                        
 32         X3      .req    r3                        
 33         X4      .req    r4                        
 34         X5      .req    r5                        
 35         X6      .req    r6                        
 36         X7      .req    r7                        
 37         X8_X10  .req    r8      // shared by x8 and x10
 38         X9_X11  .req    r9      // shared by x9 and x11
 39         X12     .req    r10                       
 40         X13     .req    r11                       
 41         X14     .req    r12                       
 42         X15     .req    r14                       
 43                                                   
 44 .macro _le32_bswap_4x   a, b, c, d,  tmp          
            // Applies rev_l to each of a, b, c and d in turn, passing tmp as the
            // second operand.  Emits no instructions unless __ARMEB__ is defined.
            // NOTE(review): rev_l is defined outside this file (presumably in
            // <asm/assembler.h>, as a 32-bit byte reverse that uses tmp as
            // scratch) - confirm before relying on tmp being preserved.
 45 #ifdef __ARMEB__                                  
 46         rev_l           \a,  \tmp                 
 47         rev_l           \b,  \tmp                 
 48         rev_l           \c,  \tmp                 
 49         rev_l           \d,  \tmp                 
 50 #endif                                            
 51 .endm                                             
 52                                                   
 53 .macro __ldrd           a, b, src, offset         
 54 #if __LINUX_ARM_ARCH__ >= 6                       
 55         ldrd            \a, \b, [\src, #\offse    
 56 #else                                             
 57         ldr             \a, [\src, #\offset]      
 58         ldr             \b, [\src, #\offset +     
 59 #endif                                            
 60 .endm                                             
 61                                                   
 62 .macro __strd           a, b, dst, offset         
 63 #if __LINUX_ARM_ARCH__ >= 6                       
 64         strd            \a, \b, [\dst, #\offse    
 65 #else                                             
 66         str             \a, [\dst, #\offset]      
 67         str             \b, [\dst, #\offset +     
 68 #endif                                            
 69 .endm                                             
 70                                                   
 71 .macro _halfround       a1, b1, c1, d1,  a2, b    
 72                                                   
 73         // a += b; d ^= a; d = rol(d, 16);        
 74         add             \a1, \a1, \b1, ror #br    
 75         add             \a2, \a2, \b2, ror #br    
 76         eor             \d1, \a1, \d1, ror #dr    
 77         eor             \d2, \a2, \d2, ror #dr    
 78         // drot == 32 - 16 == 16                  
 79                                                   
 80         // c += d; b ^= c; b = rol(b, 12);        
 81         add             \c1, \c1, \d1, ror #16    
 82         add             \c2, \c2, \d2, ror #16    
 83         eor             \b1, \c1, \b1, ror #br    
 84         eor             \b2, \c2, \b2, ror #br    
 85         // brot == 32 - 12 == 20                  
 86                                                   
 87         // a += b; d ^= a; d = rol(d, 8);         
 88         add             \a1, \a1, \b1, ror #20    
 89         add             \a2, \a2, \b2, ror #20    
 90         eor             \d1, \a1, \d1, ror #16    
 91         eor             \d2, \a2, \d2, ror #16    
 92         // drot == 32 - 8 == 24                   
 93                                                   
 94         // c += d; b ^= c; b = rol(b, 7);         
 95         add             \c1, \c1, \d1, ror #24    
 96         add             \c2, \c2, \d2, ror #24    
 97         eor             \b1, \c1, \b1, ror #20    
 98         eor             \b2, \c2, \b2, ror #20    
 99         // brot == 32 - 7 == 25                   
100 .endm                                             
101                                                   
102 .macro _doubleround                               
103                                                   
104         // column round                           
105                                                   
106         // quarterrounds: (x0, x4, x8, x12) an    
107         _halfround      X0, X4, X8_X10, X12,      
108                                                   
109         // save (x8, x9); restore (x10, x11)      
110         __strd          X8_X10, X9_X11, sp, 0     
111         __ldrd          X8_X10, X9_X11, sp, 8     
112                                                   
113         // quarterrounds: (x2, x6, x10, x14) a    
114         _halfround      X2, X6, X8_X10, X14,      
115                                                   
116         .set brot, 25                             
117         .set drot, 24                             
118                                                   
119         // diagonal round                         
120                                                   
121         // quarterrounds: (x0, x5, x10, x15) a    
122         _halfround      X0, X5, X8_X10, X15,      
123                                                   
124         // save (x10, x11); restore (x8, x9)      
125         __strd          X8_X10, X9_X11, sp, 8     
126         __ldrd          X8_X10, X9_X11, sp, 0     
127                                                   
128         // quarterrounds: (x2, x7, x8, x13) an    
129         _halfround      X2, X7, X8_X10, X13,      
130 .endm                                             
131                                                   
132 .macro _chacha_permute  nrounds                   
            // Expands to (nrounds / 2) back-to-back copies of _doubleround, so the
            // permutation is straight-line code with no run-time loop counter.
            // nrounds must therefore be an assembly-time constant.
            //
            // brot/drot are reset to 0 because rows b and d carry no pending rotation
            // on entry.  _doubleround sets them to 25 and 24, so code following this
            // macro must still apply 'ror #brot' to x4-x7 and 'ror #drot' to x12-x15.
133         .set brot, 0                              
134         .set drot, 0                              
135         .rept \nrounds / 2                        
136          _doubleround                             
137         .endr                                     
138 .endm                                             
139                                                   
140 .macro _chacha          nrounds                   
141                                                   
142 .Lnext_block\@:                                   
143         // Stack: unused0-unused1 x10-x11 x0-x    
144         // Registers contain x0-x9,x12-x15.       
145                                                   
146         // Do the core ChaCha permutation to u    
147         _chacha_permute \nrounds                  
148                                                   
149         add             sp, #8                    
150         // Stack: x10-x11 orig_x0-orig_x15 OUT    
151         // Registers contain x0-x9,x12-x15.       
152         // x4-x7 are rotated by 'brot'; x12-x1    
153                                                   
154         // Free up some registers (r8-r12,r14)    
155         push            {X8_X10, X9_X11, X12,     
156                                                   
157         // Load (OUT, IN, LEN).                   
158         ldr             r14, [sp, #96]            
159         ldr             r12, [sp, #100]           
160         ldr             r11, [sp, #104]           
161                                                   
162         orr             r10, r14, r12             
163                                                   
164         // Use slow path if fewer than 64 byte    
165         cmp             r11, #64                  
166         blt             .Lxor_slowpath\@          
167                                                   
168         // Use slow path if IN and/or OUT isn'    
169         // ARMv6+, since ldmia and stmia (used    
170         tst             r10, #3                   
171         bne             .Lxor_slowpath\@          
172                                                   
173         // Fast path: XOR 64 bytes of aligned     
174                                                   
175         // Stack: x8-x9 x12-x15 x10-x11 orig_x    
176         // Registers: r0-r7 are x0-x7; r8-r11     
177         // x4-x7 are rotated by 'brot'; x12-x1    
178                                                   
179         // x0-x3                                  
180         __ldrd          r8, r9, sp, 32            
181         __ldrd          r10, r11, sp, 40          
182         add             X0, X0, r8                
183         add             X1, X1, r9                
184         add             X2, X2, r10               
185         add             X3, X3, r11               
186         _le32_bswap_4x  X0, X1, X2, X3,  r8       
187         ldmia           r12!, {r8-r11}            
188         eor             X0, X0, r8                
189         eor             X1, X1, r9                
190         eor             X2, X2, r10               
191         eor             X3, X3, r11               
192         stmia           r14!, {X0-X3}             
193                                                   
194         // x4-x7                                  
195         __ldrd          r8, r9, sp, 48            
196         __ldrd          r10, r11, sp, 56          
197         add             X4, r8, X4, ror #brot     
198         add             X5, r9, X5, ror #brot     
199         ldmia           r12!, {X0-X3}             
200         add             X6, r10, X6, ror #brot    
201         add             X7, r11, X7, ror #brot    
202         _le32_bswap_4x  X4, X5, X6, X7,  r8       
203         eor             X4, X4, X0                
204         eor             X5, X5, X1                
205         eor             X6, X6, X2                
206         eor             X7, X7, X3                
207         stmia           r14!, {X4-X7}             
208                                                   
209         // x8-x15                                 
210         pop             {r0-r7}                   
211         __ldrd          r8, r9, sp, 32            
212         __ldrd          r10, r11, sp, 40          
213         add             r0, r0, r8                
214         add             r1, r1, r9                
215         add             r6, r6, r10               
216         add             r7, r7, r11               
217         _le32_bswap_4x  r0, r1, r6, r7,  r8       
218         ldmia           r12!, {r8-r11}            
219         eor             r0, r0, r8                
220         eor             r1, r1, r9                
221         eor             r6, r6, r10               
222         eor             r7, r7, r11               
223         stmia           r14!, {r0,r1,r6,r7}       
224         ldmia           r12!, {r0,r1,r6,r7}       
225         __ldrd          r8, r9, sp, 48            
226         __ldrd          r10, r11, sp, 56          
227         add             r2, r8, r2, ror #drot     
228         add             r3, r9, r3, ror #drot     
229         add             r4, r10, r4, ror #drot    
230         add             r5, r11, r5, ror #drot    
231         _le32_bswap_4x  r2, r3, r4, r5,  r9       
232           ldr           r9, [sp, #72]             
233         eor             r2, r2, r0                
234         eor             r3, r3, r1                
235         eor             r4, r4, r6                
236         eor             r5, r5, r7                
237           subs          r9, #64                   
238         stmia           r14!, {r2-r5}             
239                                                   
240         beq             .Ldone\@                  
241                                                   
242 .Lprepare_for_next_block\@:                       
243                                                   
244         // Stack: x0-x15 OUT IN LEN               
245                                                   
246         // Increment block counter (x12)          
247         add             r8, #1                    
248                                                   
249         // Store updated (OUT, IN, LEN)           
250         str             r14, [sp, #64]            
251         str             r12, [sp, #68]            
252         str             r9, [sp, #72]             
253                                                   
254           mov           r14, sp                   
255                                                   
256         // Store updated block counter (x12)      
257         str             r8, [sp, #48]             
258                                                   
259           sub           sp, #16                   
260                                                   
261         // Reload state and do next block         
262         ldmia           r14!, {r0-r11}            
263         __strd          r10, r11, sp, 8           
264         ldmia           r14, {r10-r12,r14}        
265         b               .Lnext_block\@            
266                                                   
267 .Lxor_slowpath\@:                                 
268         // Slow path: < 64 bytes remaining, or    
269         // We handle it by storing the 64 byte    
270         // XOR-ing the needed portion with the    
271                                                   
272         // Allocate keystream buffer              
273         sub             sp, #64                   
274         mov             r14, sp                   
275                                                   
276         // Stack: ks0-ks15 x8-x9 x12-x15 x10-x    
277         // Registers: r0-r7 are x0-x7; r8-r11     
278         // x4-x7 are rotated by 'brot'; x12-x1    
279                                                   
280         // Save keystream for x0-x3               
281         __ldrd          r8, r9, sp, 96            
282         __ldrd          r10, r11, sp, 104         
283         add             X0, X0, r8                
284         add             X1, X1, r9                
285         add             X2, X2, r10               
286         add             X3, X3, r11               
287         _le32_bswap_4x  X0, X1, X2, X3,  r8       
288         stmia           r14!, {X0-X3}             
289                                                   
290         // Save keystream for x4-x7               
291         __ldrd          r8, r9, sp, 112           
292         __ldrd          r10, r11, sp, 120         
293         add             X4, r8, X4, ror #brot     
294         add             X5, r9, X5, ror #brot     
295         add             X6, r10, X6, ror #brot    
296         add             X7, r11, X7, ror #brot    
297         _le32_bswap_4x  X4, X5, X6, X7,  r8       
298           add           r8, sp, #64               
299         stmia           r14!, {X4-X7}             
300                                                   
301         // Save keystream for x8-x15              
302         ldm             r8, {r0-r7}               
303         __ldrd          r8, r9, sp, 128           
304         __ldrd          r10, r11, sp, 136         
305         add             r0, r0, r8                
306         add             r1, r1, r9                
307         add             r6, r6, r10               
308         add             r7, r7, r11               
309         _le32_bswap_4x  r0, r1, r6, r7,  r8       
310         stmia           r14!, {r0,r1,r6,r7}       
311         __ldrd          r8, r9, sp, 144           
312         __ldrd          r10, r11, sp, 152         
313         add             r2, r8, r2, ror #drot     
314         add             r3, r9, r3, ror #drot     
315         add             r4, r10, r4, ror #drot    
316         add             r5, r11, r5, ror #drot    
317         _le32_bswap_4x  r2, r3, r4, r5,  r9       
318         stmia           r14, {r2-r5}              
319                                                   
320         // Stack: ks0-ks15 unused0-unused7 x0-    
321         // Registers: r8 is block counter, r12    
322                                                   
323         ldr             r9, [sp, #168]            
324         ldr             r14, [sp, #160]           
325         cmp             r9, #64                   
326           mov           r0, sp                    
327         movle           r1, r9                    
328         movgt           r1, #64                   
329         // r1 is number of bytes to XOR, in ra    
330                                                   
331 .if __LINUX_ARM_ARCH__ < 6                        
332         orr             r2, r12, r14              
333         tst             r2, #3                    
334         bne             .Lxor_next_byte\@         
335 .endif                                            
336                                                   
337         // XOR a word at a time                   
338 .rept 16                                          
339         subs            r1, #4                    
340         blt             .Lxor_words_done\@        
341         ldr             r2, [r12], #4             
342         ldr             r3, [r0], #4              
343         eor             r2, r2, r3                
344         str             r2, [r14], #4             
345 .endr                                             
346         b               .Lxor_slowpath_done\@     
347 .Lxor_words_done\@:                               
348         ands            r1, r1, #3                
349         beq             .Lxor_slowpath_done\@     
350                                                   
351         // XOR a byte at a time                   
352 .Lxor_next_byte\@:                                
353         ldrb            r2, [r12], #1             
354         ldrb            r3, [r0], #1              
355         eor             r2, r2, r3                
356         strb            r2, [r14], #1             
357         subs            r1, #1                    
358         bne             .Lxor_next_byte\@         
359                                                   
360 .Lxor_slowpath_done\@:                            
361         subs            r9, #64                   
362         add             sp, #96                   
363         bgt             .Lprepare_for_next_blo    
364                                                   
365 .Ldone\@:                                         
366 .endm   // _chacha                                
367                                                   
368 /*                                                
369  * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
370  *                   const u32 *state, int nrounds);
     *
     * Arguments as used below: r0 = dst, r1 = src, r2 = bytes, r3 = state,
     * [sp] = nrounds.  Returns immediately when bytes == 0.  nrounds == 12
     * selects the 12-round expansion of _chacha; any other value runs the
     * 20-round expansion.
371  */                                               
372 ENTRY(chacha_doarm)                               
            // Nothing to do for a zero-length request.
373         cmp             r2, #0                    
374         reteq           lr                        
375                                                   
            // ip = nrounds (fifth argument, passed on the stack).  The flags from this
            // compare are consumed by the 'beq 1f' further down; none of the
            // instructions in between update the condition flags.
376         ldr             ip, [sp]                  
377         cmp             ip, #12                   
378                                                   
            // Save (dst, src, bytes) directly above the state, where _chacha reads them
            // as (OUT, IN, LEN), together with r4-r11 and the return address.
379         push            {r0-r2,r4-r11,lr}         
380                                                   
381         // Push state x0-x15 onto stack.          
382         // Also store an extra copy of x10-x11 just before the state.
383                                                   
            // x12-x15 are fetched first, using X12 as a temporary pointer (state + 48).
384         add             X12, r3, #48              
385         ldm             X12, {X12,X13,X14,X15}    
386         push            {X12,X13,X14,X15}         
            // Reserve 64 bytes: 48 for x0-x11 plus 16 below the state (the x8-x9 spill
            // slot used by _doubleround at sp+0 and the x10-x11 copy at sp+8).
387         sub             sp, sp, #64               
388                                                   
            // x10-x11 pass through the shared X8_X10/X9_X11 registers and are written
            // twice: to the extra copy (sp+8) and to their slot in the state (sp+56).
389         __ldrd          X8_X10, X9_X11, r3, 40    
390         __strd          X8_X10, X9_X11, sp, 8     
391         __strd          X8_X10, X9_X11, sp, 56    
            // Load x0-x9.  This overwrites r3 (X3), so the state pointer is gone from
            // here on; that is why x10-x15 were read before this point.
392         ldm             r3, {X0-X9_X11}           
393         __strd          X0, X1, sp, 16            
394         __strd          X2, X3, sp, 24            
395         __strd          X4, X5, sp, 32            
396         __strd          X6, X7, sp, 40            
397         __strd          X8_X10, X9_X11, sp, 48    
398                                                   
            // Z is set iff nrounds == 12 (from the 'cmp ip, #12' above).
399         beq             1f                        
400         _chacha         20                        
401                                                   
            // _chacha finishes with sp at the saved x0-x15.  Drop those 64 bytes and the
            // 12 bytes of (dst, src, bytes), then restore r4-r11 and return via pc.
402 0:      add             sp, #76                   
403         pop             {r4-r11, pc}              
404                                                   
            // 12-round variant; shares the epilogue at label 0.
405 1:      _chacha         12                        
406         b               0b                        
407 ENDPROC(chacha_doarm)                             
408                                                   
409 /*                                                
410  * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
411  */                                               
412 ENTRY(hchacha_block_arm)                          
413         push            {r1,r4-r11,lr}            
414                                                   
415         cmp             r2, #12                   
416                                                   
417         mov             r14, r0                   
418         ldmia           r14!, {r0-r11}            
419         push            {r10-r11}                 
420         ldm             r14, {r10-r12,r14}        
421         sub             sp, #8                    
422                                                   
423         beq             1f                        
424         _chacha_permute 20                        
425                                                   
426         // Skip over (unused0-unused1, x10-x11    
427 0:      add             sp, #16                   
428                                                   
429         // Fix up rotations of x12-x15            
430         ror             X12, X12, #drot           
431         ror             X13, X13, #drot           
432           pop           {r4}                      
433         ror             X14, X14, #drot           
434         ror             X15, X15, #drot           
435                                                   
436         // Store (x0-x3,x12-x15) to 'out'         
437         stm             r4, {X0,X1,X2,X3,X12,X    
438                                                   
439         pop             {r4-r11,pc}               
440                                                   
441 1:      _chacha_permute 12                        
442         b               0b                        
443 ENDPROC(hchacha_block_arm)                        
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php