~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/powerpc/crypto/chacha-p10le-8x.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/powerpc/crypto/chacha-p10le-8x.S (Version linux-6.12-rc7) and /arch/i386/crypto/chacha-p10le-8x.S (Version linux-5.10.229)


  1 /* SPDX-License-Identifier: GPL-2.0-or-later *    
  2 #                                                 
  3 # Accelerated chacha20 implementation for ppc6    
  4 #                                                 
  5 # Copyright 2023- IBM Corp. All rights reserve    
  6 #                                                 
  7 #=============================================    
  8 # Written by Danny Tsen <dtsen@us.ibm.com>         
  9 #                                                 
 10 # chacha_p10le_8x(u32 *state, byte *dst, const    
 11 #                                size_t len, i    
 12 #                                                 
 13 # do rounds,  8 quarter rounds                    
 14 # 1.  a += b; d ^= a; d <<<= 16;                  
 15 # 2.  c += d; b ^= c; b <<<= 12;                  
 16 # 3.  a += b; d ^= a; d <<<= 8;                   
 17 # 4.  c += d; b ^= c; b <<<= 7                    
 18 #                                                 
 19 # row1 = (row1 + row2),  row4 = row1 xor row4,    
 20 # row3 = (row3 + row4),  row2 = row3 xor row2,    
 21 # row1 = (row1 + row2), row4 = row1 xor row4,     
 22 # row3 = (row3 + row4), row2 = row3 xor row2,     
 23 #                                                 
 24 # 4 blocks (a b c d)                              
 25 #                                                 
 26 # a0 b0 c0 d0                                     
 27 # a1 b1 c1 d1                                     
 28 # ...                                             
 29 # a4 b4 c4 d4                                     
 30 # ...                                             
 31 # a8 b8 c8 d8                                     
 32 # ...                                             
 33 # a12 b12 c12 d12                                 
 34 # a13 ...                                         
 35 # a14 ...                                         
 36 # a15 b15 c15 d15                                 
 37 #                                                 
 38 # Column round (v0, v4,  v8, v12, v1, v5,  v9,    
 39 # Diagonal round (v0, v5, v10, v15, v1, v6, v11   
 40 #                                                 
 41                                                   
 42 #include <asm/ppc_asm.h>                          
 43 #include <asm/asm-offsets.h>                      
 44 #include <asm/asm-compat.h>                       
 45 #include <linux/linkage.h>                        
 46                                                   
 47 .machine        "any"                             
 48 .text                                             
 49                                                   
 50 .macro  SAVE_GPR GPR OFFSET FRAME                 
            # Store general-purpose register GPR as a doubleword at
            # displacement OFFSET from the base register FRAME.
 51         std     \GPR,\OFFSET(\FRAME)              
 52 .endm                                             
 53                                                   
 54 .macro  SAVE_VRS VRS OFFSET FRAME                 
            # Store vector register VRS at address FRAME + OFFSET using the
            # indexed form (stvx has no displacement operand).
            # Clobbers: r16 (loaded with OFFSET as the index register).
 55         li      16, \OFFSET                       
 56         stvx    \VRS, 16, \FRAME                  
 57 .endm                                             
 58                                                   
 59 .macro  SAVE_VSX VSX OFFSET FRAME                 
            # Store vector-scalar register VSX at address FRAME + OFFSET
            # using the indexed store stxvx.
            # Clobbers: r16 (loaded with OFFSET as the index register).
 60         li      16, \OFFSET                       
 61         stxvx   \VSX, 16, \FRAME                  
 62 .endm                                             
 63                                                   
 64 .macro  RESTORE_GPR GPR OFFSET FRAME              
            # Reload general-purpose register GPR from the doubleword at
            # displacement OFFSET from the base register FRAME
            # (counterpart of SAVE_GPR).
 65         ld      \GPR,\OFFSET(\FRAME)              
 66 .endm                                             
 67                                                   
 68 .macro  RESTORE_VRS VRS OFFSET FRAME              
            # Reload vector register VRS from address FRAME + OFFSET
            # (counterpart of SAVE_VRS).
            # Clobbers: r16 (loaded with OFFSET as the index register).
 69         li      16, \OFFSET                       
 70         lvx     \VRS, 16, \FRAME                  
 71 .endm                                             
 72                                                   
 73 .macro  RESTORE_VSX VSX OFFSET FRAME              
            # Reload vector-scalar register VSX from address FRAME + OFFSET
            # (counterpart of SAVE_VSX).
            # Clobbers: r16 (loaded with OFFSET as the index register).
 74         li      16, \OFFSET                       
 75         lxvx    \VSX, 16, \FRAME                  
 76 .endm                                             
 77                                                   
 78 .macro SAVE_REGS                                  
            # Function prologue: saves the link register, allocates a
            # 752-byte stack frame and spills r14-r31, v20-v31 and VSX
            # registers 14-31 into it.
            #
            # Frame layout relative to the new r1, as used below:
            #   112..255  r14-r31        (18 doublewords)
            #   256..447  v20-v31        (12 quadwords, via r9 = r1 + 256)
            #   448..735  VSX 14-31      (18 quadwords, r9 + 192 onwards)
            #
            # Clobbers: r0 (holds LR), r9 (save-area base), r16 (index
            # register inside SAVE_VRS/SAVE_VSX; its incoming value has
            # already been stored at 128(r1) by then).

            # LR goes into the slot at 16(r1) of the caller frame, then the
            # new frame is pushed with the back chain written by stdu.
 79         mflr 0                                    
 80         std 0, 16(1)                              
 81         stdu 1,-752(1)                            
 82                                                   
 83         SAVE_GPR 14, 112, 1                       
 84         SAVE_GPR 15, 120, 1                       
 85         SAVE_GPR 16, 128, 1                       
 86         SAVE_GPR 17, 136, 1                       
 87         SAVE_GPR 18, 144, 1                       
 88         SAVE_GPR 19, 152, 1                       
 89         SAVE_GPR 20, 160, 1                       
 90         SAVE_GPR 21, 168, 1                       
 91         SAVE_GPR 22, 176, 1                       
 92         SAVE_GPR 23, 184, 1                       
 93         SAVE_GPR 24, 192, 1                       
 94         SAVE_GPR 25, 200, 1                       
 95         SAVE_GPR 26, 208, 1                       
 96         SAVE_GPR 27, 216, 1                       
 97         SAVE_GPR 28, 224, 1                       
 98         SAVE_GPR 29, 232, 1                       
 99         SAVE_GPR 30, 240, 1                       
100         SAVE_GPR 31, 248, 1                       
101                                                   
            # r9 = base of the vector save area; offsets below are
            # relative to it.
102         addi    9, 1, 256                         
103         SAVE_VRS 20, 0, 9                         
104         SAVE_VRS 21, 16, 9                        
105         SAVE_VRS 22, 32, 9                        
106         SAVE_VRS 23, 48, 9                        
107         SAVE_VRS 24, 64, 9                        
108         SAVE_VRS 25, 80, 9                        
109         SAVE_VRS 26, 96, 9                        
110         SAVE_VRS 27, 112, 9                       
111         SAVE_VRS 28, 128, 9                       
112         SAVE_VRS 29, 144, 9                       
113         SAVE_VRS 30, 160, 9                       
114         SAVE_VRS 31, 176, 9                       
115                                                   
116         SAVE_VSX 14, 192, 9                       
117         SAVE_VSX 15, 208, 9                       
118         SAVE_VSX 16, 224, 9                       
119         SAVE_VSX 17, 240, 9                       
120         SAVE_VSX 18, 256, 9                       
121         SAVE_VSX 19, 272, 9                       
122         SAVE_VSX 20, 288, 9                       
123         SAVE_VSX 21, 304, 9                       
124         SAVE_VSX 22, 320, 9                       
125         SAVE_VSX 23, 336, 9                       
126         SAVE_VSX 24, 352, 9                       
127         SAVE_VSX 25, 368, 9                       
128         SAVE_VSX 26, 384, 9                       
129         SAVE_VSX 27, 400, 9                       
130         SAVE_VSX 28, 416, 9                       
131         SAVE_VSX 29, 432, 9                       
132         SAVE_VSX 30, 448, 9                       
133         SAVE_VSX 31, 464, 9                       
134 .endm # SAVE_REGS                                 
135                                                   
136 .macro RESTORE_REGS                               
            # Function epilogue: reloads everything SAVE_REGS spilled, pops
            # the 752-byte frame and restores the link register.
            #
            # Order matters: the vector restores use r16 as their index
            # register, so they run first and r16 is reloaded afterwards by
            # RESTORE_GPR 16.
            # Clobbers: r0 (holds LR), r9 (save-area base).

            # r9 = base of the vector save area, same as in SAVE_REGS.
137         addi    9, 1, 256                         
138         RESTORE_VRS 20, 0, 9                      
139         RESTORE_VRS 21, 16, 9                     
140         RESTORE_VRS 22, 32, 9                     
141         RESTORE_VRS 23, 48, 9                     
142         RESTORE_VRS 24, 64, 9                     
143         RESTORE_VRS 25, 80, 9                     
144         RESTORE_VRS 26, 96, 9                     
145         RESTORE_VRS 27, 112, 9                    
146         RESTORE_VRS 28, 128, 9                    
147         RESTORE_VRS 29, 144, 9                    
148         RESTORE_VRS 30, 160, 9                    
149         RESTORE_VRS 31, 176, 9                    
150                                                   
151         RESTORE_VSX 14, 192, 9                    
152         RESTORE_VSX 15, 208, 9                    
153         RESTORE_VSX 16, 224, 9                    
154         RESTORE_VSX 17, 240, 9                    
155         RESTORE_VSX 18, 256, 9                    
156         RESTORE_VSX 19, 272, 9                    
157         RESTORE_VSX 20, 288, 9                    
158         RESTORE_VSX 21, 304, 9                    
159         RESTORE_VSX 22, 320, 9                    
160         RESTORE_VSX 23, 336, 9                    
161         RESTORE_VSX 24, 352, 9                    
162         RESTORE_VSX 25, 368, 9                    
163         RESTORE_VSX 26, 384, 9                    
164         RESTORE_VSX 27, 400, 9                    
165         RESTORE_VSX 28, 416, 9                    
166         RESTORE_VSX 29, 432, 9                    
167         RESTORE_VSX 30, 448, 9                    
168         RESTORE_VSX 31, 464, 9                    
169                                                   
170         RESTORE_GPR 14, 112, 1                    
171         RESTORE_GPR 15, 120, 1                    
172         RESTORE_GPR 16, 128, 1                    
173         RESTORE_GPR 17, 136, 1                    
174         RESTORE_GPR 18, 144, 1                    
175         RESTORE_GPR 19, 152, 1                    
176         RESTORE_GPR 20, 160, 1                    
177         RESTORE_GPR 21, 168, 1                    
178         RESTORE_GPR 22, 176, 1                    
179         RESTORE_GPR 23, 184, 1                    
180         RESTORE_GPR 24, 192, 1                    
181         RESTORE_GPR 25, 200, 1                    
182         RESTORE_GPR 26, 208, 1                    
183         RESTORE_GPR 27, 216, 1                    
184         RESTORE_GPR 28, 224, 1                    
185         RESTORE_GPR 29, 232, 1                    
186         RESTORE_GPR 30, 240, 1                    
187         RESTORE_GPR 31, 248, 1                    
188                                                   
            # Pop the frame, then fetch LR from the slot at 16(r1) where
            # SAVE_REGS stored it before pushing the frame.
189         addi    1, 1, 752                         
190         ld 0, 16(1)                               
191         mtlr 0                                    
192 .endm # RESTORE_REGS                              
193                                                   
194 .macro QT_loop_8x                                 
            # One double round (column round followed by diagonal round)
            # applied to two independent register sets at once: v0-v15 and
            # v16-v31. Within each set, registers n+0, n+4, n+8, n+12 play
            # the roles a, b, c, d of the quarter round in the file header.
            #
            # All 32 vector registers hold working state, so the control
            # vectors for the rotate steps are kept in VSX registers 20-23
            # and swapped in only while needed:
            #   - v25 (VSX 32+25) is parked in VSX 0, overwritten with VSX
            #     20, 21 or 22, used as the vpermxor/vrlw control operand,
            #     then restored from VSX 0;
            #   - v28 (VSX 32+28) is treated the same way with VSX 23.
            # Each restore happens before the next instruction that reads
            # v25/v28 as state.
            #
            # Expects: VSX 20-23 preloaded with the controls for steps 1-4
            #   of the header (<<<16, <<<12, <<<8, <<<7). They are set up
            #   outside this macro -- TODO confirm against the function body.
            # Clobbers: VSX 0.

195         # QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
            # Step 1: a += b; d = permxor(d, a) with the VSX 20 control.
196         xxlor   0, 32+25, 32+25                   
197         xxlor   32+25, 20, 20                     
198         vadduwm 0, 0, 4                           
199         vadduwm 1, 1, 5                           
200         vadduwm 2, 2, 6                           
201         vadduwm 3, 3, 7                           
202           vadduwm 16, 16, 20                      
203           vadduwm 17, 17, 21                      
204           vadduwm 18, 18, 22                      
205           vadduwm 19, 19, 23                      
206                                                   
207           vpermxor 12, 12, 0, 25                  
208           vpermxor 13, 13, 1, 25                  
209           vpermxor 14, 14, 2, 25                  
210           vpermxor 15, 15, 3, 25                  
211           vpermxor 28, 28, 16, 25                 
212           vpermxor 29, 29, 17, 25                 
213           vpermxor 30, 30, 18, 25                 
214           vpermxor 31, 31, 19, 25                 
            # v25 must hold state again before it is used as an addend below.
215         xxlor   32+25, 0, 0                       
            # Step 2: c += d; b ^= c; b rotated by the VSX 21 control.
216         vadduwm 8, 8, 12                          
217         vadduwm 9, 9, 13                          
218         vadduwm 10, 10, 14                        
219         vadduwm 11, 11, 15                        
220           vadduwm 24, 24, 28                      
221           vadduwm 25, 25, 29                      
222           vadduwm 26, 26, 30                      
223           vadduwm 27, 27, 31                      
224         vxor 4, 4, 8                              
225         vxor 5, 5, 9                              
226         vxor 6, 6, 10                             
227         vxor 7, 7, 11                             
228           vxor 20, 20, 24                         
229           vxor 21, 21, 25                         
230           vxor 22, 22, 26                         
231           vxor 23, 23, 27                         
232                                                   
233         xxlor   0, 32+25, 32+25                   
234         xxlor   32+25, 21, 21                     
235         vrlw 4, 4, 25  #                          
236         vrlw 5, 5, 25                             
237         vrlw 6, 6, 25                             
238         vrlw 7, 7, 25                             
239           vrlw 20, 20, 25  #                      
240           vrlw 21, 21, 25                         
241           vrlw 22, 22, 25                         
242           vrlw 23, 23, 25                         
243         xxlor   32+25, 0, 0                       
            # Step 3: a += b; d = permxor(d, a) with the VSX 22 control.
244         vadduwm 0, 0, 4                           
245         vadduwm 1, 1, 5                           
246         vadduwm 2, 2, 6                           
247         vadduwm 3, 3, 7                           
248           vadduwm 16, 16, 20                      
249           vadduwm 17, 17, 21                      
250           vadduwm 18, 18, 22                      
251           vadduwm 19, 19, 23                      
252                                                   
253         xxlor   0, 32+25, 32+25                   
254         xxlor   32+25, 22, 22                     
255           vpermxor 12, 12, 0, 25                  
256           vpermxor 13, 13, 1, 25                  
257           vpermxor 14, 14, 2, 25                  
258           vpermxor 15, 15, 3, 25                  
259           vpermxor 28, 28, 16, 25                 
260           vpermxor 29, 29, 17, 25                 
261           vpermxor 30, 30, 18, 25                 
262           vpermxor 31, 31, 19, 25                 
263         xxlor   32+25, 0, 0                       
            # Step 4: c += d; b ^= c; b rotated by the VSX 23 control.
            # v28 is swapped only after the adds that read it as state.
264         vadduwm 8, 8, 12                          
265         vadduwm 9, 9, 13                          
266         vadduwm 10, 10, 14                        
267         vadduwm 11, 11, 15                        
268           vadduwm 24, 24, 28                      
269           vadduwm 25, 25, 29                      
270           vadduwm 26, 26, 30                      
271           vadduwm 27, 27, 31                      
272         xxlor   0, 32+28, 32+28                   
273         xxlor   32+28, 23, 23                     
274         vxor 4, 4, 8                              
275         vxor 5, 5, 9                              
276         vxor 6, 6, 10                             
277         vxor 7, 7, 11                             
278           vxor 20, 20, 24                         
279           vxor 21, 21, 25                         
280           vxor 22, 22, 26                         
281           vxor 23, 23, 27                         
282         vrlw 4, 4, 28  #                          
283         vrlw 5, 5, 28                             
284         vrlw 6, 6, 28                             
285         vrlw 7, 7, 28                             
286           vrlw 20, 20, 28  #                      
287           vrlw 21, 21, 28                         
288           vrlw 22, 22, 28                         
289           vrlw 23, 23, 28                         
290         xxlor   32+28, 0, 0                       
291                                                   
292         # QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14)
            # Diagonal round: same four steps, with the b, c, d operands
            # taken from rotated positions instead of shuffling registers.
            # Step 1 (VSX 20 control).
293         xxlor   0, 32+25, 32+25                   
294         xxlor   32+25, 20, 20                     
295         vadduwm 0, 0, 5                           
296         vadduwm 1, 1, 6                           
297         vadduwm 2, 2, 7                           
298         vadduwm 3, 3, 4                           
299           vadduwm 16, 16, 21                      
300           vadduwm 17, 17, 22                      
301           vadduwm 18, 18, 23                      
302           vadduwm 19, 19, 20                      
303                                                   
304           vpermxor 15, 15, 0, 25                  
305           vpermxor 12, 12, 1, 25                  
306           vpermxor 13, 13, 2, 25                  
307           vpermxor 14, 14, 3, 25                  
308           vpermxor 31, 31, 16, 25                 
309           vpermxor 28, 28, 17, 25                 
310           vpermxor 29, 29, 18, 25                 
311           vpermxor 30, 30, 19, 25                 
312                                                   
313         xxlor   32+25, 0, 0                       
            # Step 2 (VSX 21 control).
314         vadduwm 10, 10, 15                        
315         vadduwm 11, 11, 12                        
316         vadduwm 8, 8, 13                          
317         vadduwm 9, 9, 14                          
318           vadduwm 26, 26, 31                      
319           vadduwm 27, 27, 28                      
320           vadduwm 24, 24, 29                      
321           vadduwm 25, 25, 30                      
322         vxor 5, 5, 10                             
323         vxor 6, 6, 11                             
324         vxor 7, 7, 8                              
325         vxor 4, 4, 9                              
326           vxor 21, 21, 26                         
327           vxor 22, 22, 27                         
328           vxor 23, 23, 24                         
329           vxor 20, 20, 25                         
330                                                   
331         xxlor   0, 32+25, 32+25                   
332         xxlor   32+25, 21, 21                     
333         vrlw 5, 5, 25                             
334         vrlw 6, 6, 25                             
335         vrlw 7, 7, 25                             
336         vrlw 4, 4, 25                             
337           vrlw 21, 21, 25                         
338           vrlw 22, 22, 25                         
339           vrlw 23, 23, 25                         
340           vrlw 20, 20, 25                         
341         xxlor   32+25, 0, 0                       
342                                                   
            # Step 3 (VSX 22 control).
343         vadduwm 0, 0, 5                           
344         vadduwm 1, 1, 6                           
345         vadduwm 2, 2, 7                           
346         vadduwm 3, 3, 4                           
347           vadduwm 16, 16, 21                      
348           vadduwm 17, 17, 22                      
349           vadduwm 18, 18, 23                      
350           vadduwm 19, 19, 20                      
351                                                   
352         xxlor   0, 32+25, 32+25                   
353         xxlor   32+25, 22, 22                     
354           vpermxor 15, 15, 0, 25                  
355           vpermxor 12, 12, 1, 25                  
356           vpermxor 13, 13, 2, 25                  
357           vpermxor 14, 14, 3, 25                  
358           vpermxor 31, 31, 16, 25                 
359           vpermxor 28, 28, 17, 25                 
360           vpermxor 29, 29, 18, 25                 
361           vpermxor 30, 30, 19, 25                 
362         xxlor   32+25, 0, 0                       
363                                                   
            # Step 4 (VSX 23 control, swapped through v28).
364         vadduwm 10, 10, 15                        
365         vadduwm 11, 11, 12                        
366         vadduwm 8, 8, 13                          
367         vadduwm 9, 9, 14                          
368           vadduwm 26, 26, 31                      
369           vadduwm 27, 27, 28                      
370           vadduwm 24, 24, 29                      
371           vadduwm 25, 25, 30                      
372                                                   
373         xxlor   0, 32+28, 32+28                   
374         xxlor   32+28, 23, 23                     
375         vxor 5, 5, 10                             
376         vxor 6, 6, 11                             
377         vxor 7, 7, 8                              
378         vxor 4, 4, 9                              
379           vxor 21, 21, 26                         
380           vxor 22, 22, 27                         
381           vxor 23, 23, 24                         
382           vxor 20, 20, 25                         
383         vrlw 5, 5, 28                             
384         vrlw 6, 6, 28                             
385         vrlw 7, 7, 28                             
386         vrlw 4, 4, 28                             
387           vrlw 21, 21, 28                         
388           vrlw 22, 22, 28                         
389           vrlw 23, 23, 28                         
390           vrlw 20, 20, 28                         
391         xxlor   32+28, 0, 0                       
392 .endm                                             
393                                                   
394 .macro QT_loop_4x                                 
            # One double round (column round followed by diagonal round) on
            # a single register set v0-v15, where registers n+0, n+4, n+8,
            # n+12 play the roles a, b, c, d of the quarter round in the
            # file header.
            #
            # Expects the rotate controls in vector registers, which are
            # free here because only v0-v15 hold state:
            #   v20 - vpermxor control for step 1 (d ^= a; d <<<= 16)
            #   v21 - vrlw rotate counts for step 2 (b <<<= 12)
            #   v22 - vpermxor control for step 3 (d ^= a; d <<<= 8)
            #   v23 - vrlw rotate counts for step 4 (b <<<= 7)
            # The mapping to steps follows the instruction order below; the
            # register contents are loaded outside this macro -- TODO
            # confirm against the function body.
            # Modifies only v0-v15.

395         # QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
396         vadduwm 0, 0, 4                           
397         vadduwm 1, 1, 5                           
398         vadduwm 2, 2, 6                           
399         vadduwm 3, 3, 7                           
400           vpermxor 12, 12, 0, 20                  
401           vpermxor 13, 13, 1, 20                  
402           vpermxor 14, 14, 2, 20                  
403           vpermxor 15, 15, 3, 20                  
404         vadduwm 8, 8, 12                          
405         vadduwm 9, 9, 13                          
406         vadduwm 10, 10, 14                        
407         vadduwm 11, 11, 15                        
408         vxor 4, 4, 8                              
409         vxor 5, 5, 9                              
410         vxor 6, 6, 10                             
411         vxor 7, 7, 11                             
412         vrlw 4, 4, 21                             
413         vrlw 5, 5, 21                             
414         vrlw 6, 6, 21                             
415         vrlw 7, 7, 21                             
416         vadduwm 0, 0, 4                           
417         vadduwm 1, 1, 5                           
418         vadduwm 2, 2, 6                           
419         vadduwm 3, 3, 7                           
420           vpermxor 12, 12, 0, 22                  
421           vpermxor 13, 13, 1, 22                  
422           vpermxor 14, 14, 2, 22                  
423           vpermxor 15, 15, 3, 22                  
424         vadduwm 8, 8, 12                          
425         vadduwm 9, 9, 13                          
426         vadduwm 10, 10, 14                        
427         vadduwm 11, 11, 15                        
428         vxor 4, 4, 8                              
429         vxor 5, 5, 9                              
430         vxor 6, 6, 10                             
431         vxor 7, 7, 11                             
432         vrlw 4, 4, 23                             
433         vrlw 5, 5, 23                             
434         vrlw 6, 6, 23                             
435         vrlw 7, 7, 23                             
436                                                   
437         # QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14)
            # Diagonal round: the b, c, d operands are taken from rotated
            # positions instead of shuffling registers.
438         vadduwm 0, 0, 5                           
439         vadduwm 1, 1, 6                           
440         vadduwm 2, 2, 7                           
441         vadduwm 3, 3, 4                           
442           vpermxor 15, 15, 0, 20                  
443           vpermxor 12, 12, 1, 20                  
444           vpermxor 13, 13, 2, 20                  
445           vpermxor 14, 14, 3, 20                  
446         vadduwm 10, 10, 15                        
447         vadduwm 11, 11, 12                        
448         vadduwm 8, 8, 13                          
449         vadduwm 9, 9, 14                          
450         vxor 5, 5, 10                             
451         vxor 6, 6, 11                             
452         vxor 7, 7, 8                              
453         vxor 4, 4, 9                              
454         vrlw 5, 5, 21                             
455         vrlw 6, 6, 21                             
456         vrlw 7, 7, 21                             
457         vrlw 4, 4, 21                             
458         vadduwm 0, 0, 5                           
459         vadduwm 1, 1, 6                           
460         vadduwm 2, 2, 7                           
461         vadduwm 3, 3, 4                           
462           vpermxor 15, 15, 0, 22                  
463           vpermxor 12, 12, 1, 22                  
464           vpermxor 13, 13, 2, 22                  
465           vpermxor 14, 14, 3, 22                  
466         vadduwm 10, 10, 15                        
467         vadduwm 11, 11, 12                        
468         vadduwm 8, 8, 13                          
469         vadduwm 9, 9, 14                          
470         vxor 5, 5, 10                             
471         vxor 6, 6, 11                             
472         vxor 7, 7, 8                              
473         vxor 4, 4, 9                              
474         vrlw 5, 5, 23                             
475         vrlw 6, 6, 23                             
476         vrlw 7, 7, 23                             
477         vrlw 4, 4, 23                             
478 .endm                                             
479                                                   
480 # Transpose                                       
481 .macro TP_4x a0 a1 a2 a3                          
            # In-place 4x4 transpose of 32-bit words held in the four vector
            # registers a0..a3. Arguments are vector register numbers; 32 is
            # added to address them as VSX registers.
            #
            # The word merges pair up words of (a0, a1) and (a2, a3); the
            # doubleword permutes then pick matching halves so that each
            # output register collects one word position from all inputs.
            # Clobbers: VSX 10-13 (temporaries).
482         xxmrghw  10, 32+\a0, 32+\a1     # a0,     
483         xxmrghw  11, 32+\a2, 32+\a3     # a2,     
484         xxmrglw  12, 32+\a0, 32+\a1     # c0,     
485         xxmrglw  13, 32+\a2, 32+\a3     # c2,     
            # DM=0 selects the first doubleword of both sources, DM=3 the
            # second doubleword of both.
486         xxpermdi        32+\a0, 10, 11, 0         
487         xxpermdi        32+\a1, 10, 11, 3         
488         xxpermdi        32+\a2, 12, 13, 0         
489         xxpermdi        32+\a3, 12, 13, 3         
490 .endm                                             
491                                                   
492 # key stream = working state + state              
493 .macro Add_state S                                
            # Word-wise add of four addend vectors into the sixteen working
            # registers v(S+0)..v(S+15).
            #
            # S is the number of the first working register. The addends are
            # vector registers 16-S .. 19-S, so S=0 adds v16-v19 into
            # v0-v15 and S=16 adds v0-v3 into v16-v31.
            # Register 16-S is added to S+0..S+3, 17-S to S+4..S+7,
            # 18-S to S+8..S+11 and 19-S to S+12..S+15.
            # The addend registers are expected to hold the saved input
            # state, per the comment preceding this macro -- TODO confirm
            # against the caller.
494         vadduwm \S+0, \S+0, 16-\S                 
495         vadduwm \S+4, \S+4, 17-\S                 
496         vadduwm \S+8, \S+8, 18-\S                 
497         vadduwm \S+12, \S+12, 19-\S               
498                                                   
499         vadduwm \S+1, \S+1, 16-\S                 
500         vadduwm \S+5, \S+5, 17-\S                 
501         vadduwm \S+9, \S+9, 18-\S                 
502         vadduwm \S+13, \S+13, 19-\S               
503                                                   
504         vadduwm \S+2, \S+2, 16-\S                 
505         vadduwm \S+6, \S+6, 17-\S                 
506         vadduwm \S+10, \S+10, 18-\S               
507         vadduwm \S+14, \S+14, 19-\S               
508                                                   
509         vadduwm \S+3, \S+3, 16-\S                 
510         vadduwm \S+7, \S+7, 17-\S                 
511         vadduwm \S+11, \S+11, 18-\S               
512         vadduwm \S+15, \S+15, 19-\S               
513 .endm                                             
514                                                   
515 #                                                 
516 # write 256 bytes                                 
517 #                                                 
518 .macro Write_256 S                                
            # XOR 256 bytes of input with the sixteen vector registers
            # v(S+0)..v(S+15) and store the 256-byte result.
            #
            # S is the number of the first vector register to use; S+32 and
            # up address the same registers as VSX registers.
            #
            # Expects:
            #   r14      - byte offset added to both buffer pointers
            #   r5       - base of the buffer that is read
            #   r4       - base of the buffer that is written
            #   r17-r31  - index registers for the 16-byte slots after the
            #              first one (16, 32, ... as loaded at function
            #              entry; only r17-r22 are visible in this excerpt
            #              -- TODO confirm the rest)
            # Clobbers: r9, r16, VSX 0-15. The registers S+0..S+15 end up
            # holding the XORed output rather than the key stream.

            # r9 = read pointer, r16 = write pointer for this 256-byte chunk.
519         add 9, 14, 5                              
520         add 16, 14, 4                             
521         lxvw4x 0, 0, 9                            
522         lxvw4x 1, 17, 9                           
523         lxvw4x 2, 18, 9                           
524         lxvw4x 3, 19, 9                           
525         lxvw4x 4, 20, 9                           
526         lxvw4x 5, 21, 9                           
527         lxvw4x 6, 22, 9                           
528         lxvw4x 7, 23, 9                           
529         lxvw4x 8, 24, 9                           
530         lxvw4x 9, 25, 9                           
531         lxvw4x 10, 26, 9                          
532         lxvw4x 11, 27, 9                          
533         lxvw4x 12, 28, 9                          
534         lxvw4x 13, 29, 9                          
535         lxvw4x 14, 30, 9                          
536         lxvw4x 15, 31, 9                          
537                                                   
            # Consecutive 16-byte input slots pair with registers in the
            # order S+0, S+4, S+8, S+12, S+1, S+5, ... (stride 4, then the
            # next starting register), not S+0, S+1, S+2, ...
538         xxlxor \S+32, \S+32, 0                    
539         xxlxor \S+36, \S+36, 1                    
540         xxlxor \S+40, \S+40, 2                    
541         xxlxor \S+44, \S+44, 3                    
542         xxlxor \S+33, \S+33, 4                    
543         xxlxor \S+37, \S+37, 5                    
544         xxlxor \S+41, \S+41, 6                    
545         xxlxor \S+45, \S+45, 7                    
546         xxlxor \S+34, \S+34, 8                    
547         xxlxor \S+38, \S+38, 9                    
548         xxlxor \S+42, \S+42, 10                   
549         xxlxor \S+46, \S+46, 11                   
550         xxlxor \S+35, \S+35, 12                   
551         xxlxor \S+39, \S+39, 13                   
552         xxlxor \S+43, \S+43, 14                   
553         xxlxor \S+47, \S+47, 15                   
554                                                   
            # Stores use the same register order and slot offsets as the
            # loads above.
555         stxvw4x \S+32, 0, 16                      
556         stxvw4x \S+36, 17, 16                     
557         stxvw4x \S+40, 18, 16                     
558         stxvw4x \S+44, 19, 16                     
559                                                   
560         stxvw4x \S+33, 20, 16                     
561         stxvw4x \S+37, 21, 16                     
562         stxvw4x \S+41, 22, 16                     
563         stxvw4x \S+45, 23, 16                     
564                                                   
565         stxvw4x \S+34, 24, 16                     
566         stxvw4x \S+38, 25, 16                     
567         stxvw4x \S+42, 26, 16                     
568         stxvw4x \S+46, 27, 16                     
569                                                   
570         stxvw4x \S+35, 28, 16                     
571         stxvw4x \S+39, 29, 16                     
572         stxvw4x \S+43, 30, 16                     
573         stxvw4x \S+47, 31, 16                     
574                                                   
575 .endm                                             
576                                                   
577 #
    # Entry point of the POWER10 ChaCha routine.  Output is produced in
    # 256-byte units (one Write_256 each): Loop_8x emits two units (512 bytes)
    # per pass, Loop_last/Loop_4x emits one unit per pass.
    #
578 # chacha_p10le_8x(u32 *state, byte *dst, const byte *src, len, nrounds)
    #   NOTE(review): the argument list after "dst" is cut off in this listing;
    #   the tail above is reconstructed from the register usage below - confirm.
    #
    # Register usage visible in this function:
    #   r3       state pointer; four 16-byte rows are loaded from it
    #   r4       dst base; Write_256 stores at r4 + r14
    #   r6       len in bytes, copied to r15 (bytes remaining)
    #   r7       halved into r8, which (re)loads CTR for the quarter-round
    #            loops (presumably nrounds, one loop pass per double round -
    #            confirm against QT_loop_8x / QT_loop_4x)
    #   r11      address of permx
    #   r14      running byte offset, advanced by 256 per Write_256
    #   r17-r31  constant index offsets 16, 32, ..., 240
    #
    # NOTE(review): Loop_last runs one full 256-byte pass before it looks at the
    # remaining length, so len appears to be expected to be a non-zero multiple
    # of 256 - confirm against the callers.
    #
    # r3 is set to 0 only on the early exit (len <= 0); the normal exit path
    # does not assign r3 in the code visible here.
579 #
580 SYM_FUNC_START(chacha_p10le_8x)
581 .align 5
582         cmpdi   6, 0                    # len <= 0: nothing to do
583         ble     Out_no_chacha           # taken before SAVE_REGS, so no restore needed
584
585         SAVE_REGS
586
587         # r17 - r31 mainly for Write_256 macro
            # (also used below as index registers for the state and permx loads)
588         li      17, 16
589         li      18, 32
590         li      19, 48
591         li      20, 64
592         li      21, 80
593         li      22, 96
594         li      23, 112
595         li      24, 128
596         li      25, 144
597         li      26, 160
598         li      27, 176
599         li      28, 192
600         li      29, 208
601         li      30, 224
602         li      31, 240
603
604         mr 15, 6                        # r15 = len: bytes remaining
605         li 14, 0                        # r14 = byte offset, starts at 0
606
607         lxvw4x  48, 0, 3                #  vr16 = state bytes  0-15
608         lxvw4x  49, 17, 3               #  vr17 = state bytes 16-31
609         lxvw4x  50, 18, 3               #  vr18 = state bytes 32-47
610         lxvw4x  51, 19, 3               #  vr19 = state bytes 48-63
611
612         # create (0, 1, 2, 3) counters
613         vspltisw 0, 0
614         vspltisw 1, 1
615         vspltisw 2, 2
616         vspltisw 3, 3
617         vmrghw  4, 0, 1
618         vmrglw  5, 2, 3
619         vsldoi  30, 4, 5, 8             # vr30 = per-lane counter offsets
620
            # vr21 = splat(12), vr23 = splat(7)
            # NOTE(review): presumably rotate amounts for the quarter-round macros - confirm
621         vspltisw 21, 12
622         vspltisw 23, 7
623
            # r11 = &permx; vr20 = first 16 bytes of permx, vr22 = next 16 bytes
624         addis   11, 2, permx@toc@ha
625         addi    11, 11, permx@toc@l
626         lxvw4x  32+20, 0, 11
627         lxvw4x  32+22, 17, 11
628
629         sradi   8, 7, 1                 # r8 = r7 / 2: CTR reload value
630
631         mtctr 8
632
633         # save constants to vsx
            # vs16-vs19 keep the four state rows while vr16-vr19 are reused by Loop_8x
634         xxlor   16, 48, 48
635         xxlor   17, 49, 49
636         xxlor   18, 50, 50
637         xxlor   19, 51, 51
638
639         vspltisw 25, 4                  # vr25 = splat(4)
640         vspltisw 26, 8                  # vr26 = splat(8)
641
642         xxlor   25, 32+26, 32+26        # vs25 = splat(8)
643         xxlor   24, 32+25, 32+25        # vs24 = splat(4)
644
645         vadduwm 31, 30, 25              # vr31 = vr30 + 4: counter offsets (4, 5, 6, 7)
646         xxlor   30, 32+30, 32+30        # vs30 = counter offsets for the first 256 bytes
647         xxlor   31, 32+31, 32+31        # vs31 = counter offsets for the second 256 bytes
648
            # vs20-vs23 keep copies of vr20-vr23
649         xxlor   20, 32+20, 32+20
650         xxlor   21, 32+21, 32+21
651         xxlor   22, 32+22, 32+22
652         xxlor   23, 32+23, 32+23
653
654         cmpdi   6, 512                  # fewer than 512 bytes: skip the 8x loop
655         blt     Loop_last
656
            # One pass = 512 bytes.  vr0-vr15 and vr16-vr31 each receive the 16 state
            # words, every word splatted across the four lanes of its own register.
657 Loop_8x:
658         xxspltw  32+0, 16, 0
659         xxspltw  32+1, 16, 1
660         xxspltw  32+2, 16, 2
661         xxspltw  32+3, 16, 3
662
663         xxspltw  32+4, 17, 0
664         xxspltw  32+5, 17, 1
665         xxspltw  32+6, 17, 2
666         xxspltw  32+7, 17, 3
667         xxspltw  32+8, 18, 0
668         xxspltw  32+9, 18, 1
669         xxspltw  32+10, 18, 2
670         xxspltw  32+11, 18, 3
671         xxspltw  32+12, 19, 0
672         xxspltw  32+13, 19, 1
673         xxspltw  32+14, 19, 2
674         xxspltw  32+15, 19, 3
675         vadduwm 12, 12, 30      # add per-lane counter offsets (vr30) to word 12
676
677         xxspltw  32+16, 16, 0
678         xxspltw  32+17, 16, 1
679         xxspltw  32+18, 16, 2
680         xxspltw  32+19, 16, 3
681
682         xxspltw  32+20, 17, 0
683         xxspltw  32+21, 17, 1
684         xxspltw  32+22, 17, 2
685         xxspltw  32+23, 17, 3
686         xxspltw  32+24, 18, 0
687         xxspltw  32+25, 18, 1
688         xxspltw  32+26, 18, 2
689         xxspltw  32+27, 18, 3
690         xxspltw  32+28, 19, 0
691         xxspltw  32+29, 19, 1
692         vadduwm 28, 28, 31      # add counter offsets (vr31); must precede the vr31 splat below
693         xxspltw  32+30, 19, 2
694         xxspltw  32+31, 19, 3
695
696 .align 5
697 quarter_loop_8x:
698         QT_loop_8x
699
700         bdnz    quarter_loop_8x         # CTR = r8 passes
701
            # First 256 bytes (vr0-vr15).  vr30 now holds working state, so it is
            # parked in vs0 while the counter offsets (vs30) are added to word 12 again.
            # NOTE(review): presumably so the state addition below yields
            # counter + lane offset - confirm against Add_state.
702         xxlor   0, 32+30, 32+30
703         xxlor   32+30, 30, 30
704         vadduwm 12, 12, 30
705         xxlor   32+30, 0, 0
706         TP_4x 0, 1, 2, 3
707         TP_4x 4, 5, 6, 7
708         TP_4x 8, 9, 10, 11
709         TP_4x 12, 13, 14, 15
710
            # Park vr16-vr19 (second-half working state) in vs0-vs3, put the state
            # rows from vs16-vs19 into vr16-vr19 for Add_state 0, then restore.
711         xxlor   0, 48, 48
712         xxlor   1, 49, 49
713         xxlor   2, 50, 50
714         xxlor   3, 51, 51
715         xxlor   48, 16, 16
716         xxlor   49, 17, 17
717         xxlor   50, 18, 18
718         xxlor   51, 19, 19
719         Add_state 0
720         xxlor   48, 0, 0
721         xxlor   49, 1, 1
722         xxlor   50, 2, 2
723         xxlor   51, 3, 3
724         Write_256 0
725         addi    14, 14, 256     # offset +=256
726         addi    15, 15, -256    # len -=256
727
            # Second 256 bytes (vr16-vr31): same counter fix-up, using vr31/vs31
            # with vs5 as the temporary, applied to word 12 of this half (vr28).
728         xxlor   5, 32+31, 32+31
729         xxlor   32+31, 31, 31
730         vadduwm 28, 28, 31
731         xxlor   32+31, 5, 5
732         TP_4x 16+0, 16+1, 16+2, 16+3
733         TP_4x 16+4, 16+5, 16+6, 16+7
734         TP_4x 16+8, 16+9, 16+10, 16+11
735         TP_4x 16+12, 16+13, 16+14, 16+15
736
            # vr0-vr3 = state rows from vs16-vs19 (vr0-vr15 were already written out)
737         xxlor   32, 16, 16
738         xxlor   33, 17, 17
739         xxlor   34, 18, 18
740         xxlor   35, 19, 19
741         Add_state 16
742         Write_256 16
743         addi    14, 14, 256     # offset +=256
744         addi    15, 15, -256    # len -=256
745
            # Advance both counter-offset vectors by 8 for the next pass:
            # vr24 = splat(4), vr25 = splat(8), vr30 = vs30 + 8, vr31 = vr30 + 4.
746         xxlor   32+24, 24, 24
747         xxlor   32+25, 25, 25
748         xxlor   32+30, 30, 30
749         vadduwm 30, 30, 25
750         vadduwm 31, 30, 24
751         xxlor   30, 32+30, 32+30
752         xxlor   31, 32+31, 32+31
753
754         cmpdi   15, 0                   # nothing left: done
755         beq     Out_loop
756
757         cmpdi   15, 512                 # less than a full 8x pass left
758         blt     Loop_last
759
760         mtctr 8                         # reload the quarter-round loop count
761         b Loop_8x
762
            # Tail path, 256 bytes per pass.  vr16-vr23 are reloaded here because
            # Loop_8x uses vr16-vr31 as working registers.
763 Loop_last:
764         lxvw4x  48, 0, 3                #  vr16 = state bytes  0-15
765         lxvw4x  49, 17, 3               #  vr17 = state bytes 16-31
766         lxvw4x  50, 18, 3               #  vr18 = state bytes 32-47
767         lxvw4x  51, 19, 3               #  vr19 = state bytes 48-63
768
769         vspltisw 21, 12                 # vr21 = splat(12)
770         vspltisw 23, 7                  # vr23 = splat(7)
771         addis   11, 2, permx@toc@ha
772         addi    11, 11, permx@toc@l
773         lxvw4x  32+20, 0, 11            # vr20 = first 16 bytes of permx
774         lxvw4x  32+22, 17, 11           # vr22 = next 16 bytes of permx
775
776         sradi   8, 7, 1                 # r8 = r7 / 2: CTR reload value
777         mtctr 8
778
            # vr0-vr15 = the 16 state words, each splatted across four lanes
779 Loop_4x:
780         vspltw  0, 16, 0
781         vspltw  1, 16, 1
782         vspltw  2, 16, 2
783         vspltw  3, 16, 3
784
785         vspltw  4, 17, 0
786         vspltw  5, 17, 1
787         vspltw  6, 17, 2
788         vspltw  7, 17, 3
789         vspltw  8, 18, 0
790         vspltw  9, 18, 1
791         vspltw  10, 18, 2
792         vspltw  11, 18, 3
793         vspltw  12, 19, 0
794         vadduwm 12, 12, 30      # add per-lane counter offsets (vr30) to word 12
795         vspltw  13, 19, 1
796         vspltw  14, 19, 2
797         vspltw  15, 19, 3
798
799 .align 5
800 quarter_loop:
801         QT_loop_4x
802
803         bdnz    quarter_loop            # CTR = r8 passes
804
805         vadduwm 12, 12, 30              # add the counter offsets to word 12 again
806         TP_4x 0, 1, 2, 3
807         TP_4x 4, 5, 6, 7
808         TP_4x 8, 9, 10, 11
809         TP_4x 12, 13, 14, 15
810
811         Add_state 0
812         Write_256 0
813         addi    14, 14, 256     # offset += 256
814         addi    15, 15, -256    # len -= 256
815
816         # Update state counter
817         vspltisw 25, 4                  # vr25 = splat(4)
818         vadduwm 30, 30, 25              # counter offsets += 4
819
820         cmpdi   15, 0                   # nothing left: done
821         beq     Out_loop
822         cmpdi   15, 256                 # less than a full 256-byte pass left: stop here
823         blt     Out_loop
824
825         mtctr 8                         # reload the quarter-round loop count
826         b Loop_4x
827
828 Out_loop:
829         RESTORE_REGS
830         blr
831
832 Out_no_chacha:
833         li      3, 0
834         blr
835 SYM_FUNC_END(chacha_p10le_8x)
836                                                   
    # Two 16-byte constant vectors.  chacha_p10le_8x loads the first one into
    # vr20 and the second one (byte offset 16) into vr22 with lxvw4x.
    # NOTE(review): presumably byte-permute control masks for the quarter-round
    # macros - confirm against QT_loop_8x / QT_loop_4x.
    # NOTE(review): the fourth word of each .long line is cut off in this listing.
837 SYM_DATA_START_LOCAL(PERMX)
838 .align 5
839 permx:
840 .long 0x22330011, 0x66774455, 0xaabb8899, 0xee
841 .long 0x11223300, 0x55667744, 0x99aabb88, 0xdd
842 SYM_DATA_END(PERMX)
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php