~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/powerpc/crypto/poly1305-p10le_64.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/powerpc/crypto/poly1305-p10le_64.S (Architecture i386) and /arch/ppc/crypto/poly1305-p10le_64.S (Architecture ppc)


  1 /* SPDX-License-Identifier: GPL-2.0-or-later *    
  2 #                                                 
  3 # Accelerated poly1305 implementation for ppc6    
  4 #                                                 
  5 # Copyright 2023- IBM Corp. All rights reserve    
  6 #                                                 
  7 #=============================================    
  8 # Written by Danny Tsen <dtsen@us.ibm.com>         
  9 #                                                 
 10 # Poly1305 - this version mainly using vector/    
 11 #  - 26 bits limbs                                
 12 #  - Handle multiple 64 byte blocks.
 13 #                                                 
 14 # Block size 16 bytes                             
 15 # key = (r, s)                                    
 16 # clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFF    
 17 # p = 2^130 - 5                                   
 18 # a += m                                          
 19 # a = (r * a) % p
 20 # a += s                                          
 21 #                                                 
 22 # Improve performance by breaking down polynom    
 23 #     h4 = m1 * r^4 + m2 * r^3 + m3 * r^2 + m
 24 #                                                 
 25 #  07/22/21 - this revision based on the above
 26 #             to 9 vectors for multiplications    
 27 #                                                 
 28 # setup r^4, r^3, r^2, r vectors                  
 29 #    vs    [r^1, r^3, r^2, r^4]                   
 30 #    vs0 = [r0,.....]                             
 31 #    vs1 = [r1,.....]                             
 32 #    vs2 = [r2,.....]                             
 33 #    vs3 = [r3,.....]                             
 34 #    vs4 = [r4,.....]                             
 35 #    vs5 = [r1*5,...]                             
 36 #    vs6 = [r2*5,...]                             
 37 #    vs7 = [r3*5,...]
 38 #    vs8 = [r4*5,...]                             
 39 #                                                 
 40 #  Each word in a vector consists a member of     
 41 #                                                 
 42 # r0, r4*5, r3*5, r2*5, r1*5;                     
 43 # r1, r0,   r4*5, r3*5, r2*5;                     
 44 # r2, r1,   r0,   r4*5, r3*5;                     
 45 # r3, r2,   r1,   r0,   r4*5;                     
 46 # r4, r3,   r2,   r1,   r0  ;                     
 47 #                                                 
 48 #                                                 
 49 # poly1305_p10le_4blocks( uint8_t *k, uint32_t    
 50 #  k = 32 bytes key                               
 51 #  r3 = k (r, s)                                  
 52 #  r4 = m
 53 #  r5 = mlen
 54 #                                                 
 55 #include <asm/ppc_asm.h>                          
 56 #include <asm/asm-offsets.h>                      
 57 #include <asm/asm-compat.h>                       
 58 #include <linux/linkage.h>                        
 59                                                   
 60 .machine "any"                                    
 61                                                   
 62 .text                                             
 63                                                   
 64 .macro  SAVE_GPR GPR OFFSET FRAME       # store one GPR as a doubleword at OFFSET(FRAME)
 65         std     \GPR,\OFFSET(\FRAME)
 66 .endm
 67                                                   
 68 .macro  SAVE_VRS VRS OFFSET FRAME       # store one vector register at FRAME + OFFSET
 69         li      16, \OFFSET             # r16 = byte offset; this overwrites r16
 70         stvx    \VRS, 16, \FRAME        # indexed store, address = r16 + FRAME
 71 .endm
 72                                                   
 73 .macro  SAVE_VSX VSX OFFSET FRAME       # store one VSX register at FRAME + OFFSET
 74         li      16, \OFFSET             # r16 = byte offset; this overwrites r16
 75         stxvx   \VSX, 16, \FRAME        # indexed store, address = r16 + FRAME
 76 .endm
 77                                                   
 78 .macro  RESTORE_GPR GPR OFFSET FRAME    # reload one GPR from the doubleword at OFFSET(FRAME)
 79         ld      \GPR,\OFFSET(\FRAME)
 80 .endm
 81                                                   
 82 .macro  RESTORE_VRS VRS OFFSET FRAME    # reload one vector register from FRAME + OFFSET
 83         li      16, \OFFSET             # r16 = byte offset; this overwrites r16
 84         lvx     \VRS, 16, \FRAME        # indexed load, address = r16 + FRAME
 85 .endm
 86                                                   
 87 .macro  RESTORE_VSX VSX OFFSET FRAME    # reload one VSX register from FRAME + OFFSET
 88         li      16, \OFFSET             # r16 = byte offset; this overwrites r16
 89         lxvx    \VSX, 16, \FRAME        # indexed load, address = r16 + FRAME
 90 .endm
 91                                                   
 92 .macro SAVE_REGS                        # prologue: save LR, r14-r31, v20-v31 and vs14-vs31
 93         mflr 0                          # r0 = link register
 94         std 0, 16(1)                    # LR is stored at 16(r1) before the new frame is pushed
 95         stdu 1,-752(1)                  # push a 752-byte frame and store the old r1 at its base
 96
 97         SAVE_GPR 14, 112, 1             # r14..r31 occupy frame offsets 112..248, 8 bytes each
 98         SAVE_GPR 15, 120, 1
 99         SAVE_GPR 16, 128, 1             # r16 is saved here, before SAVE_VRS/SAVE_VSX reuse it as an index
100         SAVE_GPR 17, 136, 1
101         SAVE_GPR 18, 144, 1
102         SAVE_GPR 19, 152, 1
103         SAVE_GPR 20, 160, 1
104         SAVE_GPR 21, 168, 1
105         SAVE_GPR 22, 176, 1
106         SAVE_GPR 23, 184, 1
107         SAVE_GPR 24, 192, 1
108         SAVE_GPR 25, 200, 1
109         SAVE_GPR 26, 208, 1
110         SAVE_GPR 27, 216, 1
111         SAVE_GPR 28, 224, 1
112         SAVE_GPR 29, 232, 1
113         SAVE_GPR 30, 240, 1
114         SAVE_GPR 31, 248, 1
115
116         addi    9, 1, 256               # r9 = base of the vector save area (r1 + 256)
117         SAVE_VRS 20, 0, 9               # v20..v31 occupy offsets 0..176 from r9, 16 bytes each
118         SAVE_VRS 21, 16, 9
119         SAVE_VRS 22, 32, 9
120         SAVE_VRS 23, 48, 9
121         SAVE_VRS 24, 64, 9
122         SAVE_VRS 25, 80, 9
123         SAVE_VRS 26, 96, 9
124         SAVE_VRS 27, 112, 9
125         SAVE_VRS 28, 128, 9
126         SAVE_VRS 29, 144, 9
127         SAVE_VRS 30, 160, 9
128         SAVE_VRS 31, 176, 9
129
130         SAVE_VSX 14, 192, 9             # vs14..vs31 occupy offsets 192..464 from r9, 16 bytes each
131         SAVE_VSX 15, 208, 9
132         SAVE_VSX 16, 224, 9
133         SAVE_VSX 17, 240, 9
134         SAVE_VSX 18, 256, 9
135         SAVE_VSX 19, 272, 9
136         SAVE_VSX 20, 288, 9
137         SAVE_VSX 21, 304, 9
138         SAVE_VSX 22, 320, 9
139         SAVE_VSX 23, 336, 9
140         SAVE_VSX 24, 352, 9
141         SAVE_VSX 25, 368, 9
142         SAVE_VSX 26, 384, 9
143         SAVE_VSX 27, 400, 9
144         SAVE_VSX 28, 416, 9
145         SAVE_VSX 29, 432, 9
146         SAVE_VSX 30, 448, 9
147         SAVE_VSX 31, 464, 9             # last slot ends at r1 + 256 + 480 = 736, inside the 752-byte frame
148 .endm # SAVE_REGS
149                                                   
150 .macro RESTORE_REGS                     # epilogue: undo SAVE_REGS using the same frame offsets
151         addi    9, 1, 256               # r9 = base of the vector save area (r1 + 256)
152         RESTORE_VRS 20, 0, 9            # v20..v31 from offsets 0..176 (each use overwrites r16)
153         RESTORE_VRS 21, 16, 9
154         RESTORE_VRS 22, 32, 9
155         RESTORE_VRS 23, 48, 9
156         RESTORE_VRS 24, 64, 9
157         RESTORE_VRS 25, 80, 9
158         RESTORE_VRS 26, 96, 9
159         RESTORE_VRS 27, 112, 9
160         RESTORE_VRS 28, 128, 9
161         RESTORE_VRS 29, 144, 9
162         RESTORE_VRS 30, 160, 9
163         RESTORE_VRS 31, 176, 9
164
165         RESTORE_VSX 14, 192, 9          # vs14..vs31 from offsets 192..464
166         RESTORE_VSX 15, 208, 9
167         RESTORE_VSX 16, 224, 9
168         RESTORE_VSX 17, 240, 9
169         RESTORE_VSX 18, 256, 9
170         RESTORE_VSX 19, 272, 9
171         RESTORE_VSX 20, 288, 9
172         RESTORE_VSX 21, 304, 9
173         RESTORE_VSX 22, 320, 9
174         RESTORE_VSX 23, 336, 9
175         RESTORE_VSX 24, 352, 9
176         RESTORE_VSX 25, 368, 9
177         RESTORE_VSX 26, 384, 9
178         RESTORE_VSX 27, 400, 9
179         RESTORE_VSX 28, 416, 9
180         RESTORE_VSX 29, 432, 9
181         RESTORE_VSX 30, 448, 9
182         RESTORE_VSX 31, 464, 9
183
184         RESTORE_GPR 14, 112, 1          # r14..r31 from frame offsets 112..248
185         RESTORE_GPR 15, 120, 1
186         RESTORE_GPR 16, 128, 1          # r16 is reloaded only after the vector restores finished using it
187         RESTORE_GPR 17, 136, 1
188         RESTORE_GPR 18, 144, 1
189         RESTORE_GPR 19, 152, 1
190         RESTORE_GPR 20, 160, 1
191         RESTORE_GPR 21, 168, 1
192         RESTORE_GPR 22, 176, 1
193         RESTORE_GPR 23, 184, 1
194         RESTORE_GPR 24, 192, 1
195         RESTORE_GPR 25, 200, 1
196         RESTORE_GPR 26, 208, 1
197         RESTORE_GPR 27, 216, 1
198         RESTORE_GPR 28, 224, 1
199         RESTORE_GPR 29, 232, 1
200         RESTORE_GPR 30, 240, 1
201         RESTORE_GPR 31, 248, 1
202
203         addi    1, 1, 752               # pop the 752-byte frame
204         ld 0, 16(1)                     # reload the LR value stored by SAVE_REGS
205         mtlr 0
206 .endm # RESTORE_REGS
207                                                   
208 #                                                 
209 # p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 +    
210 # p[1] = a0*r1 + a1*r0   + a2*r4*5 + a3*r3*5 +    
211 # p[2] = a0*r2 + a1*r1   + a2*r0   + a3*r4*5 +    
212 # p[3] = a0*r3 + a1*r2   + a2*r1   + a3*r0   +    
213 # p[4] = a0*r4 + a1*r3   + a2*r2   + a3*r1   +    
214 #                                                 
215 #    [r^2, r^3, r^1, r^4]                         
216 #    [m3,  m2,  m4,  m1]                          
217 #                                                 
218 # multiply odd and even words                     
219 .macro mul_odd                          # v14..v18 = five sums of odd-word products; scratch v10-v13
220         vmulouw 14, 4, 26               # x0 terms: v4*v26, v5*v3, v6*v2, v7*v1, v8*v0
221         vmulouw 10, 5, 3                #   follows the p[0]..p[4] table above if v4-v8 = a0-a4,
222         vmulouw 11, 6, 2                #   v26-v30 = r0-r4 and v0-v3 = r1*5..r4*5
223         vmulouw 12, 7, 1
224         vmulouw 13, 8, 0
225         vmulouw 15, 4, 27               # first x1 term, issued early between the x0 adds
226         vaddudm 14, 14, 10
227         vaddudm 14, 14, 11
228         vmulouw 10, 5, 26               # v10 and v11 are reused for x1 once their x0 values are added
229         vmulouw 11, 6, 3
230         vaddudm 14, 14, 12
231         vaddudm 14, 14, 13      # x0
232         vaddudm 15, 15, 10
233         vaddudm 15, 15, 11
234         vmulouw 12, 7, 2
235         vmulouw 13, 8, 1
236         vaddudm 15, 15, 12
237         vaddudm 15, 15, 13      # x1
238         vmulouw 16, 4, 28               # x2 terms: v4*v28, v5*v27, v6*v26, v7*v3, v8*v2
239         vmulouw 10, 5, 27
240         vmulouw 11, 6, 26
241         vaddudm 16, 16, 10
242         vaddudm 16, 16, 11
243         vmulouw 12, 7, 3
244         vmulouw 13, 8, 2
245         vaddudm 16, 16, 12
246         vaddudm 16, 16, 13      # x2
247         vmulouw 17, 4, 29               # x3 terms: v4*v29, v5*v28, v6*v27, v7*v26, v8*v3
248         vmulouw 10, 5, 28
249         vmulouw 11, 6, 27
250         vaddudm 17, 17, 10
251         vaddudm 17, 17, 11
252         vmulouw 12, 7, 26
253         vmulouw 13, 8, 3
254         vaddudm 17, 17, 12
255         vaddudm 17, 17, 13      # x3
256         vmulouw 18, 4, 30               # x4 terms: v4*v30, v5*v29, v6*v28, v7*v27, v8*v26
257         vmulouw 10, 5, 29
258         vmulouw 11, 6, 28
259         vaddudm 18, 18, 10
260         vaddudm 18, 18, 11
261         vmulouw 12, 7, 27
262         vmulouw 13, 8, 26
263         vaddudm 18, 18, 12
264         vaddudm 18, 18, 13      # x4
265 .endm
266                                                   
267 .macro mul_even                         # add even-word products into v14..v18; scratch v9-v13
268         vmuleuw 9, 4, 26                # same operand pairing as mul_odd, on the even words
269         vmuleuw 10, 5, 3
270         vmuleuw 11, 6, 2
271         vmuleuw 12, 7, 1
272         vmuleuw 13, 8, 0
273         vaddudm 14, 14, 9               # v14 is accumulated into, so it must already hold a value
274         vaddudm 14, 14, 10
275         vaddudm 14, 14, 11
276         vaddudm 14, 14, 12
277         vaddudm 14, 14, 13      # x0
278
279         vmuleuw 9, 4, 27
280         vmuleuw 10, 5, 26
281         vmuleuw 11, 6, 3
282         vmuleuw 12, 7, 2
283         vmuleuw 13, 8, 1
284         vaddudm 15, 15, 9
285         vaddudm 15, 15, 10
286         vaddudm 15, 15, 11
287         vaddudm 15, 15, 12
288         vaddudm 15, 15, 13      # x1
289
290         vmuleuw 9, 4, 28
291         vmuleuw 10, 5, 27
292         vmuleuw 11, 6, 26
293         vmuleuw 12, 7, 3
294         vmuleuw 13, 8, 2
295         vaddudm 16, 16, 9
296         vaddudm 16, 16, 10
297         vaddudm 16, 16, 11
298         vaddudm 16, 16, 12
299         vaddudm 16, 16, 13      # x2
300
301         vmuleuw 9, 4, 29
302         vmuleuw 10, 5, 28
303         vmuleuw 11, 6, 27
304         vmuleuw 12, 7, 26
305         vmuleuw 13, 8, 3
306         vaddudm 17, 17, 9
307         vaddudm 17, 17, 10
308         vaddudm 17, 17, 11
309         vaddudm 17, 17, 12
310         vaddudm 17, 17, 13      # x3
311
312         vmuleuw 9, 4, 30
313         vmuleuw 10, 5, 29
314         vmuleuw 11, 6, 28
315         vmuleuw 12, 7, 27
316         vmuleuw 13, 8, 26
317         vaddudm 18, 18, 9
318         vaddudm 18, 18, 10
319         vaddudm 18, 18, 11
320         vaddudm 18, 18, 12
321         vaddudm 18, 18, 13      # x4
322 .endm
323                                                   
324 #                                                 
325 # poly1305_setup_r                                
326 #                                                 
327 # setup r^4, r^3, r^2, r vectors                  
328 #    [r, r^3, r^2, r^4]                           
329 #    vs0 = [r0,...]                               
330 #    vs1 = [r1,...]                               
331 #    vs2 = [r2,...]                               
332 #    vs3 = [r3,...]                               
333 #    vs4 = [r4,...]                               
334 #    vs5 = [r1*5,...]
335 #    vs6 = [r2*5,...]
336 #    vs7 = [r3*5,...]
337 #    vs8 = [r4*5,...]
338 #                                                 
339 # r0, r4*5, r3*5, r2*5, r1*5;                     
340 # r1, r0,   r4*5, r3*5, r2*5;                     
341 # r2, r1,   r0,   r4*5, r3*5;                     
342 # r3, r2,   r1,   r0,   r4*5;                     
343 # r4, r3,   r2,   r1,   r0  ;                     
344 #                                                 
345 .macro poly1305_setup_r                 # build the power-of-r limb vectors; calls do_mul twice, so LR is overwritten
346
347         # save r
348         xxlor   26, 58, 58              # vs26..vs30 = copies of v26..v30 (VSR 58..62)
349         xxlor   27, 59, 59
350         xxlor   28, 60, 60
351         xxlor   29, 61, 61
352         xxlor   30, 62, 62
353
354         xxlxor  31, 31, 31              # vs31 = 0
355
356 #    [r, r^3, r^2, r^4]
357         # compute r^2
358         vmr     4, 26                   # v4..v8 = v26..v30, so do_mul multiplies r by itself
359         vmr     5, 27
360         vmr     6, 28
361         vmr     7, 29
362         vmr     8, 30
363         bl      do_mul          # r^2 r^1
364         xxpermdi 58, 58, 36, 0x3        # v26..v30 = {low dword of v26..v30, low dword of v4..v8}
365         xxpermdi 59, 59, 37, 0x3
366         xxpermdi 60, 60, 38, 0x3
367         xxpermdi 61, 61, 39, 0x3
368         xxpermdi 62, 62, 40, 0x3
369         xxpermdi 36, 36, 36, 0x3        # v4..v8 = own low dword copied into both halves
370         xxpermdi 37, 37, 37, 0x3
371         xxpermdi 38, 38, 38, 0x3
372         xxpermdi 39, 39, 39, 0x3
373         xxpermdi 40, 40, 40, 0x3
374         vspltisb 13, 2                  # v13 = shift count 2
375         vsld    9, 27, 13               # v9..v12 = v27..v30 << 2
376         vsld    10, 28, 13
377         vsld    11, 29, 13
378         vsld    12, 30, 13
379         vaddudm 0, 9, 27                # v0..v3 = 4*x + x = 5 * v27..v30
380         vaddudm 1, 10, 28
381         vaddudm 2, 11, 29
382         vaddudm 3, 12, 30
383
384         bl      do_mul          # r^4 r^3
385         vmrgow  26, 26, 4               # interleave the odd words of v26..v30 with those of v4..v8
386         vmrgow  27, 27, 5
387         vmrgow  28, 28, 6
388         vmrgow  29, 29, 7
389         vmrgow  30, 30, 8
390         vspltisb 13, 2                  # v13 = 2 again, since do_mul uses v13 as scratch
391         vsld    9, 27, 13               # recompute v0..v3 = 5 * v27..v30 for the merged vectors
392         vsld    10, 28, 13
393         vsld    11, 29, 13
394         vsld    12, 30, 13
395         vaddudm 0, 9, 27
396         vaddudm 1, 10, 28
397         vaddudm 2, 11, 29
398         vaddudm 3, 12, 30
399
400         # r^2 r^4
401         xxlor   0, 58, 58               # vs0..vs4 = v26..v30
402         xxlor   1, 59, 59
403         xxlor   2, 60, 60
404         xxlor   3, 61, 61
405         xxlor   4, 62, 62
406         xxlor   5, 32, 32               # vs5..vs8 = v0..v3, the times-5 values of v27..v30
407         xxlor   6, 33, 33
408         xxlor   7, 34, 34
409         xxlor   8, 35, 35
410
411         vspltw  9, 26, 3                # v9 = word 3 of v26 in every lane
412         vspltw  10, 26, 2               # v10 = word 2 of v26 in every lane
413         vmrgow  26, 10, 9               # v26 = {w2, w3, w2, w3} of the old v26
414         vspltw  9, 27, 3                # same rearrangement for v27..v30
415         vspltw  10, 27, 2
416         vmrgow  27, 10, 9
417         vspltw  9, 28, 3
418         vspltw  10, 28, 2
419         vmrgow  28, 10, 9
420         vspltw  9, 29, 3
421         vspltw  10, 29, 2
422         vmrgow  29, 10, 9
423         vspltw  9, 30, 3
424         vspltw  10, 30, 2
425         vmrgow  30, 10, 9
426
427         vsld    9, 27, 13               # v13 still holds 2 from the vspltisb above
428         vsld    10, 28, 13
429         vsld    11, 29, 13
430         vsld    12, 30, 13
431         vaddudm 0, 9, 27                # v0..v3 = 5 * the rearranged v27..v30
432         vaddudm 1, 10, 28
433         vaddudm 2, 11, 29
434         vaddudm 3, 12, 30
435 .endm
436                                                   
437 SYM_FUNC_START_LOCAL(do_mul)            # odd-word multiply plus carry pass; result limbs end up in v4..v8
438         mul_odd                         # v14..v18 = x0..x4 (only mul_odd is used here, not mul_even)
439
440         # do reduction ( h %= p )
441         # carry reduction
442         vspltisb 9, 2                   # v9 = shift count 2
443         vsrd    10, 14, 31              # c0 = x0 >> v31 (v31 is loaded as 0x1a in do_poly1305_init)
444         vsrd    11, 17, 31              # c3 = x3 >> v31
445         vand    7, 17, 25               # v7 = x3 & v25 (v25 is the mask loaded in do_poly1305_init)
446         vand    4, 14, 25               # v4 = x0 & v25
447         vaddudm 18, 18, 11              # x4 += c3
448         vsrd    12, 18, 31              # c4 = x4 >> v31
449         vaddudm 15, 15, 10              # x1 += c0
450
451         vsrd    11, 15, 31              # c1 = x1 >> v31
452         vand    8, 18, 25               # v8 = x4 & v25
453         vand    5, 15, 25               # v5 = x1 & v25
454         vaddudm 4, 4, 12                # v4 += c4
455         vsld    10, 12, 9               # v10 = c4 << 2
456         vaddudm 6, 16, 11               # v6 = x2 + c1
457
458         vsrd    13, 6, 31               # c2 = v6 >> v31
459         vand    6, 6, 25                # v6 &= v25
460         vaddudm 4, 4, 10                # v4 += c4 << 2, so c4 is added in 5 times in total
461         vsrd    10, 4, 31               # carry out of v4
462         vaddudm 7, 7, 13                # v7 += c2
463
464         vsrd    11, 7, 31               # carry out of v7
465         vand    7, 7, 25
466         vand    4, 4, 25
467         vaddudm 5, 5, 10                # v5 += carry from v4; v5 is not masked again
468         vaddudm 8, 8, 11                # v8 += carry from v7; v8 is not masked again
469         blr
470 SYM_FUNC_END(do_mul)
471                                                   
472 #                                                 
473 # init key                                        
474 #                                                 
475 .macro do_poly1305_init                 # load constants, mask r and split it into five limbs in v26..v30
476         addis   10, 2, rmask@toc@ha     # r10 = address of rmask, formed relative to r2
477         addi    10, 10, rmask@toc@l
478
479         ld      11, 0(10)               # r11, r12 = the two doublewords stored at rmask
480         ld      12, 8(10)
481
482         li      14, 16                  # r14, r15 = index offsets for the lvx loads below
483         li      15, 32
484         addis   10, 2, cnum@toc@ha      # r10 = address of cnum, formed relative to r2
485         addi    10, 10, cnum@toc@l
486         lvx     25, 0, 10       # v25 - mask
487         lvx     31, 14, 10      # v31 = 1a
488         lvx     19, 15, 10      # v19 = 1 << 2
489         lxv     24, 48(10)      # vs24
490         lxv     25, 64(10)      # vs25
491
492         # initialize
493         # load key from r3 to vectors
494         ld      9, 24(3)                # NOTE(review): header calls r3 a 32-byte key, yet r is read at 24 and 32 - confirm layout with the caller
495         ld      10, 32(3)
496         and.    9, 9, 11                # mask with the rmask words; record form also sets CR0
497         and.    10, 10, 12
498
499         # break 26 bits
500         extrdi  14, 9, 26, 38           # r14 = bits 0..25 of r9 (limb 0)
501         extrdi  15, 9, 26, 12           # r15 = bits 26..51 of r9 (limb 1)
502         extrdi  16, 9, 12, 0            # r16 = top 12 bits of r9
503         mtvsrdd 58, 0, 14               # v26 = {0, limb 0}
504         insrdi  16, 10, 14, 38          # r16 |= low 14 bits of r10, placed above those 12 bits (limb 2)
505         mtvsrdd 59, 0, 15               # v27 = {0, limb 1}
506         extrdi  17, 10, 26, 24          # r17 = bits 14..39 of r10 (limb 3)
507         mtvsrdd 60, 0, 16               # v28 = {0, limb 2}
508         extrdi  18, 10, 24, 0           # r18 = top 24 bits of r10 (limb 4)
509         mtvsrdd 61, 0, 17               # v29 = {0, limb 3}
510         mtvsrdd 62, 0, 18               # v30 = {0, limb 4}
511
512         # v0..v3 = r1 * 5, r2 * 5, r3 * 5, r4 * 5
513         li      9, 5
514         mtvsrdd 36, 0, 9                # v4 = {0, 5}
515         vmulouw 0, 27, 4                # v0 = r1 * 5
516         vmulouw 1, 28, 4                # v1 = r2 * 5
517         vmulouw 2, 29, 4                # v2 = r3 * 5
518         vmulouw 3, 30, 4                # v3 = r4 * 5
519 .endm
520                                                   
521 #                                                 
522 # poly1305_p10le_4blocks( uint8_t *k, uint32_t    
523 #  k = 32 bytes key                               
524 #  r3 = k (r, s)                                  
525 #  r4 = m
526 #  r5 = mlen
527 #                                                 
528 SYM_FUNC_START(poly1305_p10le_4blocks)            
529 .align 5                                          
530         cmpdi   5, 64                             
531         blt     Out_no_poly1305                   
532                                                   
533         SAVE_REGS                                 
534                                                   
535         do_poly1305_init                          
536                                                   
537         li      21, 0   # counter to message      
538                                                   
539         poly1305_setup_r                          
540                                                   
541         # load previous H state                   
542         # break/convert r6 to 26 bits             
543         ld      9, 0(3)                           
544         ld      10, 8(3)                          
545         ld      19, 16(3)                         
546         sldi    19, 19, 24                        
547         mtvsrdd 41, 0, 19                         
548         extrdi  14, 9, 26, 38                     
549         extrdi  15, 9, 26, 12                     
550         extrdi  16, 9, 12, 0                      
551         mtvsrdd 36, 0, 14                         
552         insrdi  16, 10, 14, 38                    
553         mtvsrdd 37, 0, 15                         
554         extrdi  17, 10, 26, 24                    
555         mtvsrdd 38, 0, 16                         
556         extrdi  18, 10, 24, 0                     
557         mtvsrdd 39, 0, 17                         
558         mtvsrdd 40, 0, 18                         
559         vor     8, 8, 9                           
560                                                   
561         # input m1 m2                             
562         add     20, 4, 21                         
563         xxlor   49, 24, 24                        
564         xxlor   50, 25, 25                        
565         lxvw4x  43, 0, 20                         
566         addi    17, 20, 16                        
567         lxvw4x  44, 0, 17                         
568         vperm   14, 11, 12, 17                    
569         vperm   15, 11, 12, 18                    
570         vand    9, 14, 25       # a0              
571         vsrd    10, 14, 31      # >> 26           
572         vsrd    11, 10, 31      # 12 bits left    
573         vand    10, 10, 25      # a1              
574         vspltisb 13, 12                           
575         vand    16, 15, 25                        
576         vsld    12, 16, 13                        
577         vor     11, 11, 12                        
578         vand    11, 11, 25      # a2              
579         vspltisb 13, 14                           
580         vsrd    12, 15, 13      # >> 14           
581         vsrd    13, 12, 31      # >> 26, a4       
582         vand    12, 12, 25      # a3              
583                                                   
584         vaddudm 20, 4, 9                          
585         vaddudm 21, 5, 10                         
586         vaddudm 22, 6, 11                         
587         vaddudm 23, 7, 12                         
588         vaddudm 24, 8, 13                         
589                                                   
590         # m3 m4                                   
591         addi    17, 17, 16                        
592         lxvw4x  43, 0, 17                         
593         addi    17, 17, 16                        
594         lxvw4x  44, 0, 17                         
595         vperm   14, 11, 12, 17                    
596         vperm   15, 11, 12, 18                    
597         vand    9, 14, 25       # a0              
598         vsrd    10, 14, 31      # >> 26           
599         vsrd    11, 10, 31      # 12 bits left    
600         vand    10, 10, 25      # a1              
601         vspltisb 13, 12                           
602         vand    16, 15, 25                        
603         vsld    12, 16, 13                        
604         vspltisb 13, 14                           
605         vor     11, 11, 12                        
606         vand    11, 11, 25      # a2              
607         vsrd    12, 15, 13      # >> 14           
608         vsrd    13, 12, 31      # >> 26, a4       
609         vand    12, 12, 25      # a3              
610                                                   
611         # Smash 4 message blocks into 5 vector    
612         vmrgow  4, 9, 20                          
613         vmrgow  5, 10, 21                         
614         vmrgow  6, 11, 22                         
615         vmrgow  7, 12, 23                         
616         vmrgow  8, 13, 24                         
617         vaddudm 8, 8, 19                          
618                                                   
619         addi    5, 5, -64       # len -= 64       
620         addi    21, 21, 64      # offset += 64    
621                                                   
622         li      9, 64                             
623         divdu   31, 5, 9                          
624                                                   
625         cmpdi   31, 0                             
626         ble     Skip_block_loop                   
627                                                   
628         mtctr   31                                
629                                                   
630 # h4 =   m1 * r⁴ + m2 * r³ + m3 * r² + m4     
631 # Rewrite the polynominal sum of product as fo    
632 # h1 = (h0 + m1) * r^2, h2 = (h0 + m2) * r^2      
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2      
634 #  .... Repeat                                    
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2      
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1      
637 #                                                 
638 loop_4blocks:                                     
639                                                   
640         # Multiply odd words and even words       
641         mul_odd                                   
642         mul_even                                  
643         # carry reduction                         
644         vspltisb 9, 2                             
645         vsrd    10, 14, 31                        
646         vsrd    11, 17, 31                        
647         vand    7, 17, 25                         
648         vand    4, 14, 25                         
649         vaddudm 18, 18, 11                        
650         vsrd    12, 18, 31                        
651         vaddudm 15, 15, 10                        
652                                                   
653         vsrd    11, 15, 31                        
654         vand    8, 18, 25                         
655         vand    5, 15, 25                         
656         vaddudm 4, 4, 12                          
657         vsld    10, 12, 9                         
658         vaddudm 6, 16, 11                         
659                                                   
660         vsrd    13, 6, 31                         
661         vand    6, 6, 25                          
662         vaddudm 4, 4, 10                          
663         vsrd    10, 4, 31                         
664         vaddudm 7, 7, 13                          
665                                                   
666         vsrd    11, 7, 31                         
667         vand    7, 7, 25                          
668         vand    4, 4, 25                          
669         vaddudm 5, 5, 10                          
670         vaddudm 8, 8, 11                          
671                                                   
672         # input m1  m2  m3  m4                    
673         add     20, 4, 21                         
674         xxlor   49, 24, 24                        
675         xxlor   50, 25, 25                        
676         lxvw4x  43, 0, 20                         
677         addi    17, 20, 16                        
678         lxvw4x  44, 0, 17                         
679         vperm   14, 11, 12, 17                    
680         vperm   15, 11, 12, 18                    
681         addi    17, 17, 16                        
682         lxvw4x  43, 0, 17                         
683         addi    17, 17, 16                        
684         lxvw4x  44, 0, 17                         
685         vperm   17, 11, 12, 17                    
686         vperm   18, 11, 12, 18                    
687                                                   
688         vand    20, 14, 25      # a0              
689         vand    9, 17, 25       # a0              
690         vsrd    21, 14, 31      # >> 26           
691         vsrd    22, 21, 31      # 12 bits left    
692         vsrd    10, 17, 31      # >> 26           
693         vsrd    11, 10, 31      # 12 bits left    
694                                                   
695         vand    21, 21, 25      # a1              
696         vand    10, 10, 25      # a1              
697                                                   
698         vspltisb 13, 12                           
699         vand    16, 15, 25                        
700         vsld    23, 16, 13                        
701         vor     22, 22, 23                        
702         vand    22, 22, 25      # a2              
703         vand    16, 18, 25                        
704         vsld    12, 16, 13                        
705         vor     11, 11, 12                        
706         vand    11, 11, 25      # a2              
707         vspltisb 13, 14                           
708         vsrd    23, 15, 13      # >> 14           
709         vsrd    24, 23, 31      # >> 26, a4       
710         vand    23, 23, 25      # a3              
711         vsrd    12, 18, 13      # >> 14           
712         vsrd    13, 12, 31      # >> 26, a4       
713         vand    12, 12, 25      # a3              
714                                                   
715         vaddudm 4, 4, 20                          
716         vaddudm 5, 5, 21                          
717         vaddudm 6, 6, 22                          
718         vaddudm 7, 7, 23                          
719         vaddudm 8, 8, 24                          
720                                                   
721         # Smash 4 message blocks into 5 vector    
722         vmrgow  4, 9, 4                           
723         vmrgow  5, 10, 5                          
724         vmrgow  6, 11, 6                          
725         vmrgow  7, 12, 7                          
726         vmrgow  8, 13, 8                          
727         vaddudm 8, 8, 19                          
728                                                   
729         addi    5, 5, -64       # len -= 64       
730         addi    21, 21, 64      # offset += 64    
731                                                   
732         bdnz    loop_4blocks                      
733                                                   
734 Skip_block_loop:                                  
735         xxlor   58, 0, 0                          
736         xxlor   59, 1, 1                          
737         xxlor   60, 2, 2                          
738         xxlor   61, 3, 3                          
739         xxlor   62, 4, 4                          
740         xxlor   32, 5, 5                          
741         xxlor   33, 6, 6                          
742         xxlor   34, 7, 7                          
743         xxlor   35, 8, 8                          
744                                                   
745         # Multiply odd words and even words       
746         mul_odd                                   
747         mul_even                                  
748                                                   
749         # Sum the products.                       
750         xxpermdi 41, 31, 46, 0                    
751         xxpermdi 42, 31, 47, 0                    
752         vaddudm 4, 14, 9                          
753         xxpermdi 36, 31, 36, 3                    
754         vaddudm 5, 15, 10                         
755         xxpermdi 37, 31, 37, 3                    
756         xxpermdi 43, 31, 48, 0                    
757         vaddudm 6, 16, 11                         
758         xxpermdi 38, 31, 38, 3                    
759         xxpermdi 44, 31, 49, 0                    
760         vaddudm 7, 17, 12                         
761         xxpermdi 39, 31, 39, 3                    
762         xxpermdi 45, 31, 50, 0                    
763         vaddudm 8, 18, 13                         
764         xxpermdi 40, 31, 40, 3                    
765                                                   
766         # carry reduction                         
767         vspltisb 9, 2                             
768         vsrd    10, 4, 31                         
769         vsrd    11, 7, 31                         
770         vand    7, 7, 25                          
771         vand    4, 4, 25                          
772         vaddudm 8, 8, 11                          
773         vsrd    12, 8, 31                         
774         vaddudm 5, 5, 10                          
775                                                   
776         vsrd    11, 5, 31                         
777         vand    8, 8, 25                          
778         vand    5, 5, 25                          
779         vaddudm 4, 4, 12                          
780         vsld    10, 12, 9                         
781         vaddudm 6, 6, 11                          
782                                                   
783         vsrd    13, 6, 31                         
784         vand    6, 6, 25                          
785         vaddudm 4, 4, 10                          
786         vsrd    10, 4, 31                         
787         vaddudm 7, 7, 13                          
788                                                   
789         vsrd    11, 7, 31                         
790         vand    7, 7, 25                          
791         vand    4, 4, 25                          
792         vaddudm 5, 5, 10                          
793         vsrd    10, 5, 31                         
794         vand    5, 5, 25                          
795         vaddudm 6, 6, 10                          
796         vaddudm 8, 8, 11                          
797                                                   
798         b       do_final_update                   
799                                                   
800 do_final_update:                                  
801         # combine 26 bit limbs                    
802         # v4, v5, v6, v7 and v8 are 26 bit vec    
803         vsld    5, 5, 31                          
804         vor     20, 4, 5                          
805         vspltisb 11, 12                           
806         vsrd    12, 6, 11                         
807         vsld    6, 6, 31                          
808         vsld    6, 6, 31                          
809         vor     20, 20, 6                         
810         vspltisb 11, 14                           
811         vsld    7, 7, 11                          
812         vor     21, 7, 12                         
813         mfvsrld 16, 40          # save last 2     
814         vsld    8, 8, 11                          
815         vsld    8, 8, 31                          
816         vor     21, 21, 8                         
817         mfvsrld 17, 52                            
818         mfvsrld 19, 53                            
819         srdi    16, 16, 24                        
820                                                   
821         std     17, 0(3)                          
822         std     19, 8(3)                          
823         stw     16, 16(3)                         
824                                                   
825 Out_loop:                                         
826         li      3, 0                              
827                                                   
828         RESTORE_REGS                              
829                                                   
830         blr                                       
831                                                   
832 Out_no_poly1305:                                  
833         li      3, 0                              
834         blr                                       
835 SYM_FUNC_END(poly1305_p10le_4blocks)              
836                                                   
837 #
838 # ============================================
839 # The following functions implement 64 x 64 bits multiplication poly1305.
840 #
841 SYM_FUNC_START_LOCAL(Poly1305_init_64)
842         #  mask 0x0FFFFFFC0FFFFFFC
843         #  mask 0x0FFFFFFC0FFFFFFF
844         addis   10, 2, rmask@toc@ha     # r10 = address of rmask (TOC-relative)
845         addi    10, 10, rmask@toc@l
846         ld      11, 0(10)       # mask applied to r0
847         ld      12, 8(10)       # mask applied to r1
848
849         # initialize
850         # load key from r3
851         ld      9, 24(3)
852         ld      10, 32(3)
853         and.    9, 9, 11        # clamp mask r0
854         and.    10, 10, 12      # clamp mask r1
855
856         srdi    21, 10, 2
857         add     19, 21, 10      # s1: r19 = r1 + (r1 >> 2)
858
859         # setup r and s
860         li      25, 0
861         mtvsrdd 32+0, 9, 19     # r0, s1
862         mtvsrdd 32+1, 10, 9     # r1, r0
863         mtvsrdd 32+2, 19, 25    # s1
864         mtvsrdd 32+3, 9, 25     # r0
865
866         blr
867 SYM_FUNC_END(Poly1305_init_64)
868                                                   
869 # Poly1305_mult
870 # v6 = (h0, h1), v8 = h2
871 # v0 = (r0, s1), v1 = (r1, r0), v2 = s1, v3 = r0
872 # v9 is the addend of each product sum; poly1305_64s zeroes it before the call
873 # Output: v7, v10, v11
874 #
875 SYM_FUNC_START_LOCAL(Poly1305_mult)
876         #
877         #       d0 = h0 * r0 + h1 * s1
878         vmsumudm        7, 6, 0, 9
879
880         #       d1 = h0 * r1 + h1 * r0 + h2 * s1
881         vmsumudm        11, 6, 1, 9
882         vmsumudm        10, 8, 2, 11
883
884         #       d2 = h2 * r0
885         vmsumudm        11, 8, 3, 9
886         blr
887 SYM_FUNC_END(Poly1305_mult)
888                                                   
889 #
890 # carry reduction
891 # h %= p (partial): bits of h2 above the low two are folded back as (h2 >> 2) * 5
892 #
893 # Input: v7, v10, v11
894 # Output: r27, r28, r29
895 # Clobbers: r20-r23, CR0, XER[CA]
896 SYM_FUNC_START_LOCAL(Carry_reduction)
897         mfvsrld 27, 32+7        # h0 = low half of d0
898         mfvsrld 28, 32+10       # h1 = low half of d1
899         mfvsrld 29, 32+11       # h2 = low half of d2
900         mfvsrd  20, 32+7        # h0.h
901         mfvsrd  21, 32+10       # h1.h
902
903         addc    28, 28, 20      # h1 += high half of d0
904         adde    29, 29, 21      # h2 += high half of d1 + carry
905         srdi    22, 29, 0x2
906         sldi    23, 22, 0x2
907         add     23, 23, 22      # (h2 >> 2) * 5
908         addc    27, 27, 23      # h0
909         addze   28, 28          # h1
910         andi.   29, 29, 0x3     # h2 (andi. leaves CA untouched)
            addze   29, 29          # h2 += carry out of h1, previously dropped
911         blr
912 SYM_FUNC_END(Carry_reduction)
913                                                   
914 #
915 # poly1305 multiplication
916 # h *= r, h %= p
917 #       d0 = h0 * r0 + h1 * s1
918 #       d1 = h0 * r1 + h1 * r0 + h2 * s1
919 #       d2 = h2 * r0
920 #
921 # poly1305_64s(state, m, mlen, highbit)
922 #   r3 = state (h at 0/8/16, key at 24/32), r4 = m, r5 = mlen, r6 = highbit
923 #   - no highbit if final leftover block (highbit = 0)
924 #   - processes mlen / 16 whole blocks, writes h back, returns 0 in r3
925 SYM_FUNC_START(poly1305_64s)
926         cmpdi   5, 16
927         blt     Out_no_poly1305_64      # no whole block: CTR = 0 would make bdnz wrap and loop 2^64 times
928
929         mflr 0
930         std 0, 16(1)
931         stdu 1,-400(1)
932
933         SAVE_GPR 14, 112, 1
934         SAVE_GPR 15, 120, 1
935         SAVE_GPR 16, 128, 1
936         SAVE_GPR 17, 136, 1
937         SAVE_GPR 18, 144, 1
938         SAVE_GPR 19, 152, 1
939         SAVE_GPR 20, 160, 1
940         SAVE_GPR 21, 168, 1
941         SAVE_GPR 22, 176, 1
942         SAVE_GPR 23, 184, 1
943         SAVE_GPR 24, 192, 1
944         SAVE_GPR 25, 200, 1
945         SAVE_GPR 26, 208, 1
946         SAVE_GPR 27, 216, 1
947         SAVE_GPR 28, 224, 1
948         SAVE_GPR 29, 232, 1
949         SAVE_GPR 30, 240, 1
950         SAVE_GPR 31, 248, 1
951
952         # Init poly1305
953         bl Poly1305_init_64
954
955         li 25, 0                        # offset into the message
956
957         add 11, 25, 4                   # r11 = current message pointer
958
959         # load h
960         # h0, h1, h2?
961         ld      27, 0(3)
962         ld      28, 8(3)
963         lwz     29, 16(3)
964
965         li      30, 16
966         divdu   31, 5, 30               # r31 = number of whole 16-byte blocks (>= 1 here)
967
968         mtctr   31
969
970         mr      24, 6           # highbit
971
972 Loop_block_64:
973         vxor    9, 9, 9                 # v9 = 0, addend for Poly1305_mult
974
975         ld      20, 0(11)
976         ld      21, 8(11)
977         addi    11, 11, 16
978
979         addc    27, 27, 20              # h += block
980         adde    28, 28, 21
981         adde    29, 29, 24              # h2 += highbit + carry
982
983         li      22, 0
984         mtvsrdd 32+6, 27, 28    # h0, h1
985         mtvsrdd 32+8, 29, 22    # h2
986
987         bl      Poly1305_mult
988
989         bl      Carry_reduction
990
991         bdnz    Loop_block_64
992
993         std     27, 0(3)
994         std     28, 8(3)
995         stw     29, 16(3)
996
997         li      3, 0
998
999         RESTORE_GPR 14, 112, 1
1000         RESTORE_GPR 15, 120, 1
1001         RESTORE_GPR 16, 128, 1
1002         RESTORE_GPR 17, 136, 1
1003         RESTORE_GPR 18, 144, 1
1004         RESTORE_GPR 19, 152, 1
1005         RESTORE_GPR 20, 160, 1
1006         RESTORE_GPR 21, 168, 1
1007         RESTORE_GPR 22, 176, 1
1008         RESTORE_GPR 23, 184, 1
1009         RESTORE_GPR 24, 192, 1
1010         RESTORE_GPR 25, 200, 1
1011         RESTORE_GPR 26, 208, 1
1012         RESTORE_GPR 27, 216, 1
1013         RESTORE_GPR 28, 224, 1
1014         RESTORE_GPR 29, 232, 1
1015         RESTORE_GPR 30, 240, 1
1016         RESTORE_GPR 31, 248, 1
1017
1018         addi    1, 1, 400
1019         ld 0, 16(1)
1020         mtlr 0
1021
1022         blr
1023
1024 Out_no_poly1305_64:
1025         li      3, 0
1026         blr
1027 SYM_FUNC_END(poly1305_64s)
1028                                                  
1029 #
1030 # Input: r3 = h, r4 = s, r5 = mac
1031 # mac = h + s  (low 128 bits, after the final conditional subtraction of p)
1032 #
1033 SYM_FUNC_START(poly1305_emit_64)
1034         ld      10, 0(3)                # r10:r11:r12 = h0:h1:h2
1035         ld      11, 8(3)
1036         ld      12, 16(3)
1037
1038         # compare modulus
1039         # r6:r7:r8 = h + 5, which is h + (-p) modulo 2^130
1043         addic.  6, 10, 5
1044         addze   7, 11
1045         addze   8, 12
1046         srdi    9, 8, 2         # overflow past bit 129?
1047         cmpdi   9, 0
1048         beq     Skip_h64                # no: keep h as it is
1049         mr      10, 6                   # yes: continue with h + 5
1050         mr      11, 7
1051         mr      12, 8
1052
1053 Skip_h64:
1054         ld      6, 0(4)                 # r6:r7 = s
1055         ld      7, 8(4)
1056         addc    10, 10, 6
1057         adde    11, 11, 7
1058         addze   12, 12
1059
1060         std     10, 0(5)                # only the low 128 bits are written out
1061         std     11, 8(5)
1062         blr
1063 SYM_FUNC_END(poly1305_emit_64)
1064                                                  
1065 SYM_DATA_START_LOCAL(RMASK)
1066 .align 5
1067 rmask:          # two 64-bit key masks, read by Poly1305_init_64 at offsets 0 and 8
1068 .byte   0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0
1069 cnum:           # NOTE(review): vector constants; their consumers are outside this section - confirm layout
1070 .long   0x03ffffff, 0x00000000, 0x03ffffff, 0
1071 .long   0x1a, 0x00, 0x1a, 0x00
1072 .long   0x01000000, 0x01000000, 0x01000000, 0
1073 .long   0x00010203, 0x04050607, 0x10111213, 0
1074 .long   0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0
1075 SYM_DATA_END(RMASK)
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php