~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm/crypto/sha512-armv4.pl

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm/crypto/sha512-armv4.pl (Version linux-6.12-rc7) and /arch/mips/crypto/sha512-armv4.pl (Version linux-5.8.18)


  1 #!/usr/bin/env perl                               
  2 # SPDX-License-Identifier: GPL-2.0                
  3                                                   
  4 # This code is taken from the OpenSSL project     
  5 # has relicensed it under the GPLv2. Therefore    
  6 # you can redistribute it and/or modify it und    
  7 # Public License version 2 as published by the    
  8 #                                                 
  9 # The original headers, including the original    
 10 # included below for completeness.                
 11                                                   
 12 # ============================================    
 13 # Written by Andy Polyakov <appro@openssl.org>     
 14 # project. The module is, however, dual licens    
 15 # CRYPTOGAMS licenses depending on where you o    
 16 # details see https://www.openssl.org/~appro/c    
 17 # ============================================    
 18                                                   
 19 # SHA512 block procedure for ARMv4. September     
 20                                                   
 21 # This code is ~4.5 (four and a half) times fa    
 22 # by gcc 3.4 and it spends ~72 clock cycles pe    
 23 # Xscale PXA250 core].                            
 24 #                                                 
 25 # July 2010.                                      
 26 #                                                 
 27 # Rescheduling for dual-issue pipeline resulte    
 28 # Cortex A8 core and ~40 cycles per processed     
 29                                                   
 30 # February 2011.                                  
 31 #                                                 
 32 # Profiler-assisted and platform-specific opti    
 33 # improvement on Cortex A8 core and ~38 cycles    
 34                                                   
 35 # March 2011.                                     
 36 #                                                 
 37 # Add NEON implementation. On Cortex A8 it was    
 38 # one byte in 23.3 cycles or ~60% faster than     
 39                                                   
 40 # August 2012.                                    
 41 #                                                 
 42 # Improve NEON performance by 12% on Snapdrago    
 43 # terms it's 22.6 cycles per byte, which is di    
 44 # Technical writers asserted that 3-way S4 pip    
 45 # multiple NEON instructions per cycle, but du    
 46 # not be observed, see https://www.openssl.org    
 47 # for further details. On side note Cortex-A15    
 48 # 16 cycles.                                      
 49                                                   
 50 # Byte order [in]dependence. =================    
 51 #                                                 
 52 # Originally caller was expected to maintain s    
 53 # h[0-7], namely with most significant dword a    
 54 # was reflected in below two parameters as 0 a    
 55 # expected to maintain native byte order for w    
 56 $hi="HI";                                         
 57 $lo="LO";                                         
 58 # ============================================    
 59                                                   
 60 while (($output=shift) && ($output!~/^\w[\w\-]    
 61 open STDOUT,">$output";                           
 62                                                   
 63 $ctx="r0";      # parameter block                 
 64 $inp="r1";                                        
 65 $len="r2";                                        
 66                                                   
 67 $Tlo="r3";                                        
 68 $Thi="r4";                                        
 69 $Alo="r5";                                        
 70 $Ahi="r6";                                        
 71 $Elo="r7";                                        
 72 $Ehi="r8";                                        
 73 $t0="r9";                                         
 74 $t1="r10";                                        
 75 $t2="r11";                                        
 76 $t3="r12";                                        
 77 ############    r13 is stack pointer              
 78 $Ktbl="r14";                                      
 79 ############    r15 is program counter            
 80                                                   
 81 $Aoff=8*0;                                        
 82 $Boff=8*1;                                        
 83 $Coff=8*2;                                        
 84 $Doff=8*3;                                        
 85 $Eoff=8*4;                                        
 86 $Foff=8*5;                                        
 87 $Goff=8*6;                                        
 88 $Hoff=8*7;                                        
 89 $Xoff=8*8;                                        
 90                                                   
 91 sub BODY_00_15() {                                
 92 my $magic = shift;                                
 93 $code.=<<___;                                     
 94         @ Sigma1(x)     (ROTR((x),14) ^ ROTR((    
 95         @ LO            lo>>14^hi<<18 ^ lo>>18    
 96         @ HI            hi>>14^lo<<18 ^ hi>>18    
 97         mov     $t0,$Elo,lsr#14                   
 98         str     $Tlo,[sp,#$Xoff+0]                
 99         mov     $t1,$Ehi,lsr#14                   
100         str     $Thi,[sp,#$Xoff+4]                
101         eor     $t0,$t0,$Ehi,lsl#18               
102         ldr     $t2,[sp,#$Hoff+0]       @ h.lo    
103         eor     $t1,$t1,$Elo,lsl#18               
104         ldr     $t3,[sp,#$Hoff+4]       @ h.hi    
105         eor     $t0,$t0,$Elo,lsr#18               
106         eor     $t1,$t1,$Ehi,lsr#18               
107         eor     $t0,$t0,$Ehi,lsl#14               
108         eor     $t1,$t1,$Elo,lsl#14               
109         eor     $t0,$t0,$Ehi,lsr#9                
110         eor     $t1,$t1,$Elo,lsr#9                
111         eor     $t0,$t0,$Elo,lsl#23               
112         eor     $t1,$t1,$Ehi,lsl#23     @ Sigm    
113         adds    $Tlo,$Tlo,$t0                     
114         ldr     $t0,[sp,#$Foff+0]       @ f.lo    
115         adc     $Thi,$Thi,$t1           @ T +=    
116         ldr     $t1,[sp,#$Foff+4]       @ f.hi    
117         adds    $Tlo,$Tlo,$t2                     
118         ldr     $t2,[sp,#$Goff+0]       @ g.lo    
119         adc     $Thi,$Thi,$t3           @ T +=    
120         ldr     $t3,[sp,#$Goff+4]       @ g.hi    
121                                                   
122         eor     $t0,$t0,$t2                       
123         str     $Elo,[sp,#$Eoff+0]                
124         eor     $t1,$t1,$t3                       
125         str     $Ehi,[sp,#$Eoff+4]                
126         and     $t0,$t0,$Elo                      
127         str     $Alo,[sp,#$Aoff+0]                
128         and     $t1,$t1,$Ehi                      
129         str     $Ahi,[sp,#$Aoff+4]                
130         eor     $t0,$t0,$t2                       
131         ldr     $t2,[$Ktbl,#$lo]        @ K[i]    
132         eor     $t1,$t1,$t3             @ Ch(e    
133         ldr     $t3,[$Ktbl,#$hi]        @ K[i]    
134                                                   
135         adds    $Tlo,$Tlo,$t0                     
136         ldr     $Elo,[sp,#$Doff+0]      @ d.lo    
137         adc     $Thi,$Thi,$t1           @ T +=    
138         ldr     $Ehi,[sp,#$Doff+4]      @ d.hi    
139         adds    $Tlo,$Tlo,$t2                     
140         and     $t0,$t2,#0xff                     
141         adc     $Thi,$Thi,$t3           @ T +=    
142         adds    $Elo,$Elo,$Tlo                    
143         ldr     $t2,[sp,#$Boff+0]       @ b.lo    
144         adc     $Ehi,$Ehi,$Thi          @ d +=    
145         teq     $t0,#$magic                       
146                                                   
147         ldr     $t3,[sp,#$Coff+0]       @ c.lo    
148 #if __ARM_ARCH__>=7                               
149         it      eq                      @ Thum    
150 #endif                                            
151         orreq   $Ktbl,$Ktbl,#1                    
152         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((    
153         @ LO            lo>>28^hi<<4  ^ hi>>2^    
154         @ HI            hi>>28^lo<<4  ^ lo>>2^    
155         mov     $t0,$Alo,lsr#28                   
156         mov     $t1,$Ahi,lsr#28                   
157         eor     $t0,$t0,$Ahi,lsl#4                
158         eor     $t1,$t1,$Alo,lsl#4                
159         eor     $t0,$t0,$Ahi,lsr#2                
160         eor     $t1,$t1,$Alo,lsr#2                
161         eor     $t0,$t0,$Alo,lsl#30               
162         eor     $t1,$t1,$Ahi,lsl#30               
163         eor     $t0,$t0,$Ahi,lsr#7                
164         eor     $t1,$t1,$Alo,lsr#7                
165         eor     $t0,$t0,$Alo,lsl#25               
166         eor     $t1,$t1,$Ahi,lsl#25     @ Sigm    
167         adds    $Tlo,$Tlo,$t0                     
168         and     $t0,$Alo,$t2                      
169         adc     $Thi,$Thi,$t1           @ T +=    
170                                                   
171         ldr     $t1,[sp,#$Boff+4]       @ b.hi    
172         orr     $Alo,$Alo,$t2                     
173         ldr     $t2,[sp,#$Coff+4]       @ c.hi    
174         and     $Alo,$Alo,$t3                     
175         and     $t3,$Ahi,$t1                      
176         orr     $Ahi,$Ahi,$t1                     
177         orr     $Alo,$Alo,$t0           @ Maj(    
178         and     $Ahi,$Ahi,$t2                     
179         adds    $Alo,$Alo,$Tlo                    
180         orr     $Ahi,$Ahi,$t3           @ Maj(    
181         sub     sp,sp,#8                          
182         adc     $Ahi,$Ahi,$Thi          @ h +=    
183         tst     $Ktbl,#1                          
184         add     $Ktbl,$Ktbl,#8                    
185 ___                                               
186 }                                                 
187 $code=<<___;                                      
188 #ifndef __KERNEL__                                
189 # include "arm_arch.h"                            
190 # define VFP_ABI_PUSH   vstmdb  sp!,{d8-d15}      
191 # define VFP_ABI_POP    vldmia  sp!,{d8-d15}      
192 #else                                             
193 # define __ARM_ARCH__ __LINUX_ARM_ARCH__          
194 # define __ARM_MAX_ARCH__ 7                       
195 # define VFP_ABI_PUSH                             
196 # define VFP_ABI_POP                              
197 #endif                                            
198                                                   
199 #ifdef __ARMEL__                                  
200 # define LO 0                                     
201 # define HI 4                                     
202 # define WORD64(hi0,lo0,hi1,lo1)        .word     
203 #else                                             
204 # define HI 0                                     
205 # define LO 4                                     
206 # define WORD64(hi0,lo0,hi1,lo1)        .word     
207 #endif                                            
208                                                   
209 .text                                             
210 #if __ARM_ARCH__<7                                
211 .code   32                                        
212 #else                                             
213 .syntax unified                                   
214 # ifdef __thumb2__                                
215 .thumb                                            
216 # else                                            
217 .code   32                                        
218 # endif                                           
219 #endif                                            
220                                                   
221 .type   K512,%object                              
222 .align  5                                         
223 K512:                                             
224 WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23e    
225 WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x818    
226 WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb60    
227 WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6    
228 WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x457    
229 WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5f    
230 WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1    
231 WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf6    
232 WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384    
233 WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77a    
234 WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea    
235 WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831    
236 WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db    
237 WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbee    
238 WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930    
239 WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0    
240 WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c2    
241 WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d9    
242 WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c7    
243 WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x148    
244 WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc4    
245 WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x065    
246 WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x556    
247 WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32b    
248 WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x514    
249 WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19    
250 WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe34    
251 WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b    
252 WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x431    
253 WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6    
254 WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde8    
255 WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe37    
256 WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c    
257 WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6    
258 WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c    
259 WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131    
260 WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c    
261 WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c1    
262 WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc6    
263 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a4    
264 .size   K512,.-K512                               
265 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__    
266 .LOPENSSL_armcap:                                 
267 .word   OPENSSL_armcap_P-sha512_block_data_ord    
268 .skip   32-4                                      
269 #else                                             
270 .skip   32                                        
271 #endif                                            
272                                                   
273 .global sha512_block_data_order                   
274 .type   sha512_block_data_order,%function         
275 sha512_block_data_order:                          
276 .Lsha512_block_data_order:                        
277 #if __ARM_ARCH__<7                                
278         sub     r3,pc,#8                @ sha5    
279 #else                                             
280         adr     r3,.Lsha512_block_data_order      
281 #endif                                            
282 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__    
283         ldr     r12,.LOPENSSL_armcap              
284         ldr     r12,[r3,r12]            @ OPEN    
285         tst     r12,#1                            
286         bne     .LNEON                            
287 #endif                                            
288         add     $len,$inp,$len,lsl#7    @ len     
289         stmdb   sp!,{r4-r12,lr}                   
290         sub     $Ktbl,r3,#672           @ K512    
291         sub     sp,sp,#9*8                        
292                                                   
293         ldr     $Elo,[$ctx,#$Eoff+$lo]            
294         ldr     $Ehi,[$ctx,#$Eoff+$hi]            
295         ldr     $t0, [$ctx,#$Goff+$lo]            
296         ldr     $t1, [$ctx,#$Goff+$hi]            
297         ldr     $t2, [$ctx,#$Hoff+$lo]            
298         ldr     $t3, [$ctx,#$Hoff+$hi]            
299 .Loop:                                            
300         str     $t0, [sp,#$Goff+0]                
301         str     $t1, [sp,#$Goff+4]                
302         str     $t2, [sp,#$Hoff+0]                
303         str     $t3, [sp,#$Hoff+4]                
304         ldr     $Alo,[$ctx,#$Aoff+$lo]            
305         ldr     $Ahi,[$ctx,#$Aoff+$hi]            
306         ldr     $Tlo,[$ctx,#$Boff+$lo]            
307         ldr     $Thi,[$ctx,#$Boff+$hi]            
308         ldr     $t0, [$ctx,#$Coff+$lo]            
309         ldr     $t1, [$ctx,#$Coff+$hi]            
310         ldr     $t2, [$ctx,#$Doff+$lo]            
311         ldr     $t3, [$ctx,#$Doff+$hi]            
312         str     $Tlo,[sp,#$Boff+0]                
313         str     $Thi,[sp,#$Boff+4]                
314         str     $t0, [sp,#$Coff+0]                
315         str     $t1, [sp,#$Coff+4]                
316         str     $t2, [sp,#$Doff+0]                
317         str     $t3, [sp,#$Doff+4]                
318         ldr     $Tlo,[$ctx,#$Foff+$lo]            
319         ldr     $Thi,[$ctx,#$Foff+$hi]            
320         str     $Tlo,[sp,#$Foff+0]                
321         str     $Thi,[sp,#$Foff+4]                
322                                                   
323 .L00_15:                                          
324 #if __ARM_ARCH__<7                                
325         ldrb    $Tlo,[$inp,#7]                    
326         ldrb    $t0, [$inp,#6]                    
327         ldrb    $t1, [$inp,#5]                    
328         ldrb    $t2, [$inp,#4]                    
329         ldrb    $Thi,[$inp,#3]                    
330         ldrb    $t3, [$inp,#2]                    
331         orr     $Tlo,$Tlo,$t0,lsl#8               
332         ldrb    $t0, [$inp,#1]                    
333         orr     $Tlo,$Tlo,$t1,lsl#16              
334         ldrb    $t1, [$inp],#8                    
335         orr     $Tlo,$Tlo,$t2,lsl#24              
336         orr     $Thi,$Thi,$t3,lsl#8               
337         orr     $Thi,$Thi,$t0,lsl#16              
338         orr     $Thi,$Thi,$t1,lsl#24              
339 #else                                             
340         ldr     $Tlo,[$inp,#4]                    
341         ldr     $Thi,[$inp],#8                    
342 #ifdef __ARMEL__                                  
343         rev     $Tlo,$Tlo                         
344         rev     $Thi,$Thi                         
345 #endif                                            
346 #endif                                            
347 ___                                               
348         &BODY_00_15(0x94);                        
349 $code.=<<___;                                     
350         tst     $Ktbl,#1                          
351         beq     .L00_15                           
352         ldr     $t0,[sp,#`$Xoff+8*(16-1)`+0]      
353         ldr     $t1,[sp,#`$Xoff+8*(16-1)`+4]      
354         bic     $Ktbl,$Ktbl,#1                    
355 .L16_79:                                          
356         @ sigma0(x)     (ROTR((x),1)  ^ ROTR((    
357         @ LO            lo>>1^hi<<31  ^ lo>>8^    
358         @ HI            hi>>1^lo<<31  ^ hi>>8^    
359         mov     $Tlo,$t0,lsr#1                    
360         ldr     $t2,[sp,#`$Xoff+8*(16-14)`+0]     
361         mov     $Thi,$t1,lsr#1                    
362         ldr     $t3,[sp,#`$Xoff+8*(16-14)`+4]     
363         eor     $Tlo,$Tlo,$t1,lsl#31              
364         eor     $Thi,$Thi,$t0,lsl#31              
365         eor     $Tlo,$Tlo,$t0,lsr#8               
366         eor     $Thi,$Thi,$t1,lsr#8               
367         eor     $Tlo,$Tlo,$t1,lsl#24              
368         eor     $Thi,$Thi,$t0,lsl#24              
369         eor     $Tlo,$Tlo,$t0,lsr#7               
370         eor     $Thi,$Thi,$t1,lsr#7               
371         eor     $Tlo,$Tlo,$t1,lsl#25              
372                                                   
373         @ sigma1(x)     (ROTR((x),19) ^ ROTR((    
374         @ LO            lo>>19^hi<<13 ^ hi>>29    
375         @ HI            hi>>19^lo<<13 ^ lo>>29    
376         mov     $t0,$t2,lsr#19                    
377         mov     $t1,$t3,lsr#19                    
378         eor     $t0,$t0,$t3,lsl#13                
379         eor     $t1,$t1,$t2,lsl#13                
380         eor     $t0,$t0,$t3,lsr#29                
381         eor     $t1,$t1,$t2,lsr#29                
382         eor     $t0,$t0,$t2,lsl#3                 
383         eor     $t1,$t1,$t3,lsl#3                 
384         eor     $t0,$t0,$t2,lsr#6                 
385         eor     $t1,$t1,$t3,lsr#6                 
386         ldr     $t2,[sp,#`$Xoff+8*(16-9)`+0]      
387         eor     $t0,$t0,$t3,lsl#26                
388                                                   
389         ldr     $t3,[sp,#`$Xoff+8*(16-9)`+4]      
390         adds    $Tlo,$Tlo,$t0                     
391         ldr     $t0,[sp,#`$Xoff+8*16`+0]          
392         adc     $Thi,$Thi,$t1                     
393                                                   
394         ldr     $t1,[sp,#`$Xoff+8*16`+4]          
395         adds    $Tlo,$Tlo,$t2                     
396         adc     $Thi,$Thi,$t3                     
397         adds    $Tlo,$Tlo,$t0                     
398         adc     $Thi,$Thi,$t1                     
399 ___                                               
400         &BODY_00_15(0x17);                        
401 $code.=<<___;                                     
402 #if __ARM_ARCH__>=7                               
403         ittt    eq                      @ Thum    
404 #endif                                            
405         ldreq   $t0,[sp,#`$Xoff+8*(16-1)`+0]      
406         ldreq   $t1,[sp,#`$Xoff+8*(16-1)`+4]      
407         beq     .L16_79                           
408         bic     $Ktbl,$Ktbl,#1                    
409                                                   
410         ldr     $Tlo,[sp,#$Boff+0]                
411         ldr     $Thi,[sp,#$Boff+4]                
412         ldr     $t0, [$ctx,#$Aoff+$lo]            
413         ldr     $t1, [$ctx,#$Aoff+$hi]            
414         ldr     $t2, [$ctx,#$Boff+$lo]            
415         ldr     $t3, [$ctx,#$Boff+$hi]            
416         adds    $t0,$Alo,$t0                      
417         str     $t0, [$ctx,#$Aoff+$lo]            
418         adc     $t1,$Ahi,$t1                      
419         str     $t1, [$ctx,#$Aoff+$hi]            
420         adds    $t2,$Tlo,$t2                      
421         str     $t2, [$ctx,#$Boff+$lo]            
422         adc     $t3,$Thi,$t3                      
423         str     $t3, [$ctx,#$Boff+$hi]            
424                                                   
425         ldr     $Alo,[sp,#$Coff+0]                
426         ldr     $Ahi,[sp,#$Coff+4]                
427         ldr     $Tlo,[sp,#$Doff+0]                
428         ldr     $Thi,[sp,#$Doff+4]                
429         ldr     $t0, [$ctx,#$Coff+$lo]            
430         ldr     $t1, [$ctx,#$Coff+$hi]            
431         ldr     $t2, [$ctx,#$Doff+$lo]            
432         ldr     $t3, [$ctx,#$Doff+$hi]            
433         adds    $t0,$Alo,$t0                      
434         str     $t0, [$ctx,#$Coff+$lo]            
435         adc     $t1,$Ahi,$t1                      
436         str     $t1, [$ctx,#$Coff+$hi]            
437         adds    $t2,$Tlo,$t2                      
438         str     $t2, [$ctx,#$Doff+$lo]            
439         adc     $t3,$Thi,$t3                      
440         str     $t3, [$ctx,#$Doff+$hi]            
441                                                   
442         ldr     $Tlo,[sp,#$Foff+0]                
443         ldr     $Thi,[sp,#$Foff+4]                
444         ldr     $t0, [$ctx,#$Eoff+$lo]            
445         ldr     $t1, [$ctx,#$Eoff+$hi]            
446         ldr     $t2, [$ctx,#$Foff+$lo]            
447         ldr     $t3, [$ctx,#$Foff+$hi]            
448         adds    $Elo,$Elo,$t0                     
449         str     $Elo,[$ctx,#$Eoff+$lo]            
450         adc     $Ehi,$Ehi,$t1                     
451         str     $Ehi,[$ctx,#$Eoff+$hi]            
452         adds    $t2,$Tlo,$t2                      
453         str     $t2, [$ctx,#$Foff+$lo]            
454         adc     $t3,$Thi,$t3                      
455         str     $t3, [$ctx,#$Foff+$hi]            
456                                                   
457         ldr     $Alo,[sp,#$Goff+0]                
458         ldr     $Ahi,[sp,#$Goff+4]                
459         ldr     $Tlo,[sp,#$Hoff+0]                
460         ldr     $Thi,[sp,#$Hoff+4]                
461         ldr     $t0, [$ctx,#$Goff+$lo]            
462         ldr     $t1, [$ctx,#$Goff+$hi]            
463         ldr     $t2, [$ctx,#$Hoff+$lo]            
464         ldr     $t3, [$ctx,#$Hoff+$hi]            
465         adds    $t0,$Alo,$t0                      
466         str     $t0, [$ctx,#$Goff+$lo]            
467         adc     $t1,$Ahi,$t1                      
468         str     $t1, [$ctx,#$Goff+$hi]            
469         adds    $t2,$Tlo,$t2                      
470         str     $t2, [$ctx,#$Hoff+$lo]            
471         adc     $t3,$Thi,$t3                      
472         str     $t3, [$ctx,#$Hoff+$hi]            
473                                                   
474         add     sp,sp,#640                        
475         sub     $Ktbl,$Ktbl,#640                  
476                                                   
477         teq     $inp,$len                         
478         bne     .Loop                             
479                                                   
480         add     sp,sp,#8*9              @ dest    
481 #if __ARM_ARCH__>=5                               
482         ldmia   sp!,{r4-r12,pc}                   
483 #else                                             
484         ldmia   sp!,{r4-r12,lr}                   
485         tst     lr,#1                             
486         moveq   pc,lr                   @ be b    
487         bx      lr                      @ inte    
488 #endif                                            
489 .size   sha512_block_data_order,.-sha512_block    
490 ___                                               
491                                                   
492 {                                                 
493 my @Sigma0=(28,34,39);                            
494 my @Sigma1=(14,18,41);                            
495 my @sigma0=(1, 8, 7);                             
496 my @sigma1=(19,61,6);                             
497                                                   
498 my $Ktbl="r3";                                    
499 my $cnt="r12";  # volatile register known as i    
500                                                   
501 my @X=map("d$_",(0..15));                         
502 my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16.    
503                                                   
504 sub NEON_00_15() {                                
505 my $i=shift;                                      
506 my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;                  
507 my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24    
508                                                   
509 $code.=<<___ if ($i<16 || $i&1);                  
510         vshr.u64        $t0,$e,#@Sigma1[0]        
511 #if $i<16                                         
512         vld1.64         {@X[$i%16]},[$inp]!       
513 #endif                                            
514         vshr.u64        $t1,$e,#@Sigma1[1]        
515 #if $i>0                                          
516          vadd.i64       $a,$Maj                   
517 #endif                                            
518         vshr.u64        $t2,$e,#@Sigma1[2]        
519 ___                                               
520 $code.=<<___;                                     
521         vld1.64         {$K},[$Ktbl,:64]!         
522         vsli.64         $t0,$e,#`64-@Sigma1[0]    
523         vsli.64         $t1,$e,#`64-@Sigma1[1]    
524         vmov            $Ch,$e                    
525         vsli.64         $t2,$e,#`64-@Sigma1[2]    
526 #if $i<16 && defined(__ARMEL__)                   
527         vrev64.8        @X[$i],@X[$i]             
528 #endif                                            
529         veor            $t1,$t0                   
530         vbsl            $Ch,$f,$g                 
531         vshr.u64        $t0,$a,#@Sigma0[0]        
532         veor            $t2,$t1                   
533         vadd.i64        $T1,$Ch,$h                
534         vshr.u64        $t1,$a,#@Sigma0[1]        
535         vsli.64         $t0,$a,#`64-@Sigma0[0]    
536         vadd.i64        $T1,$t2                   
537         vshr.u64        $t2,$a,#@Sigma0[2]        
538         vadd.i64        $K,@X[$i%16]              
539         vsli.64         $t1,$a,#`64-@Sigma0[1]    
540         veor            $Maj,$a,$b                
541         vsli.64         $t2,$a,#`64-@Sigma0[2]    
542         veor            $h,$t0,$t1                
543         vadd.i64        $T1,$K                    
544         vbsl            $Maj,$c,$b                
545         veor            $h,$t2                    
546         vadd.i64        $d,$T1                    
547         vadd.i64        $Maj,$T1                  
548         @ vadd.i64      $h,$Maj                   
549 ___                                               
550 }                                                 
551                                                   
# NEON_16_79($i, @regs) -- append the NEON code for one round in the 16..79
# range to $code.  The generator loop calls it with $i = 16..31.
#   odd  $i: nothing extra to do; emit a plain round via NEON_00_15.
#   even $i: first emit the message-schedule update (the existing note below
#            calls it "2x-vectorized": it works on whole q registers, so it
#            is only needed every second round), then the round itself.
# The arguments after $i are handed on to NEON_00_15 unchanged.
# NOTE(review): the empty prototype "()" is not enforced, because the visible
# calls use the &name(...) form, which bypasses prototypes.
552 sub NEON_16_79() {                                
553 my $i=shift;                                      
554                                                   
555 if ($i&1)       { &NEON_00_15($i,@_); return;     
556                                                   
557 # 2x-vectorized, therefore runs every 2nd roun    
558 my @X=map("q$_",(0..7));                          
559 my ($t0,$t1,$s0,$s1) = map("q$_",(12..15));       
560 my ($d0,$d1,$d2) = map("d$_",(24..26));           
# Fifth register argument that remains once $i has been shifted off.
# NOTE(review): presumably the round's "e" working variable -- confirm
# against the argument unpacking at the top of NEON_00_15.
561 my $e=@_[4];                                      
# $i is even at this point, so the division is exact.  From here on $i selects
# a q register in @X (always used modulo 8) instead of naming a round.
562 $i /= 2;                                          
# Schedule update for @X[$i%8].  Three vadd.i64 instructions accumulate into
# it: a shift/xor combination of @X[($i+7)%8], the result of a vext.8 that
# starts at @X[($i+4)%8], and a shift/xor combination (amounts from @sigma0)
# of a vext.8 that starts at @X[$i%8].
# Interleaved with that:
#   - "vadd.i64 @_[0],d30" adds d30 into the first register argument.
#     NOTE(review): presumably the h+=Maj step left commented out at the end
#     of NEON_00_15 -- confirm that d30 is $Maj.
#   - the three vshr.u64 into $d0..$d2 (d24..d26) start the @Sigma1 shifts
#     of $e ahead of the round code emitted by NEON_00_15.
563 $code.=<<___;                                     
564         vshr.u64        $t0,@X[($i+7)%8],#@sig    
565         vshr.u64        $t1,@X[($i+7)%8],#@sig    
566          vadd.i64       @_[0],d30                 
567         vshr.u64        $s1,@X[($i+7)%8],#@sig    
568         vsli.64         $t0,@X[($i+7)%8],#`64-    
569         vext.8          $s0,@X[$i%8],@X[($i+1)    
570         vsli.64         $t1,@X[($i+7)%8],#`64-    
571         veor            $s1,$t0                   
572         vshr.u64        $t0,$s0,#@sigma0[0]       
573         veor            $s1,$t1                   
574         vshr.u64        $t1,$s0,#@sigma0[1]       
575         vadd.i64        @X[$i%8],$s1              
576         vshr.u64        $s1,$s0,#@sigma0[2]       
577         vsli.64         $t0,$s0,#`64-@sigma0[0    
578         vsli.64         $t1,$s0,#`64-@sigma0[1    
579         vext.8          $s0,@X[($i+4)%8],@X[($    
580         veor            $s1,$t0                   
581         vshr.u64        $d0,$e,#@Sigma1[0]        
582         vadd.i64        @X[$i%8],$s0              
583         vshr.u64        $d1,$e,#@Sigma1[1]        
584         veor            $s1,$t1                   
585         vshr.u64        $d2,$e,#@Sigma1[2]        
586         vadd.i64        @X[$i%8],$s1              
587 ___                                               
# Emit the round itself; 2*$i restores the round number that was halved above.
588         &NEON_00_15(2*$i,@_);                     
589 }                                                 
590                                                   
# Emit the NEON entry point sha512_block_data_order_neon.  In the generated
# file the whole routine sits inside "#if __ARM_MAX_ARCH__>=7".
# Prologue: $len becomes an end pointer ($inp + ($len << 7)), $Ktbl is set
# up with an adr/sub pair, and the registers $A..$H are loaded from $ctx.
591 $code.=<<___;                                     
592 #if __ARM_MAX_ARCH__>=7                           
593 .arch   armv7-a                                   
594 .fpu    neon                                      
595                                                   
596 .global sha512_block_data_order_neon              
597 .type   sha512_block_data_order_neon,%function    
598 .align  4                                         
599 sha512_block_data_order_neon:                     
600 .LNEON:                                           
601         dmb                             @ erra    
602         add     $len,$inp,$len,lsl#7    @ len     
603         VFP_ABI_PUSH                              
604         adr     $Ktbl,.Lsha512_block_data_orde    
605         sub     $Ktbl,$Ktbl,.Lsha512_block_dat    
606         vldmia  $ctx,{$A-$H}            @ load    
607 .Loop_neon:                                       
608 ___                                               
# Rounds 0..15 are unrolled at generation time, one NEON_00_15 call each.
609 for($i=0;$i<16;$i++)    { &NEON_00_15($i,@V);     
# Rounds 16..79: only 16 rounds of code are generated ($i continues from 16
# up to 31).  They form the body of the .L16_79_neon loop, which the emitted
# code executes four times: $cnt starts at 4, "subs" decrements it at the top
# of the body and the "bne" emitted after the body branches back.
610 $code.=<<___;                                     
611         mov             $cnt,#4                   
612 .L16_79_neon:                                     
613         subs            $cnt,#1                   
614 ___                                               
615 for(;$i<32;$i++)        { &NEON_16_79($i,@V);     
# Per-block epilogue: d30 is added into $A, the saved state is reloaded from
# $ctx into d24-d31 and added with four q-register adds, and $A..$H are
# stored back to $ctx.  $Ktbl is then moved back by 640 bytes and the outer
# loop repeats until $inp equals $len.
# NOTE(review): q8..q11 presumably overlay $A..$H, and 640 presumably equals
# 80 rounds x one 8-byte constant per round -- confirm against the register
# assignments and the table defined earlier in the file.
616 $code.=<<___;                                     
617         bne             .L16_79_neon              
618                                                   
619          vadd.i64       $A,d30          @ h+=M    
620         vldmia          $ctx,{d24-d31}  @ load    
621         vadd.i64        q8,q12          @ vect    
622         vadd.i64        q9,q13                    
623         vadd.i64        q10,q14                   
624         vadd.i64        q11,q15                   
625         vstmia          $ctx,{$A-$H}    @ save    
626         teq             $inp,$len                 
627         sub             $Ktbl,#640      @ rewi    
628         bne             .Loop_neon                
629                                                   
630         VFP_ABI_POP                               
631         ret                             @ bx l    
632 .size   sha512_block_data_order_neon,.-sha512_    
633 #endif                                            
634 ___                                               
635 }                                                 
# Trailer of the generated file: an identification string (.asciz) and,
# inside a preprocessor guard that tests __ARM_MAX_ARCH__>=7 together with
# __KERNEL__, a 4-byte common symbol OPENSSL_armcap_P.
636 $code.=<<___;                                     
637 .asciz  "SHA512 block transform for ARMv4/NEON<    
638 .align  2                                         
639 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__    
640 .comm   OPENSSL_armcap_P,4,4                      
641 #endif                                            
642 ___                                               
643                                                   
# Post-process the accumulated text in $code, then write the result to STDOUT.
#
# 1. Every span enclosed in backticks is evaluated as a Perl expression (/e)
#    and replaced by its value, so arithmetic written between backticks in
#    the templates ends up as a plain number.
# 2. The next two substitutions are order-dependent: an explicit "bx lr" is
#    first replaced by the literal word 0xe12fff1e, and only afterwards is
#    "ret" rewritten to "bx lr".  Any "bx lr" left in the output therefore
#    comes from a "ret" in the templates.
#    NOTE(review): 0xe12fff1e is presumably the raw encoding of "bx lr",
#    used so the file assembles for targets without that mnemonic -- confirm.
644 $code =~ s/\`([^\`]*)\`/eval $1/gem;              
645 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;      
646 $code =~ s/\bret\b/bx   lr/gm;                    
647                                                   
# Copy this script's own leading comment block into the output: the "#!" line
# is skipped, a leading "#" is turned into "@" (the comment marker used in the
# assembly templates), empty lines are passed through, and copying stops at
# the first line that is neither a comment nor empty.
# NOTE(review): the result of open is not checked; if it fails, the loop
# simply reads nothing and no header is printed.
648 open SELF,$0;                                     
649 while(<SELF>) {                                   
650         next if (/^#!/);                          
651         last if (!s/^#/@/ and !/^$/);             
652         print;                                    
653 }                                                 
654 close SELF;                                       
655                                                   
# Emit the generated assembly after the copied header.
656 print $code;                                      
657 close STDOUT; # enforce flush                     
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php