~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm/crypto/sha256-armv4.pl

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm/crypto/sha256-armv4.pl (Version linux-6.12-rc7) and /arch/sparc/crypto/sha256-armv4.pl (Version linux-5.13.19)


  1 #!/usr/bin/env perl                               
  2 # SPDX-License-Identifier: GPL-2.0                
  3                                                   
  4 # This code is taken from the OpenSSL project     
  5 # has relicensed it under the GPLv2. Therefore    
  6 # you can redistribute it and/or modify it und    
  7 # Public License version 2 as published by the    
  8 #                                                 
  9 # The original headers, including the original    
 10 # included below for completeness.                
 11                                                   
 12 # ============================================    
 13 # Written by Andy Polyakov <appro@openssl.org>     
 14 # project. The module is, however, dual licens    
 15 # CRYPTOGAMS licenses depending on where you o    
 16 # details see https://www.openssl.org/~appro/c    
 17 # ============================================    
 18                                                   
 19 # SHA256 block procedure for ARMv4. May 2007.     
 20                                                   
 21 # Performance is ~2x better than gcc 3.4 gener    
 22 # lute" terms is ~2250 cycles per 64-byte bloc    
 23 # byte [on single-issue Xscale PXA250 core].      
 24                                                   
 25 # July 2010.                                      
 26 #                                                 
 27 # Rescheduling for dual-issue pipeline resulte    
 28 # Cortex A8 core and ~20 cycles per processed     
 29                                                   
 30 # February 2011.                                  
 31 #                                                 
 32 # Profiler-assisted and platform-specific opti    
 33 # improvement on Cortex A8 core and ~15.4 cycl    
 34                                                   
 35 # September 2013.                                 
 36 #                                                 
 37 # Add NEON implementation. On Cortex A8 it was    
 38 # byte in 12.5 cycles or 23% faster than integ    
 39 # S4 does it in 12.5 cycles too, but it's 50%     
 40 # code (meaning that latter performs sub-optim    
 41 # about it).                                      
 42                                                   
 43 # May 2014.                                       
 44 #                                                 
 45 # Add ARMv8 code path performing at 2.0 cpb on    
 46                                                   
 47 while (($output=shift) && ($output!~/^\w[\w\-]    
 48 open STDOUT,">$output";                           
 49                                                   
 50 $ctx="r0";      $t0="r0";                         
 51 $inp="r1";      $t4="r1";                         
 52 $len="r2";      $t1="r2";                         
 53 $T1="r3";       $t3="r3";                         
 54 $A="r4";                                          
 55 $B="r5";                                          
 56 $C="r6";                                          
 57 $D="r7";                                          
 58 $E="r8";                                          
 59 $F="r9";                                          
 60 $G="r10";                                         
 61 $H="r11";                                         
 62 @V=($A,$B,$C,$D,$E,$F,$G,$H);                     
 63 $t2="r12";                                        
 64 $Ktbl="r14";                                      
 65                                                   
 66 @Sigma0=( 2,13,22);                               
 67 @Sigma1=( 6,11,25);                               
 68 @sigma0=( 7,18, 3);                               
 69 @sigma1=(17,19,10);                               
 70                                                   
 71 sub BODY_00_15 {                                  
 72 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;             
 73                                                   
 74 $code.=<<___ if ($i<16);                          
 75 #if __ARM_ARCH__>=7                               
 76         @ ldr   $t1,[$inp],#4                     
 77 # if $i==15                                       
 78         str     $inp,[sp,#17*4]                   
 79 # endif                                           
 80         eor     $t0,$e,$e,ror#`$Sigma1[1]-$Sig    
 81         add     $a,$a,$t2                         
 82         eor     $t0,$t0,$e,ror#`$Sigma1[2]-$Si    
 83 # ifndef __ARMEB__                                
 84         rev     $t1,$t1                           
 85 # endif                                           
 86 #else                                             
 87         @ ldrb  $t1,[$inp,#3]                     
 88         add     $a,$a,$t2                         
 89         ldrb    $t2,[$inp,#2]                     
 90         ldrb    $t0,[$inp,#1]                     
 91         orr     $t1,$t1,$t2,lsl#8                 
 92         ldrb    $t2,[$inp],#4                     
 93         orr     $t1,$t1,$t0,lsl#16                
 94 # if $i==15                                       
 95         str     $inp,[sp,#17*4]                   
 96 # endif                                           
 97         eor     $t0,$e,$e,ror#`$Sigma1[1]-$Sig    
 98         orr     $t1,$t1,$t2,lsl#24                
 99         eor     $t0,$t0,$e,ror#`$Sigma1[2]-$Si    
100 #endif                                            
101 ___                                               
102 $code.=<<___;                                     
103         ldr     $t2,[$Ktbl],#4                    
104         add     $h,$h,$t1                         
105         str     $t1,[sp,#`$i%16`*4]               
106         eor     $t1,$f,$g                         
107         add     $h,$h,$t0,ror#$Sigma1[0]          
108         and     $t1,$t1,$e                        
109         add     $h,$h,$t2                         
110         eor     $t1,$t1,$g                        
111         eor     $t0,$a,$a,ror#`$Sigma0[1]-$Sig    
112         add     $h,$h,$t1                         
113 #if $i==31                                        
114         and     $t2,$t2,#0xff                     
115         cmp     $t2,#0xf2                         
116 #endif                                            
117 #if $i<15                                         
118 # if __ARM_ARCH__>=7                              
119         ldr     $t1,[$inp],#4                     
120 # else                                            
121         ldrb    $t1,[$inp,#3]                     
122 # endif                                           
123         eor     $t2,$a,$b                         
124 #else                                             
125         ldr     $t1,[sp,#`($i+2)%16`*4]           
126         eor     $t2,$a,$b                         
127         ldr     $t4,[sp,#`($i+15)%16`*4]          
128 #endif                                            
129         eor     $t0,$t0,$a,ror#`$Sigma0[2]-$Si    
130         and     $t3,$t3,$t2                       
131         add     $d,$d,$h                          
132         eor     $t3,$t3,$b                        
133         add     $h,$h,$t0,ror#$Sigma0[0]          
134         @ add   $h,$h,$t3                         
135 ___                                               
136         ($t2,$t3)=($t3,$t2);                      
137 }                                                 
138                                                   
139 sub BODY_16_XX {                                  
140 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;             
141                                                   
142 $code.=<<___;                                     
143         @ ldr   $t1,[sp,#`($i+1)%16`*4]           
144         @ ldr   $t4,[sp,#`($i+14)%16`*4]          
145         mov     $t0,$t1,ror#$sigma0[0]            
146         add     $a,$a,$t2                         
147         mov     $t2,$t4,ror#$sigma1[0]            
148         eor     $t0,$t0,$t1,ror#$sigma0[1]        
149         eor     $t2,$t2,$t4,ror#$sigma1[1]        
150         eor     $t0,$t0,$t1,lsr#$sigma0[2]        
151         ldr     $t1,[sp,#`($i+0)%16`*4]           
152         eor     $t2,$t2,$t4,lsr#$sigma1[2]        
153         ldr     $t4,[sp,#`($i+9)%16`*4]           
154                                                   
155         add     $t2,$t2,$t0                       
156         eor     $t0,$e,$e,ror#`$Sigma1[1]-$Sig    
157         add     $t1,$t1,$t2                       
158         eor     $t0,$t0,$e,ror#`$Sigma1[2]-$Si    
159         add     $t1,$t1,$t4                       
160 ___                                               
161         &BODY_00_15(@_);                          
162 }                                                 
163                                                   
164 $code=<<___;                                      
165 #ifndef __KERNEL__                                
166 # include "arm_arch.h"                            
167 #else                                             
168 # define __ARM_ARCH__ __LINUX_ARM_ARCH__          
169 # define __ARM_MAX_ARCH__ 7                       
170 #endif                                            
171                                                   
172 .text                                             
173 #if __ARM_ARCH__<7                                
174 .code   32                                        
175 #else                                             
176 .syntax unified                                   
177 # ifdef __thumb2__                                
178 .thumb                                            
179 # else                                            
180 .code   32                                        
181 # endif                                           
182 #endif                                            
183                                                   
184 .type   K256,%object                              
185 .align  5                                         
186 K256:                                             
187 .word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b    
188 .word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1    
189 .word   0xd807aa98,0x12835b01,0x243185be,0x550    
190 .word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19    
191 .word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240    
192 .word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f    
193 .word   0x983e5152,0xa831c66d,0xb00327c8,0xbf5    
194 .word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x142    
195 .word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x533    
196 .word   0x650a7354,0x766a0abb,0x81c2c92e,0x927    
197 .word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76    
198 .word   0xd192e819,0xd6990624,0xf40e3585,0x106    
199 .word   0x19a4c116,0x1e376c08,0x2748774c,0x34b    
200 .word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682    
201 .word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc    
202 .word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67    
203 .size   K256,.-K256                               
204 .word   0                               @ term    
205 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__    
206 .LOPENSSL_armcap:                                 
207 .word   OPENSSL_armcap_P-sha256_block_data_ord    
208 #endif                                            
209 .align  5                                         
210                                                   
211 .global sha256_block_data_order                   
212 .type   sha256_block_data_order,%function         
213 sha256_block_data_order:                          
214 .Lsha256_block_data_order:                        
215 #if __ARM_ARCH__<7                                
216         sub     r3,pc,#8                @ sha2    
217 #else                                             
218         adr     r3,.Lsha256_block_data_order      
219 #endif                                            
220 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__    
221         ldr     r12,.LOPENSSL_armcap              
222         ldr     r12,[r3,r12]            @ OPEN    
223         tst     r12,#ARMV8_SHA256                 
224         bne     .LARMv8                           
225         tst     r12,#ARMV7_NEON                   
226         bne     .LNEON                            
227 #endif                                            
228         add     $len,$inp,$len,lsl#6    @ len     
229         stmdb   sp!,{$ctx,$inp,$len,r4-r11,lr}    
230         ldmia   $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}    
231         sub     $Ktbl,r3,#256+32        @ K256    
232         sub     sp,sp,#16*4             @ allo    
233 .Loop:                                            
234 # if __ARM_ARCH__>=7                              
235         ldr     $t1,[$inp],#4                     
236 # else                                            
237         ldrb    $t1,[$inp,#3]                     
238 # endif                                           
239         eor     $t3,$B,$C               @ magi    
240         eor     $t2,$t2,$t2                       
241 ___                                               
242 for($i=0;$i<16;$i++)    { &BODY_00_15($i,@V);     
243 $code.=".Lrounds_16_xx:\n";                       
244 for (;$i<32;$i++)       { &BODY_16_XX($i,@V);     
245 $code.=<<___;                                     
246 #if __ARM_ARCH__>=7                               
247         ite     eq                      @ Thum    
248 #endif                                            
249         ldreq   $t3,[sp,#16*4]          @ pull    
250         bne     .Lrounds_16_xx                    
251                                                   
252         add     $A,$A,$t2               @ h+=M    
253         ldr     $t0,[$t3,#0]                      
254         ldr     $t1,[$t3,#4]                      
255         ldr     $t2,[$t3,#8]                      
256         add     $A,$A,$t0                         
257         ldr     $t0,[$t3,#12]                     
258         add     $B,$B,$t1                         
259         ldr     $t1,[$t3,#16]                     
260         add     $C,$C,$t2                         
261         ldr     $t2,[$t3,#20]                     
262         add     $D,$D,$t0                         
263         ldr     $t0,[$t3,#24]                     
264         add     $E,$E,$t1                         
265         ldr     $t1,[$t3,#28]                     
266         add     $F,$F,$t2                         
267         ldr     $inp,[sp,#17*4]         @ pull    
268         ldr     $t2,[sp,#18*4]          @ pull    
269         add     $G,$G,$t0                         
270         add     $H,$H,$t1                         
271         stmia   $t3,{$A,$B,$C,$D,$E,$F,$G,$H}     
272         cmp     $inp,$t2                          
273         sub     $Ktbl,$Ktbl,#256        @ rewi    
274         bne     .Loop                             
275                                                   
276         add     sp,sp,#`16+3`*4 @ destroy fram    
277 #if __ARM_ARCH__>=5                               
278         ldmia   sp!,{r4-r11,pc}                   
279 #else                                             
280         ldmia   sp!,{r4-r11,lr}                   
281         tst     lr,#1                             
282         moveq   pc,lr                   @ be b    
283         bx      lr                      @ inte    
284 #endif                                            
285 .size   sha256_block_data_order,.-sha256_block    
286 ___                                               
287 ##############################################    
288 # NEON stuff                                      
289 #                                                 
290 {{{                                               
291 my @X=map("q$_",(0..3));                          
292 my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10",    
293 my $Xfer=$t4;                                     
294 my $j=0;                                          
295                                                   
296 sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2    
297 sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2    
298                                                   
299 sub AUTOLOAD()          # thunk [simplified] x    
300 { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;    
301   my $arg = pop;                                  
302     $arg = "#$arg" if ($arg*1 eq $arg);           
303     $code .= "\t$opcode\t".join(',',@_,$arg)."    
304 }                                                 
305                                                   
306 sub Xupdate()                                     
307 { use integer;                                    
308   my $body = shift;                               
309   my @insns = (&$body,&$body,&$body,&$body);      
310   my ($a,$b,$c,$d,$e,$f,$g,$h);                   
311                                                   
312         &vext_8         ($T0,@X[0],@X[1],4);      
313          eval(shift(@insns));                     
314          eval(shift(@insns));                     
315          eval(shift(@insns));                     
316         &vext_8         ($T1,@X[2],@X[3],4);      
317          eval(shift(@insns));                     
318          eval(shift(@insns));                     
319          eval(shift(@insns));                     
320         &vshr_u32       ($T2,$T0,$sigma0[0]);     
321          eval(shift(@insns));                     
322          eval(shift(@insns));                     
323         &vadd_i32       (@X[0],@X[0],$T1);        
324          eval(shift(@insns));                     
325          eval(shift(@insns));                     
326         &vshr_u32       ($T1,$T0,$sigma0[2]);     
327          eval(shift(@insns));                     
328          eval(shift(@insns));                     
329         &vsli_32        ($T2,$T0,32-$sigma0[0]    
330          eval(shift(@insns));                     
331          eval(shift(@insns));                     
332         &vshr_u32       ($T3,$T0,$sigma0[1]);     
333          eval(shift(@insns));                     
334          eval(shift(@insns));                     
335         &veor           ($T1,$T1,$T2);            
336          eval(shift(@insns));                     
337          eval(shift(@insns));                     
338         &vsli_32        ($T3,$T0,32-$sigma0[1]    
339          eval(shift(@insns));                     
340          eval(shift(@insns));                     
341           &vshr_u32     ($T4,&Dhi(@X[3]),$sigm    
342          eval(shift(@insns));                     
343          eval(shift(@insns));                     
344         &veor           ($T1,$T1,$T3);            
345          eval(shift(@insns));                     
346          eval(shift(@insns));                     
347           &vsli_32      ($T4,&Dhi(@X[3]),32-$s    
348          eval(shift(@insns));                     
349          eval(shift(@insns));                     
350           &vshr_u32     ($T5,&Dhi(@X[3]),$sigm    
351          eval(shift(@insns));                     
352          eval(shift(@insns));                     
353         &vadd_i32       (@X[0],@X[0],$T1);        
354          eval(shift(@insns));                     
355          eval(shift(@insns));                     
356           &veor         ($T5,$T5,$T4);            
357          eval(shift(@insns));                     
358          eval(shift(@insns));                     
359           &vshr_u32     ($T4,&Dhi(@X[3]),$sigm    
360          eval(shift(@insns));                     
361          eval(shift(@insns));                     
362           &vsli_32      ($T4,&Dhi(@X[3]),32-$s    
363          eval(shift(@insns));                     
364          eval(shift(@insns));                     
365           &veor         ($T5,$T5,$T4);            
366          eval(shift(@insns));                     
367          eval(shift(@insns));                     
368         &vadd_i32       (&Dlo(@X[0]),&Dlo(@X[0    
369          eval(shift(@insns));                     
370          eval(shift(@insns));                     
371           &vshr_u32     ($T4,&Dlo(@X[0]),$sigm    
372          eval(shift(@insns));                     
373          eval(shift(@insns));                     
374           &vsli_32      ($T4,&Dlo(@X[0]),32-$s    
375          eval(shift(@insns));                     
376          eval(shift(@insns));                     
377           &vshr_u32     ($T5,&Dlo(@X[0]),$sigm    
378          eval(shift(@insns));                     
379          eval(shift(@insns));                     
380           &veor         ($T5,$T5,$T4);            
381          eval(shift(@insns));                     
382          eval(shift(@insns));                     
383           &vshr_u32     ($T4,&Dlo(@X[0]),$sigm    
384          eval(shift(@insns));                     
385          eval(shift(@insns));                     
386         &vld1_32        ("{$T0}","[$Ktbl,:128]    
387          eval(shift(@insns));                     
388          eval(shift(@insns));                     
389           &vsli_32      ($T4,&Dlo(@X[0]),32-$s    
390          eval(shift(@insns));                     
391          eval(shift(@insns));                     
392           &veor         ($T5,$T5,$T4);            
393          eval(shift(@insns));                     
394          eval(shift(@insns));                     
395         &vadd_i32       (&Dhi(@X[0]),&Dhi(@X[0    
396          eval(shift(@insns));                     
397          eval(shift(@insns));                     
398         &vadd_i32       ($T0,$T0,@X[0]);          
399          while($#insns>=2) { eval(shift(@insns    
400         &vst1_32        ("{$T0}","[$Xfer,:128]    
401          eval(shift(@insns));                     
402          eval(shift(@insns));                     
403                                                   
404         push(@X,shift(@X));             # "rot    
405 }                                                 
406                                                   
407 sub Xpreload()                                    
408 { use integer;                                    
409   my $body = shift;                               
410   my @insns = (&$body,&$body,&$body,&$body);      
411   my ($a,$b,$c,$d,$e,$f,$g,$h);                   
412                                                   
413          eval(shift(@insns));                     
414          eval(shift(@insns));                     
415          eval(shift(@insns));                     
416          eval(shift(@insns));                     
417         &vld1_32        ("{$T0}","[$Ktbl,:128]    
418          eval(shift(@insns));                     
419          eval(shift(@insns));                     
420          eval(shift(@insns));                     
421          eval(shift(@insns));                     
422         &vrev32_8       (@X[0],@X[0]);            
423          eval(shift(@insns));                     
424          eval(shift(@insns));                     
425          eval(shift(@insns));                     
426          eval(shift(@insns));                     
427         &vadd_i32       ($T0,$T0,@X[0]);          
428          foreach (@insns) { eval; }     # rema    
429         &vst1_32        ("{$T0}","[$Xfer,:128]    
430                                                   
431         push(@X,shift(@X));             # "rot    
432 }                                                 
433                                                   
434 sub body_00_15 () {                               
435         (                                         
436         '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.          
437         '&add   ($h,$h,$t1)',                     
438         '&eor   ($t1,$f,$g)',                     
439         '&eor   ($t0,$e,$e,"ror#".($Sigma1[1]-    
440         '&add   ($a,$a,$t2)',                     
441         '&and   ($t1,$t1,$e)',                    
442         '&eor   ($t2,$t0,$e,"ror#".($Sigma1[2]    
443         '&eor   ($t0,$a,$a,"ror#".($Sigma0[1]-    
444         '&eor   ($t1,$t1,$g)',                    
445         '&add   ($h,$h,$t2,"ror#$Sigma1[0]")',    
446         '&eor   ($t2,$a,$b)',                     
447         '&eor   ($t0,$t0,$a,"ror#".($Sigma0[2]    
448         '&add   ($h,$h,$t1)',                     
449         '&ldr   ($t1,sprintf "[sp,#%d]",4*(($j    
450         '&ldr   ($t1,"[$Ktbl]")                   
451         '&ldr   ($t1,"[sp,#64]")                  
452         '&and   ($t3,$t3,$t2)',                   
453         '&add   ($d,$d,$h)',                      
454         '&add   ($h,$h,$t0,"ror#$Sigma0[0]");'    
455         '&eor   ($t3,$t3,$b)',                    
456         '$j++;  unshift(@V,pop(@V)); ($t2,$t3)    
457         )                                         
458 }                                                 
459                                                   
460 $code.=<<___;                                     
461 #if __ARM_MAX_ARCH__>=7                           
462 .arch   armv7-a                                   
463 .fpu    neon                                      
464                                                   
465 .global sha256_block_data_order_neon              
466 .type   sha256_block_data_order_neon,%function    
467 .align  4                                         
468 sha256_block_data_order_neon:                     
469 .LNEON:                                           
470         stmdb   sp!,{r4-r12,lr}                   
471                                                   
472         sub     $H,sp,#16*4+16                    
473         adr     $Ktbl,.Lsha256_block_data_orde    
474         sub     $Ktbl,$Ktbl,#.Lsha256_block_da    
475         bic     $H,$H,#15               @ alig    
476         mov     $t2,sp                            
477         mov     sp,$H                   @ allo    
478         add     $len,$inp,$len,lsl#6    @ len     
479                                                   
480         vld1.8          {@X[0]},[$inp]!           
481         vld1.8          {@X[1]},[$inp]!           
482         vld1.8          {@X[2]},[$inp]!           
483         vld1.8          {@X[3]},[$inp]!           
484         vld1.32         {$T0},[$Ktbl,:128]!       
485         vld1.32         {$T1},[$Ktbl,:128]!       
486         vld1.32         {$T2},[$Ktbl,:128]!       
487         vld1.32         {$T3},[$Ktbl,:128]!       
488         vrev32.8        @X[0],@X[0]               
489         str             $ctx,[sp,#64]             
490         vrev32.8        @X[1],@X[1]               
491         str             $inp,[sp,#68]             
492         mov             $Xfer,sp                  
493         vrev32.8        @X[2],@X[2]               
494         str             $len,[sp,#72]             
495         vrev32.8        @X[3],@X[3]               
496         str             $t2,[sp,#76]              
497         vadd.i32        $T0,$T0,@X[0]             
498         vadd.i32        $T1,$T1,@X[1]             
499         vst1.32         {$T0},[$Xfer,:128]!       
500         vadd.i32        $T2,$T2,@X[2]             
501         vst1.32         {$T1},[$Xfer,:128]!       
502         vadd.i32        $T3,$T3,@X[3]             
503         vst1.32         {$T2},[$Xfer,:128]!       
504         vst1.32         {$T3},[$Xfer,:128]!       
505                                                   
506         ldmia           $ctx,{$A-$H}              
507         sub             $Xfer,$Xfer,#64           
508         ldr             $t1,[sp,#0]               
509         eor             $t2,$t2,$t2               
510         eor             $t3,$B,$C                 
511         b               .L_00_48                  
512                                                   
513 .align  4                                         
514 .L_00_48:                                         
515 ___                                               
516         &Xupdate(\&body_00_15);                   
517         &Xupdate(\&body_00_15);                   
518         &Xupdate(\&body_00_15);                   
519         &Xupdate(\&body_00_15);                   
520 $code.=<<___;                                     
521         teq     $t1,#0                            
522         ldr     $t1,[sp,#0]                       
523         sub     $Xfer,$Xfer,#64                   
524         bne     .L_00_48                          
525                                                   
526         ldr             $inp,[sp,#68]             
527         ldr             $t0,[sp,#72]              
528         sub             $Ktbl,$Ktbl,#256          
529         teq             $inp,$t0                  
530         it              eq                        
531         subeq           $inp,$inp,#64             
532         vld1.8          {@X[0]},[$inp]!           
533         vld1.8          {@X[1]},[$inp]!           
534         vld1.8          {@X[2]},[$inp]!           
535         vld1.8          {@X[3]},[$inp]!           
536         it              ne                        
537         strne           $inp,[sp,#68]             
538         mov             $Xfer,sp                  
539 ___                                               
540         &Xpreload(\&body_00_15);                  
541         &Xpreload(\&body_00_15);                  
542         &Xpreload(\&body_00_15);                  
543         &Xpreload(\&body_00_15);                  
544 $code.=<<___;                                     
545         ldr     $t0,[$t1,#0]                      
546         add     $A,$A,$t2                         
547         ldr     $t2,[$t1,#4]                      
548         ldr     $t3,[$t1,#8]                      
549         ldr     $t4,[$t1,#12]                     
550         add     $A,$A,$t0                         
551         ldr     $t0,[$t1,#16]                     
552         add     $B,$B,$t2                         
553         ldr     $t2,[$t1,#20]                     
554         add     $C,$C,$t3                         
555         ldr     $t3,[$t1,#24]                     
556         add     $D,$D,$t4                         
557         ldr     $t4,[$t1,#28]                     
558         add     $E,$E,$t0                         
559         str     $A,[$t1],#4                       
560         add     $F,$F,$t2                         
561         str     $B,[$t1],#4                       
562         add     $G,$G,$t3                         
563         str     $C,[$t1],#4                       
564         add     $H,$H,$t4                         
565         str     $D,[$t1],#4                       
566         stmia   $t1,{$E-$H}                       
567                                                   
568         ittte   ne                                
569         movne   $Xfer,sp                          
570         ldrne   $t1,[sp,#0]                       
571         eorne   $t2,$t2,$t2                       
572         ldreq   sp,[sp,#76]                       
573         itt     ne                                
574         eorne   $t3,$B,$C                         
575         bne     .L_00_48                          
576                                                   
577         ldmia   sp!,{r4-r12,pc}                   
578 .size   sha256_block_data_order_neon,.-sha256_    
579 #endif                                            
580 ___                                               
581 }}}                                               
582 ##############################################    
583 # ARMv8 stuff                                     
584 #                                                 
585 {{{                                               
586 my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));         
587 my @MSG=map("q$_",(8..11));                       
588 my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(    
589 my $Ktbl="r3";                                    
590                                                   
591 $code.=<<___;                                     
592 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__    
593                                                   
594 # ifdef __thumb2__                                
595 #  define INST(a,b,c,d) .byte   c,d|0xc,a,b       
596 # else                                            
597 #  define INST(a,b,c,d) .byte   a,b,c,d           
598 # endif                                           
599                                                   
600 .type   sha256_block_data_order_armv8,%functio    
601 .align  5                                         
602 sha256_block_data_order_armv8:                    
603 .LARMv8:                                          
604         vld1.32 {$ABCD,$EFGH},[$ctx]              
605 # ifdef __thumb2__                                
606         adr     $Ktbl,.LARMv8                     
607         sub     $Ktbl,$Ktbl,#.LARMv8-K256         
608 # else                                            
609         adrl    $Ktbl,K256                        
610 # endif                                           
611         add     $len,$inp,$len,lsl#6    @ len     
612                                                   
613 .Loop_v8:                                         
614         vld1.8          {@MSG[0]-@MSG[1]},[$in    
615         vld1.8          {@MSG[2]-@MSG[3]},[$in    
616         vld1.32         {$W0},[$Ktbl]!            
617         vrev32.8        @MSG[0],@MSG[0]           
618         vrev32.8        @MSG[1],@MSG[1]           
619         vrev32.8        @MSG[2],@MSG[2]           
620         vrev32.8        @MSG[3],@MSG[3]           
621         vmov            $ABCD_SAVE,$ABCD          
622         vmov            $EFGH_SAVE,$EFGH          
623         teq             $inp,$len                 
624 ___                                               
625 for($i=0;$i<12;$i++) {                            
626 $code.=<<___;                                     
627         vld1.32         {$W1},[$Ktbl]!            
628         vadd.i32        $W0,$W0,@MSG[0]           
629         sha256su0       @MSG[0],@MSG[1]           
630         vmov            $abcd,$ABCD               
631         sha256h         $ABCD,$EFGH,$W0           
632         sha256h2        $EFGH,$abcd,$W0           
633         sha256su1       @MSG[0],@MSG[2],@MSG[3    
634 ___                                               
635         ($W0,$W1)=($W1,$W0);    push(@MSG,shif    
636 }                                                 
637 $code.=<<___;                                     
638         vld1.32         {$W1},[$Ktbl]!            
639         vadd.i32        $W0,$W0,@MSG[0]           
640         vmov            $abcd,$ABCD               
641         sha256h         $ABCD,$EFGH,$W0           
642         sha256h2        $EFGH,$abcd,$W0           
643                                                   
644         vld1.32         {$W0},[$Ktbl]!            
645         vadd.i32        $W1,$W1,@MSG[1]           
646         vmov            $abcd,$ABCD               
647         sha256h         $ABCD,$EFGH,$W1           
648         sha256h2        $EFGH,$abcd,$W1           
649                                                   
650         vld1.32         {$W1},[$Ktbl]             
651         vadd.i32        $W0,$W0,@MSG[2]           
652         sub             $Ktbl,$Ktbl,#256-16       
653         vmov            $abcd,$ABCD               
654         sha256h         $ABCD,$EFGH,$W0           
655         sha256h2        $EFGH,$abcd,$W0           
656                                                   
657         vadd.i32        $W1,$W1,@MSG[3]           
658         vmov            $abcd,$ABCD               
659         sha256h         $ABCD,$EFGH,$W1           
660         sha256h2        $EFGH,$abcd,$W1           
661                                                   
662         vadd.i32        $ABCD,$ABCD,$ABCD_SAVE    
663         vadd.i32        $EFGH,$EFGH,$EFGH_SAVE    
664         it              ne                        
665         bne             .Loop_v8                  
666                                                   
667         vst1.32         {$ABCD,$EFGH},[$ctx]      
668                                                   
669         ret             @ bx lr                   
670 .size   sha256_block_data_order_armv8,.-sha256    
671 #endif                                            
672 ___                                               
673 }}}                                               
674 $code.=<<___;                                     
675 .asciz  "SHA256 block transform for ARMv4/NEON<    
676 .align  2                                         
677 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__    
678 .comm   OPENSSL_armcap_P,4,4                      
679 #endif                                            
680 ___                                               
681                                                   
# Copy this script's leading comment block into the generated output,
# rewriting the leading '#' of each line to '@' and skipping the shebang
# line.  Copying stops at the first line that is neither a '#' comment nor
# empty.
#
# The three-argument open with a lexical handle makes sure $0 is always
# treated as a plain file name: with the two-argument form a script path
# that starts with '>' or '|', or that carries leading/trailing whitespace,
# is interpreted as an open mode instead.  A failed open is reported on
# STDERR but is deliberately not fatal -- only the header is lost, the
# generated code that follows is still printed.
if (open(my $self, '<', $0)) {
    while (<$self>) {
        next if (/^#!/);                # do not copy the shebang line
        last if (!s/^#/@/ and !/^$/);   # first non-comment, non-empty line ends the header
        print;
    }
    close $self;
} else {
    warn "$0: cannot re-read script to copy header comments: $!\n";
}
689                                                   
# Hand-encoding of the sha256* mnemonics used in $code.  %opcode maps a
# mnemonic to a base opcode word and is private to the enclosing bare
# block, so only unsha256() can see it.
690 {   my  %opcode = (                               
691         "sha256h"       => 0xf3000c40,  "sha25    
692         "sha256su0"     => 0xf3ba03c0,  "sha25    
693                                                   
    # unsha256($mnemonic, $arg)
    #   $mnemonic - key looked up in %opcode (e.g. "sha256h")
    #   $arg      - operand text; q-register numbers are captured from it
    # When $arg matches the operand pattern, the value of the sub is the
    # sprintf result: an "INST(...)" line carrying the bytes of $word.
    # When it does not match, the sprintf is never reached.
694     sub unsha256 {                                
695         my ($mnemonic,$arg)=@_;                   
696                                                   
        # $1, $2 and $3 are the captured register numbers; the middle one
        # sits in an optional group, so it may be undefined.
697         if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+    
            # Start from the base opcode and OR in fields derived from the
            # captured register numbers.
698             my $word = $opcode{$mnemonic}|(($1    
699                                          |(($2    
700                                          |(($3    
701             # since ARMv7 instructions are alw    
702             # correct solution is to use .inst    
703             # assemblers don't implement it:-(    
            # $word is split into individual bytes, lowest byte first, and
            # handed to the INST() macro; the mnemonic and operands are
            # passed along as further sprintf arguments.
704             sprintf "INST(0x%02x,0x%02x,0x%02x    
705                         $word&0xff,($word>>8)&    
706                         ($word>>16)&0xff,($wor    
707                         $mnemonic,$arg;           
708         }                                         
709     }                                             
710 }                                                 
711                                                   
# Final pass: walk the accumulated $code one line at a time (split on the
# input record separator $/), apply the rewrites below to each line, and
# print the result followed by a newline.
712 foreach (split($/,$code)) {                       
713                                                   
            # Each backtick-delimited span is replaced by the result of
            # evaluating its contents as Perl (/e modifier).
714         s/\`([^\`]*)\`/eval $1/geo;               
715                                                   
            # A sha256* mnemonic followed by q-register operands is handed
            # to unsha256() together with its operand text.
716         s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2    
717                                                   
            # "ret" becomes "bx lr".  Because the two substitutions are
            # joined with "or", the second one runs only when no "ret" was
            # replaced on this line: a "bx lr" already present in the line
            # is turned into the literal word 0xe12fff1e, while a "bx lr"
            # that was just produced from "ret" is left untouched.
718         s/\bret\b/bx    lr/go           or        
719         s/\bbx\s+lr\b/.word\t0xe12fff1e/go;       
720                                                   
721         print $_,"\n";                            
722 }                                                 
723                                                   
724 close STDOUT; # enforce flush                     
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php