1 #! /usr/bin/env perl 2 # SPDX-License-Identifier: GPL-2.0 3 4 # This code is taken from CRYPTOGAMs[1] and is 5 # in the license to distribute the code under 6 # is free software; you can redistribute it an 7 # the GNU General Public License version 2 as 8 # Foundation. 9 # 10 # [1] https://www.openssl.org/~appro/cryptogam 11 12 # Copyright (c) 2006-2017, CRYPTOGAMS by <appro 13 # All rights reserved. 14 # 15 # Redistribution and use in source and binary 16 # modification, are permitted provided that th 17 # are met: 18 # 19 # * Redistributions of source code must 20 # this list of conditions and the foll 21 # 22 # * Redistributions in binary form must 23 # copyright notice, this list of condi 24 # disclaimer in the documentation and/ 25 # provided with the distribution. 26 # 27 # * Neither the name of the CRYPTOGAMS n 28 # copyright holder and contributors ma 29 # promote products derived from this s 30 # prior written permission. 31 # 32 # ALTERNATIVELY, provided that this notice is 33 # product may be distributed under the terms o 34 # License (GPL), in which case the provisions 35 # those given above. 36 # 37 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT H 38 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIE 39 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHA 40 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO E 41 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRE 42 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 43 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 44 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 45 # THEORY OF LIABILITY, WHETHER IN CONTRACT, ST 46 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 47 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POS 48 49 # ============================================ 50 # Written by Andy Polyakov <appro@openssl.org> 51 # project. The module is, however, dual licens 52 # CRYPTOGAMS licenses depending on where you o 53 # details see https://www.openssl.org/~appro/c 54 # ============================================ 55 # 56 # This module implements support for AES instr 57 # specification version 2.07, first implemente 58 # The module is endian-agnostic in sense that 59 # and little-endian cases. Data alignment in p 60 # handled with VSX loads and stores, which imp 61 # set. It should also be noted that ISA specif 62 # alignment exceptions for these instructions 63 # Initially alignment was handled in pure Alti 64 # is aligned programmatically, which in turn g 65 # free execution], but it turned to hamper per 66 # instructions are interleaved. It's reckoned 67 # misalignment penalties at page boundaries ar 68 # than additional overhead in pure AltiVec app 69 # 70 # May 2016 71 # 72 # Add XTS subroutine, 9x on little- and 12x im 73 # systems were measured. 74 # 75 ############################################## 76 # Current large-block performance in cycles pe 77 # 128-bit key (less is better). 78 # 79 # CBC en-/decrypt CTR XTS 80 # POWER8[le] 3.96/0.72 0.74 1.1 81 # POWER8[be] 3.75/0.65 0.66 1.0 82 83 $flavour = shift; 84 85 if ($flavour =~ /64/) { 86 $SIZE_T =8; 87 $LRSAVE =2*$SIZE_T; 88 $STU ="stdu"; 89 $POP ="ld"; 90 $PUSH ="std"; 91 $UCMP ="cmpld"; 92 $SHL ="sldi"; 93 } elsif ($flavour =~ /32/) { 94 $SIZE_T =4; 95 $LRSAVE =$SIZE_T; 96 $STU ="stwu"; 97 $POP ="lwz"; 98 $PUSH ="stw"; 99 $UCMP ="cmplw"; 100 $SHL ="slwi"; 101 } else { die "nonsense $flavour"; } 102 103 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 104 105 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 106 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) 107 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" an 108 die "can't locate ppc-xlate.pl"; 109 110 open STDOUT,"| $^X $xlate $flavour ".shift || 111 112 $FRAME=8*$SIZE_T; 113 $prefix="aes_p10"; 114 115 $sp="r1"; 116 $vrsave="r12"; 117 118 ############################################## 119 {{{ # Key setup procedures 120 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$ 121 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map 122 my ($stage,$outperm,$outmask,$outhead,$outtail 123 124 $code.=<<___; 125 .machine "any" 126 127 .text 128 129 .align 7 130 rcon: 131 .long 0x01000000, 0x01000000, 0x01000000, 0x 132 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x 133 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x 134 .long 0,0,0,0 135 Lconsts: 136 mflr r0 137 bcl 20,31,\$+4 138 mflr $ptr #vvvvv "distance betw 139 addi $ptr,$ptr,-0x48 140 mtlr r0 141 blr 142 .long 0 143 .byte 0,12,0x14,0,0,0,0,0 144 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by < 145 146 .globl .${prefix}_set_encrypt_key 147 Lset_encrypt_key: 148 mflr r11 149 $PUSH r11,$LRSAVE($sp) 150 151 li $ptr,-1 152 ${UCMP}i $inp,0 153 beq- Lenc_key_abort 154 ${UCMP}i $out,0 155 beq- Lenc_key_abort 156 li $ptr,-2 157 cmpwi $bits,128 158 blt- Lenc_key_abort 159 cmpwi $bits,256 160 bgt- Lenc_key_abort 161 andi. r0,$bits,0x3f 162 bne- Lenc_key_abort 163 164 lis r0,0xfff0 165 mfspr $vrsave,256 166 mtspr 256,r0 167 168 bl Lconsts 169 mtlr r11 170 171 neg r9,$inp 172 lvx $in0,0,$inp 173 addi $inp,$inp,15 174 lvsr $key,0,r9 175 li r8,0x20 176 cmpwi $bits,192 177 lvx $in1,0,$inp 178 le?vspltisb $mask,0x0f 179 lvx $rcon,0,$ptr 180 le?vxor $key,$key,$mask 181 lvx $mask,r8,$ptr 182 addi $ptr,$ptr,0x10 183 vperm $in0,$in0,$in1,$key 184 li $cnt,8 185 vxor $zero,$zero,$zero 186 mtctr $cnt 187 188 ?lvsr $outperm,0,$out 189 vspltisb $outmask,-1 190 lvx $outhead,0,$out 191 ?vperm $outmask,$zero,$outmas 192 193 blt Loop128 194 addi $inp,$inp,8 195 beq L192 196 addi $inp,$inp,8 197 b L256 198 199 .align 4 200 Loop128: 201 vperm $key,$in0,$in0,$mask 202 vsldoi $tmp,$zero,$in0,12 203 vperm $outtail,$in0,$in0,$ou 204 vsel $stage,$outhead,$outta 205 vmr $outhead,$outtail 206 vcipherlast $key,$key,$rcon 207 stvx $stage,0,$out 208 addi $out,$out,16 209 210 vxor $in0,$in0,$tmp 211 vsldoi $tmp,$zero,$tmp,12 212 vxor $in0,$in0,$tmp 213 vsldoi $tmp,$zero,$tmp,12 214 vxor $in0,$in0,$tmp 215 vadduwm $rcon,$rcon,$rcon 216 vxor $in0,$in0,$key 217 bdnz Loop128 218 219 lvx $rcon,0,$ptr 220 221 vperm $key,$in0,$in0,$mask 222 vsldoi $tmp,$zero,$in0,12 223 vperm $outtail,$in0,$in0,$ou 224 vsel $stage,$outhead,$outta 225 vmr $outhead,$outtail 226 vcipherlast $key,$key,$rcon 227 stvx $stage,0,$out 228 addi $out,$out,16 229 230 vxor $in0,$in0,$tmp 231 vsldoi $tmp,$zero,$tmp,12 232 vxor $in0,$in0,$tmp 233 vsldoi $tmp,$zero,$tmp,12 234 vxor $in0,$in0,$tmp 235 vadduwm $rcon,$rcon,$rcon 236 vxor $in0,$in0,$key 237 238 vperm $key,$in0,$in0,$mask 239 vsldoi $tmp,$zero,$in0,12 240 vperm $outtail,$in0,$in0,$ou 241 vsel $stage,$outhead,$outta 242 vmr $outhead,$outtail 243 vcipherlast $key,$key,$rcon 244 stvx $stage,0,$out 245 addi $out,$out,16 246 247 vxor $in0,$in0,$tmp 248 vsldoi $tmp,$zero,$tmp,12 249 vxor $in0,$in0,$tmp 250 vsldoi $tmp,$zero,$tmp,12 251 vxor $in0,$in0,$tmp 252 vxor $in0,$in0,$key 253 vperm $outtail,$in0,$in0,$ou 254 vsel $stage,$outhead,$outta 255 vmr $outhead,$outtail 256 stvx $stage,0,$out 257 258 addi $inp,$out,15 259 addi $out,$out,0x50 260 261 li $rounds,10 262 b Ldone 263 264 .align 4 265 L192: 266 lvx $tmp,0,$inp 267 li $cnt,4 268 vperm $outtail,$in0,$in0,$ou 269 vsel $stage,$outhead,$outta 270 vmr $outhead,$outtail 271 stvx $stage,0,$out 272 addi $out,$out,16 273 vperm $in1,$in1,$tmp,$key 274 vspltisb $key,8 275 mtctr $cnt 276 vsububm $mask,$mask,$key 277 278 Loop192: 279 vperm $key,$in1,$in1,$mask 280 vsldoi $tmp,$zero,$in0,12 281 vcipherlast $key,$key,$rcon 282 283 vxor $in0,$in0,$tmp 284 vsldoi $tmp,$zero,$tmp,12 285 vxor $in0,$in0,$tmp 286 vsldoi $tmp,$zero,$tmp,12 287 vxor $in0,$in0,$tmp 288 289 vsldoi $stage,$zero,$in1,8 290 vspltw $tmp,$in0,3 291 vxor $tmp,$tmp,$in1 292 vsldoi $in1,$zero,$in1,12 293 vadduwm $rcon,$rcon,$rcon 294 vxor $in1,$in1,$tmp 295 vxor $in0,$in0,$key 296 vxor $in1,$in1,$key 297 vsldoi $stage,$stage,$in0,8 298 299 vperm $key,$in1,$in1,$mask 300 vsldoi $tmp,$zero,$in0,12 301 vperm $outtail,$stage,$stage 302 vsel $stage,$outhead,$outta 303 vmr $outhead,$outtail 304 vcipherlast $key,$key,$rcon 305 stvx $stage,0,$out 306 addi $out,$out,16 307 308 vsldoi $stage,$in0,$in1,8 309 vxor $in0,$in0,$tmp 310 vsldoi $tmp,$zero,$tmp,12 311 vperm $outtail,$stage,$stage 312 vsel $stage,$outhead,$outta 313 vmr $outhead,$outtail 314 vxor $in0,$in0,$tmp 315 vsldoi $tmp,$zero,$tmp,12 316 vxor $in0,$in0,$tmp 317 stvx $stage,0,$out 318 addi $out,$out,16 319 320 vspltw $tmp,$in0,3 321 vxor $tmp,$tmp,$in1 322 vsldoi $in1,$zero,$in1,12 323 vadduwm $rcon,$rcon,$rcon 324 vxor $in1,$in1,$tmp 325 vxor $in0,$in0,$key 326 vxor $in1,$in1,$key 327 vperm $outtail,$in0,$in0,$ou 328 vsel $stage,$outhead,$outta 329 vmr $outhead,$outtail 330 stvx $stage,0,$out 331 addi $inp,$out,15 332 addi $out,$out,16 333 bdnz Loop192 334 335 li $rounds,12 336 addi $out,$out,0x20 337 b Ldone 338 339 .align 4 340 L256: 341 lvx $tmp,0,$inp 342 li $cnt,7 343 li $rounds,14 344 vperm $outtail,$in0,$in0,$ou 345 vsel $stage,$outhead,$outta 346 vmr $outhead,$outtail 347 stvx $stage,0,$out 348 addi $out,$out,16 349 vperm $in1,$in1,$tmp,$key 350 mtctr $cnt 351 352 Loop256: 353 vperm $key,$in1,$in1,$mask 354 vsldoi $tmp,$zero,$in0,12 355 vperm $outtail,$in1,$in1,$ou 356 vsel $stage,$outhead,$outta 357 vmr $outhead,$outtail 358 vcipherlast $key,$key,$rcon 359 stvx $stage,0,$out 360 addi $out,$out,16 361 362 vxor $in0,$in0,$tmp 363 vsldoi $tmp,$zero,$tmp,12 364 vxor $in0,$in0,$tmp 365 vsldoi $tmp,$zero,$tmp,12 366 vxor $in0,$in0,$tmp 367 vadduwm $rcon,$rcon,$rcon 368 vxor $in0,$in0,$key 369 vperm $outtail,$in0,$in0,$ou 370 vsel $stage,$outhead,$outta 371 vmr $outhead,$outtail 372 stvx $stage,0,$out 373 addi $inp,$out,15 374 addi $out,$out,16 375 bdz Ldone 376 377 vspltw $key,$in0,3 378 vsldoi $tmp,$zero,$in1,12 379 vsbox $key,$key 380 381 vxor $in1,$in1,$tmp 382 vsldoi $tmp,$zero,$tmp,12 383 vxor $in1,$in1,$tmp 384 vsldoi $tmp,$zero,$tmp,12 385 vxor $in1,$in1,$tmp 386 387 vxor $in1,$in1,$key 388 b Loop256 389 390 .align 4 391 Ldone: 392 lvx $in1,0,$inp 393 vsel $in1,$outhead,$in1,$ou 394 stvx $in1,0,$inp 395 li $ptr,0 396 mtspr 256,$vrsave 397 stw $rounds,0($out) 398 399 Lenc_key_abort: 400 mr r3,$ptr 401 blr 402 .long 0 403 .byte 0,12,0x14,1,0,0,3,0 404 .long 0 405 .size .${prefix}_set_encrypt_key,.-.${prefix 406 407 .globl .${prefix}_set_decrypt_key 408 $STU $sp,-$FRAME($sp) 409 mflr r10 410 $PUSH r10,$FRAME+$LRSAVE($sp 411 bl Lset_encrypt_key 412 mtlr r10 413 414 cmpwi r3,0 415 bne- Ldec_key_abort 416 417 slwi $cnt,$rounds,4 418 subi $inp,$out,240 419 srwi $rounds,$rounds,1 420 add $out,$inp,$cnt 421 mtctr $rounds 422 423 Ldeckey: 424 lwz r0, 0($inp) 425 lwz r6, 4($inp) 426 lwz r7, 8($inp) 427 lwz r8, 12($inp) 428 addi $inp,$inp,16 429 lwz r9, 0($out) 430 lwz r10,4($out) 431 lwz r11,8($out) 432 lwz r12,12($out) 433 stw r0, 0($out) 434 stw r6, 4($out) 435 stw r7, 8($out) 436 stw r8, 12($out) 437 subi $out,$out,16 438 stw r9, -16($inp) 439 stw r10,-12($inp) 440 stw r11,-8($inp) 441 stw r12,-4($inp) 442 bdnz Ldeckey 443 444 xor r3,r3,r3 445 Ldec_key_abort: 446 addi $sp,$sp,$FRAME 447 blr 448 .long 0 449 .byte 0,12,4,1,0x80,0,3,0 450 .long 0 451 .size .${prefix}_set_decrypt_key,.-.${prefix 452 ___ 453 }}} 454 ############################################## 455 {{{ # Single block en- and decrypt procedu 456 sub gen_block () { 457 my $dir = shift; 458 my $n = $dir eq "de" ? "n" : ""; 459 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3. 460 461 $code.=<<___; 462 .globl .${prefix}_${dir}crypt 463 lwz $rounds,240($key) 464 lis r0,0xfc00 465 mfspr $vrsave,256 466 li $idx,15 467 mtspr 256,r0 468 469 lvx v0,0,$inp 470 neg r11,$out 471 lvx v1,$idx,$inp 472 lvsl v2,0,$inp 473 le?vspltisb v4,0x0f 474 ?lvsl v3,0,r11 475 le?vxor v2,v2,v4 476 li $idx,16 477 vperm v0,v0,v1,v2 478 lvx v1,0,$key 479 ?lvsl v5,0,$key 480 srwi $rounds,$rounds,1 481 lvx v2,$idx,$key 482 addi $idx,$idx,16 483 subi $rounds,$rounds,1 484 ?vperm v1,v1,v2,v5 485 486 vxor v0,v0,v1 487 lvx v1,$idx,$key 488 addi $idx,$idx,16 489 mtctr $rounds 490 491 Loop_${dir}c: 492 ?vperm v2,v2,v1,v5 493 v${n}cipher v0,v0,v2 494 lvx v2,$idx,$key 495 addi $idx,$idx,16 496 ?vperm v1,v1,v2,v5 497 v${n}cipher v0,v0,v1 498 lvx v1,$idx,$key 499 addi $idx,$idx,16 500 bdnz Loop_${dir}c 501 502 ?vperm v2,v2,v1,v5 503 v${n}cipher v0,v0,v2 504 lvx v2,$idx,$key 505 ?vperm v1,v1,v2,v5 506 v${n}cipherlast v0,v0,v1 507 508 vspltisb v2,-1 509 vxor v1,v1,v1 510 li $idx,15 511 ?vperm v2,v1,v2,v3 512 le?vxor v3,v3,v4 513 lvx v1,0,$out 514 vperm v0,v0,v0,v3 515 vsel v1,v1,v0,v2 516 lvx v4,$idx,$out 517 stvx v1,0,$out 518 vsel v0,v0,v4,v2 519 stvx v0,$idx,$out 520 521 mtspr 256,$vrsave 522 blr 523 .long 0 524 .byte 0,12,0x14,0,0,0,3,0 525 .long 0 526 .size .${prefix}_${dir}crypt,.-.${prefix}_${ 527 ___ 528 } 529 &gen_block("en"); 530 &gen_block("de"); 531 }}} 532 533 my $consts=1; 534 foreach(split("\n",$code)) { 535 s/\`([^\`]*)\`/eval($1)/geo; 536 537 # constants table endian-specific conv 538 if ($consts && m/\.(long|byte)\s+(.+)\ 539 my $conv=$3; 540 my @bytes=(); 541 542 # convert to endian-agnostic forma 543 if ($1 eq "long") { 544 foreach (split(/,\s*/,$2)) { 545 my $l = /^0/?oct:int; 546 push @bytes,($l>>24)&0xff,($l> 547 } 548 } else { 549 @bytes = map(/^0/?oct:int,spli 550 } 551 552 # little-endian conversion 553 if ($flavour =~ /le$/o) { 554 SWITCH: for($conv) { 555 /\?inv/ && do { @bytes=m 556 /\?rev/ && do { @bytes=r 557 } 558 } 559 560 #emit 561 print ".byte\t",join(',',map (spri 562 next; 563 } 564 $consts=0 if (m/Lconsts:/o); # end 565 566 # instructions prefixed with '?' are e 567 # to be adjusted accordingly... 568 if ($flavour =~ /le$/o) { # litt 569 s/le\?//o or 570 s/be\?/#be#/o or 571 s/\?lvsr/lvsl/o or 572 s/\?lvsl/lvsr/o or 573 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+, 574 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+ 575 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+ 576 } else { # big- 577 s/le\?/#le#/o or 578 s/be\?//o or 579 s/\?([a-z]+)/$1/o; 580 } 581 582 print $_,"\n"; 583 } 584 585 close STDOUT;
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.