#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [where data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but that turned out to hamper performance when vcipher
# instructions are interleaved.
# It's reckoned that eventual misalignment penalties at page boundaries
# are on average lower than the additional overhead of the pure AltiVec
# approach.
#
# May 2016
#
# Added XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
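
# Typical invocation, as a hedged sketch (the flavour strings below are
# assumptions based on the $flavour handling above; this file itself only
# requires /64/ or /32/ plus an optional trailing "le"):
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.s	# 64-bit little-endian
#	perl aesp8-ppc.pl linux-ppc64   aesp8-ppc.s	# 64-bit big-endian
#
# A flavour matching /64/ selects doubleword load/store mnemonics, a
# trailing "le" enables the le?-prefixed byte-swap fixups, and the
# generated source is filtered through ppc-xlate.pl on the way out.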
$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x58
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
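# A hedged sketch of the C-level contract implied by the register use
# above (r3..r5 = userKey, bits, key); the actual prototypes live with
# the caller and follow OpenSSL's AES_set_*_key conventions:
#
#	int aes_p8_set_encrypt_key(const unsigned char *userKey,
#				   const int bits, AES_KEY *key);
#	int aes_p8_set_decrypt_key(const unsigned char *userKey,
#				   const int bits, AES_KEY *key);
#
# Both return 0 on success, -1 when userKey or key is NULL, and -2 when
# bits is not 128, 192 or 256. The round count is stored at byte offset
# 240 of the schedule, which is where the en-/decrypt paths reload it.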
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
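# gen_block() above emits two AES_encrypt-style single-block entry
# points; an assumed caller-side prototype sketch (not defined here):
#
#	void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
#			    const AES_KEY *key);
#	void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
#			    const AES_KEY *key);
#
# Each call processes exactly one 16-byte block using a key schedule
# produced by the key setup procedures above.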
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
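# A hedged sketch of the CBC entry point's contract as implied by the
# register assignments above (r3..r8 = inp, out, len, key, ivp, enc);
# the prototype itself belongs to the caller:
#
#	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#				size_t len, const AES_KEY *key,
#				unsigned char *ivec, const int enc);
#
# len must be at least 16 (shorter inputs return immediately), enc==0
# selects decryption, and decrypt inputs of 128 bytes or more take the
# interleaved 8x path below.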
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"

	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vadduqm rather than vadduwm. This occurs in
# both the bulk (8 blocks at a time) path and the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
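# To make the difference concrete, a minimal Perl sketch of the two
# increment semantics (illustrative only, never executed by this module;
# the 128-bit counter is modelled as two 64-bit halves on 64-bit perl):
#
#	sub ctr128_inc {		# vadduqm: one 128-bit addition
#		my ($hi, $lo) = @_;
#		$lo = ($lo + 1) & 0xffffffffffffffff;
#		$hi = ($hi + 1) & 0xffffffffffffffff if $lo == 0;  # carry up
#		return ($hi, $lo);
#	}
#	sub ctr32_inc {			# vadduwm: carry stops at bit 32
#		my ($hi, $lo) = @_;
#		my $w = ($lo + 1) & 0xffffffff;		# low word wraps ...
#		return ($hi, ($lo & ~0xffffffff) | $w);	# ... high bits kept
#	}
#
# Once the low 32 bits of the counter wrap, the two schemes diverge,
# which is exactly the counter bug the commits above address.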
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduqm	$ivec,$ivec,$one	# Kernel change for 128-bit
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduqm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4

	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two	# (do all ctr adds as 128-bit)
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,	#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be encrypted already, and the	#
# last tweak value, one suitable for a consecutive call on the same	#
# chunk of data, is written back to the original buffer. In addition,	#
# in "tweak chaining" mode only complete input blocks are processed.	#
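# An illustrative call sequence for the contract described above (a
# hypothetical caller sketch; only the prototype in the comment block
# is given by this file):
#
#	/* normal mode: key2 first encrypts iv to form the initial tweak */
#	aes_p8_xts_encrypt(in, out, len, &key1, &key2, iv);
#
#	/* tweak chaining: key2 == NULL, iv already holds an encrypted
#	 * tweak; only complete 16-byte blocks of len are processed, and
#	 * the tweak for the next chunk of the same stream is written
#	 * back into iv */
#	aes_p8_xts_encrypt(in, out, len, &key1, NULL, iv);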

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

   ($inp,$idx) = ($idx,$inp);				# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff0
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_enc_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_enc

Lxts_enc_no_key2:
	li	$idx,-16
	and	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_enc:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven	# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1		# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_encrypt6x

	andi.	$taillen,$len,15
	subic	r0,$len,32
	subi	$taillen,$taillen,16
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	b	Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_enc_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	subic	r0,$len,32
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0

	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$output,$output,$rndkey0	# just in case $len<16
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_enc

	vxor	$output,$output,$tweak
	lvsr	$inpperm,0,$len			# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	subi	r11,$out,17
	subi	$out,$out,16
	mtctr	$len
	li	$len,16
Loop_xts_enc_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_enc_steal

	mtctr	$rounds
	b	Loop_xts_enc			# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc_ret

	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc_ret:
	mtspr	256,r12				# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
	mr	$inp,r3				# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff8
	mfspr	r12,256				# save vrsave
	li	r11,0
	mtspr	256,r0

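	# Round the length down up front (decrypt only): below, r0 ends
	# up 16 when $len is not a multiple of 16 and 0 otherwise, so
	# when there is a partial tail one extra complete block is
	# withheld here and handled together with the tail by the
	# ciphertext-stealing code after the main loop.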
	andi.	r0,$len,15
	neg	r0,r0
	andi.	r0,r0,16
	sub	$len,$len,r0

	vspltisb	$seven,0x07		# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp			# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11			# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15			# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0			# key2==NULL?
	beq	Lxts_dec_no_key2

	?lvsl	$keyperm,0,$key2		# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0				# don't chain the tweak
	b	Lxts_dec

Lxts_dec_no_key2:
	neg	$idx,$len
	andi.	$idx,$idx,15
	add	$len,$len,$idx			# in "tweak chaining"
						# mode only complete
						# blocks are processed
Lxts_dec:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1		# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven		# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_decrypt6x

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds

	${UCMP}i	$len,16
	blt	Ltail_xts_dec
	be?b	Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_dec_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_dec

Ltail_xts_dec:
	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak1,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak1,$tweak1,$tmp

	subi	$inp,$inp,16
	add	$inp,$inp,$len

	vxor	$inout,$inout,$tweak		# :-(
	vxor	$inout,$inout,$tweak1		# :-)

Loop_xts_dec_short:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec_short

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	#addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr	$inpperm,0,$len			# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	vxor	$rndkey0,$rndkey0,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	subi	r11,$out,1
	mtctr	$len
	li	$len,16
Loop_xts_dec_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_dec_steal

	mtctr	$rounds
	b	Loop_xts_dec			# one more time...
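	# A note on the tail path above: with ciphertext stealing, the
	# last complete block must be decrypted under the *next* tweak
	# ($tweak1) while the partial block keeps the current one, hence
	# the paired vxor lines at Ltail_xts_dec that first undo $tweak
	# and then apply $tweak1 before entering Loop_xts_dec_short.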

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec_ret

	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec_ret:
	mtspr	256,r12				# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	xxlor	2, 32+$eighty7, 32+$eighty7
	vsldoi	$eighty7,$tmp,$eighty7,1	# 0x010101..87
	xxlor	1, 32+$eighty7, 32+$eighty7

	# Load XOR Lconsts.
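	# (r6 is stashed in $x70 around the Lconsts call below; the
	# 16-byte permute constant at offset 0x40 of the constants
	# table is pulled into vs0 and later used as the vpermxor
	# control that merges the shifted tweak with its carry bytes
	# in a single instruction.)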
	mr	$x70, r6
	bl	Lconsts
	lxvw4x	0, $x40, r6		# load XOR contents
	mr	r6, $x70
	li	$x70,0x70

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_xts_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_enc_key

	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	# Switch to the following code, using 0x010101..87, to generate
	# the tweak:
	#	eighty7 = 0x010101..87
	#	vsrab		tmp, tweak, seven	# next tweak value, right shift 7 bits
	#	vand		tmp, tmp, eighty7	# last byte with carry
	#	vaddubm		tweak, tweak, tweak	# left shift 1 bit (x2)
	#	xxlor		vsx, 0, 0
	#	vpermxor	tweak, tweak, tmp, vsx

	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	xxlor	32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1

	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	xxlor	32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2

	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	xxlor	32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3

	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	xxlor	32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4

	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	xxlor	32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5

	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	xxlor	32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0

	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_enc6x

.align	5
Loop_xts_enc6x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc6x

	xxlor	32+$eighty7, 1, 1	# 0x010101..87

	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24

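	# subic above set CA from $len-96; subfe below turns that into
	# a mask (r0 = borrow ? -1 : 0).  ANDed with the now-negative
	# residual length and added to $inp, the mask steps the input
	# pointer back so the trailing lvx_u refetch the last blocks
	# instead of reading past the end of the input.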
	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	xxlor	32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25

	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	xxlor	32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out0,$out0,v27
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	xxlor	32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out4,$out4,v28
	vcipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7

	vcipher	$out0,$out0,v29
	vcipher	$out1,$out1,v29
	xxlor	32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vcipher	$out4,$out4,v29
	vcipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak

	vcipher	$out0,$out0,v30
	vcipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	xxlor	32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5
	vcipher	$out4,$out4,v30
	vcipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vcipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	xxlor	10, 32+$in0, 32+$in0
	xxlor	32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0
	xxlor	32+$in0, 10, 10
	vcipherlast	$tmp,$out5,$in5	# last block might be needed
					# in stealing mode
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60

	mtctr	$rounds
	beq	Loop_xts_enc6x		# did $len-=96 borrow?

	xxlor	32+$eighty7, 2, 2	# 0x010101..87

	addic.	$len,$len,0x60
	beq	Lxts_enc6x_zero
	cmpwi	$len,0x20
	blt	Lxts_enc6x_one
	nop
	beq	Lxts_enc6x_two
	cmpwi	$len,0x40
	blt	Lxts_enc6x_three
	nop
	beq	Lxts_enc6x_four

Lxts_enc6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2		# unused tweak
	vxor	$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc1x

	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24

	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25

	lvsr	$inpperm,0,$taillen
	vcipher	$out0,$out0,v26

	lvx_u	$in0,0,$inp
	vcipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vcipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out0,$out0,v30

	vperm	$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr	$twk0,$twk1		# unused tweak
	vxor	$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out		# store output
	addi	$out,$out,0x10
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_enc6x_done

	add	$inp,$inp,$taillen
	subi	$inp,$inp,16
	lvx_u	$in0,0,$inp
	lvsr	$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm	$in0,$in0,$in0,$inpperm
	vxor	$tmp,$tmp,$twk0
Lxts_enc6x_steal:
	vxor	$in0,$in0,$twk0
	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi	r30,$out,17
	subi	$out,$out,16
	mtctr	$taillen
Loop_xts_enc6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_enc6x_steal

	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_enc1x		# one more time...

.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc6x_ret

	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0

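# _aesp8_xts_enc5x below runs up to five AES pipelines in parallel for
# the residual blocks left over by the 6x loop.  Callers pre-xor the
# inputs with their tweaks, and the tweaks themselves are folded into
# the last round key (vxor with v31), so the final vcipherlast both
# finishes AES and applies the output-side tweak xor.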
.align	5
_aesp8_xts_enc5x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_enc5x

	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24

	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31

	vcipher	$out0,$out0,v26
	lvsr	$inpperm,r0,$taillen	# $in5 is no more
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31

	vcipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vcipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out1,$out1,v29
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vcipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31

	vcipher	$out0,$out0,v30
	vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v30
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vcipher	$out4,$out4,v30

	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.align	5
_aesp8_xts_decrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	xxlor	2, 32+$eighty7, 32+$eighty7
	vsldoi	$eighty7,$tmp,$eighty7,1	# 0x010101..87
	xxlor	1, 32+$eighty7, 32+$eighty7

	# Load XOR Lconsts.
	mr	$x70, r6
	bl	Lconsts
	lxvw4x	0, $x40, r6		# load XOR contents
	mr	r6, $x70
	li	$x70,0x70

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_xts_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_dec_key

	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	xxlor	32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1

	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	xxlor	32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2

	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	xxlor	32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3

	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	xxlor	32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4

	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	xxlor	32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5

	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	xxlor	32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0

	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_dec6x

.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_dec6x

	xxlor	32+$eighty7, 1, 1	# 0x010101..87

	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	xxlor	32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	xxlor	32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	xxlor	32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	xxlor	32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	xxlor	32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vncipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	xxlor	10, 32+$in0, 32+$in0
	xxlor	32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0
	xxlor	32+$in0, 10, 10
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60

	mtctr	$rounds
	beq	Loop_xts_dec6x		# did $len-=96 borrow?

	xxlor	32+$eighty7, 2, 2	# 0x010101..87

	addic.	$len,$len,0x60
	beq	Lxts_dec6x_zero
	cmpwi	$len,0x20
	blt	Lxts_dec6x_one
	nop
	beq	Lxts_dec6x_two
	cmpwi	$len,0x40
	blt	Lxts_dec6x_three
	nop
	beq	Lxts_dec6x_four

Lxts_dec6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5		# unused tweak
	vxor	$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4		# unused tweak
	vmr	$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3		# unused tweak
	vmr	$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2		# unused tweak
	vmr	$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk3
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_dec1x

	subi	r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.	r0,r0,16
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25

	sub	$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u	$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr	$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr	$twk0,$twk1		# unused tweak
	vmr	$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out		# store output
	addi	$out,$out,0x10
	vxor	$out0,$in0,$twk2
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_dec6x_done

	lvx_u	$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor	$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Lxts_dec6x_steal

	add	$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u	$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr	$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm	$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0
	vxor	$out0,$out0,$twk0

	subi	r30,$out,1
	mtctr	$taillen
Loop_xts_dec6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_dec6x_steal

	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_dec1x		# one more time...
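	# The lbzu/stb pair above performs the ciphertext-stealing byte
	# copy: the $taillen tail bytes already written at $out are
	# shifted up by 16 one byte at a time, after which the patched
	# final block is run through the single-block path
	# (Loop_xts_dec1x) once more.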

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec6x_ret

	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0

.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_dec5x

	subi	r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.	r0,r0,16
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31

	sub	$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr	$rounds
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;
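# Illustrative invocation (an example only, not part of any build
# system): the first argument selects the flavour and the second, if
# present, is handed to ppc-xlate.pl, which treats it as the output
# file, e.g.
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.s
#
# "linux-ppc64le" is just an example flavour string: any value
# matching /64/ selects the 64-bit ABI, and a trailing "le" selects
# the little-endian code paths.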