/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated AES-GCM stitched implementation for ppc64le.
#
# Copyright 2022- IBM Inc. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@linux.ibm.com>
#
# GHASH is based on the Karatsuba multiplication method.
#
#	Xi xor X1
#
#	X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
#	  (X1.h * H^4.h + X1.l * H^4.l + X1 * H^4) +
#	  (X2.h * H^3.h + X2.l * H^3.l + X2 * H^3) +
#	  (X3.h * H^2.h + X3.l * H^2.l + X3 * H^2) +
#	  (X4.h * H.h + X4.l * H.l + X4 * H)
#
#  Xi = v0
#  H Poly = v2
#  Hash keys = v3 - v14
#	( H.l, H, H.h)
#	( H^2.l, H^2, H^2.h)
#	( H^3.l, H^3, H^3.h)
#	( H^4.l, H^4, H^4.h)
#
#  v30 is IV
#  v31 - counter 1
#
#  AES used,
#	vs0 - vs14 for round keys
#	v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
#  This implementation uses a stitched AES-GCM approach to improve overall
#  performance.  AES is implemented with 8x blocks and GHASH with two 4x blocks.
#
# ===================================================================================
#

#include <asm/ppc_asm.h>
#include <linux/linkage.h>

.machine	"any"
.text

# 4x loops
# v15 - v18 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle4x
	xxlor	19+32, 1, 1
	xxlor	20+32, 2, 2
	xxlor	21+32, 3, 3
	xxlor	22+32, 4, 4

	vcipher	15, 15, 19
	vcipher	16, 16, 19
	vcipher	17, 17, 19
	vcipher	18, 18, 19

	vcipher	15, 15, 20
	vcipher	16, 16, 20
	vcipher	17, 17, 20
	vcipher	18, 18, 20

	vcipher	15, 15, 21
	vcipher	16, 16, 21
	vcipher	17, 17, 21
	vcipher	18, 18, 21

	vcipher	15, 15, 22
	vcipher	16, 16, 22
	vcipher	17, 17, 22
	vcipher	18, 18, 22

	xxlor	19+32, 5, 5
	xxlor	20+32, 6, 6
	xxlor	21+32, 7, 7
	xxlor	22+32, 8, 8

	vcipher	15, 15, 19
	vcipher	16, 16, 19
	vcipher	17, 17, 19
	vcipher	18, 18, 19

	vcipher	15, 15, 20
	vcipher	16, 16, 20
	vcipher	17, 17, 20
	vcipher	18, 18, 20

	vcipher	15, 15, 21
	vcipher	16, 16, 21
	vcipher	17, 17, 21
	vcipher	18, 18, 21

	vcipher	15, 15, 22
	vcipher	16, 16, 22
	vcipher	17, 17, 22
	vcipher	18, 18, 22

	xxlor	23+32, 9, 9
	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
.endm
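#
# A C-level sketch of what these middle-round macros unroll (illustration
# only; aes_round() is a stand-in name for one full AES round, which is
# what a single vcipher performs):
#
#	for (r = 1; r <= 9; r++)		/* round keys vs1 - vs9 */
#		for (i = 0; i < nblocks; i++)	/* 4 or 8 states */
#			state[i] = aes_round(state[i], rk[r]);
#
# Both loops are fully unrolled, with round keys staged through v19 - v22
# (4x) or v23 - v26 (8x) so consecutive vcipher instructions are independent.
#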
# 8x loops
# v15 - v22 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle8x
	xxlor	23+32, 1, 1
	xxlor	24+32, 2, 2
	xxlor	25+32, 3, 3
	xxlor	26+32, 4, 4

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	vcipher	15, 15, 25
	vcipher	16, 16, 25
	vcipher	17, 17, 25
	vcipher	18, 18, 25
	vcipher	19, 19, 25
	vcipher	20, 20, 25
	vcipher	21, 21, 25
	vcipher	22, 22, 25

	vcipher	15, 15, 26
	vcipher	16, 16, 26
	vcipher	17, 17, 26
	vcipher	18, 18, 26
	vcipher	19, 19, 26
	vcipher	20, 20, 26
	vcipher	21, 21, 26
	vcipher	22, 22, 26

	xxlor	23+32, 5, 5
	xxlor	24+32, 6, 6
	xxlor	25+32, 7, 7
	xxlor	26+32, 8, 8

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	vcipher	15, 15, 25
	vcipher	16, 16, 25
	vcipher	17, 17, 25
	vcipher	18, 18, 25
	vcipher	19, 19, 25
	vcipher	20, 20, 25
	vcipher	21, 21, 25
	vcipher	22, 22, 25

	vcipher	15, 15, 26
	vcipher	16, 16, 26
	vcipher	17, 17, 26
	vcipher	18, 18, 26
	vcipher	19, 19, 26
	vcipher	20, 20, 26
	vcipher	21, 21, 26
	vcipher	22, 22, 26

	xxlor	23+32, 9, 9
	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23
.endm

.macro Loop_aes_middle_1x
	xxlor	19+32, 1, 1
	xxlor	20+32, 2, 2
	xxlor	21+32, 3, 3
	xxlor	22+32, 4, 4

	vcipher	15, 15, 19
	vcipher	15, 15, 20
	vcipher	15, 15, 21
	vcipher	15, 15, 22

	xxlor	19+32, 5, 5
	xxlor	20+32, 6, 6
	xxlor	21+32, 7, 7
	xxlor	22+32, 8, 8

	vcipher	15, 15, 19
	vcipher	15, 15, 20
	vcipher	15, 15, 21
	vcipher	15, 15, 22

	xxlor	19+32, 9, 9
	vcipher	15, 15, 19
.endm

#
# Compute 4x hash values based on the Karatsuba method.
#
.macro ppc_aes_gcm_ghash
	vxor	15, 15, 0

	vpmsumd	23, 12, 15		# H^4.L * X1.L
	vpmsumd	24, 9, 16
	vpmsumd	25, 6, 17
	vpmsumd	26, 3, 18

	vxor	23, 23, 24
	vxor	23, 23, 25
	vxor	23, 23, 26		# L

	vpmsumd	24, 13, 15		# H^4.L * X1.H + H^4.H * X1.L
	vpmsumd	25, 10, 16		# H^3.L * X2.H + H^3.H * X2.L
	vpmsumd	26, 7, 17
	vpmsumd	27, 4, 18

	vxor	24, 24, 25
	vxor	24, 24, 26
	vxor	24, 24, 27		# M

	# sum hash and reduction with H Poly
	vpmsumd	28, 23, 2		# reduction

	vxor	29, 29, 29
	vsldoi	26, 24, 29, 8		# mL
	vsldoi	29, 29, 24, 8		# mH
	vxor	23, 23, 26		# mL + L

	vsldoi	23, 23, 23, 8		# swap
	vxor	23, 23, 28

	vpmsumd	24, 14, 15		# H^4.H * X1.H
	vpmsumd	25, 11, 16
	vpmsumd	26, 8, 17
	vpmsumd	27, 5, 18

	vxor	24, 24, 25
	vxor	24, 24, 26
	vxor	24, 24, 27

	vxor	24, 24, 29

	# sum hash and reduction with H Poly
	vsldoi	27, 23, 23, 8		# swap
	vpmsumd	23, 23, 2
	vxor	27, 27, 24
	vxor	23, 23, 27

	xxlor	32, 23+32, 23+32	# update hash

.endm
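#
# A C-level sketch of the multiply/reduce pattern above (illustration only;
# clmul() stands for a 64x64 -> 128-bit carry-less multiply; vpmsumd does
# two such multiplies and XORs the results together):
#
#	L = clmul(X.lo, H.lo);				/* low product */
#	M = clmul(X.lo, H.hi) ^ clmul(X.hi, H.lo);	/* middle product */
#	H = clmul(X.hi, H.hi);				/* high product */
#	L ^= M << 64;					/* fold mL into L */
#	H ^= M >> 64;					/* fold mH into H */
#
# The 256-bit L:H result is then folded back to 128 bits with the two
# "reduction" vpmsumd steps against the field polynomial kept in v2.
#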
#
# Combine two 4x GHASH results
# v15 - v22 - input blocks
#
.macro ppc_aes_gcm_ghash2_4x
	# first 4x hash
	vxor	15, 15, 0		# Xi + X

	vpmsumd	23, 12, 15		# H^4.L * X1.L
	vpmsumd	24, 9, 16
	vpmsumd	25, 6, 17
	vpmsumd	26, 3, 18

	vxor	23, 23, 24
	vxor	23, 23, 25
	vxor	23, 23, 26		# L

	vpmsumd	24, 13, 15		# H^4.L * X1.H + H^4.H * X1.L
	vpmsumd	25, 10, 16		# H^3.L * X2.H + H^3.H * X2.L
	vpmsumd	26, 7, 17
	vpmsumd	27, 4, 18

	vxor	24, 24, 25
	vxor	24, 24, 26

	# sum hash and reduction with H Poly
	vpmsumd	28, 23, 2		# reduction

	vxor	29, 29, 29

	vxor	24, 24, 27		# M
	vsldoi	26, 24, 29, 8		# mL
	vsldoi	29, 29, 24, 8		# mH
	vxor	23, 23, 26		# mL + L

	vsldoi	23, 23, 23, 8		# swap
	vxor	23, 23, 28

	vpmsumd	24, 14, 15		# H^4.H * X1.H
	vpmsumd	25, 11, 16
	vpmsumd	26, 8, 17
	vpmsumd	27, 5, 18

	vxor	24, 24, 25
	vxor	24, 24, 26
	vxor	24, 24, 27		# H

	vxor	24, 24, 29		# H + mH

	# sum hash and reduction with H Poly
	vsldoi	27, 23, 23, 8		# swap
	vpmsumd	23, 23, 2
	vxor	27, 27, 24
	vxor	27, 23, 27		# 1st Xi

	# 2nd 4x hash
	vpmsumd	24, 9, 20
	vpmsumd	25, 6, 21
	vpmsumd	26, 3, 22
	vxor	19, 19, 27		# Xi + X
	vpmsumd	23, 12, 19		# H^4.L * X1.L

	vxor	23, 23, 24
	vxor	23, 23, 25
	vxor	23, 23, 26		# L

	vpmsumd	24, 13, 19		# H^4.L * X1.H + H^4.H * X1.L
	vpmsumd	25, 10, 20		# H^3.L * X2.H + H^3.H * X2.L
	vpmsumd	26, 7, 21
	vpmsumd	27, 4, 22

	vxor	24, 24, 25
	vxor	24, 24, 26

	# sum hash and reduction with H Poly
	vpmsumd	28, 23, 2		# reduction

	vxor	29, 29, 29

	vxor	24, 24, 27		# M
	vsldoi	26, 24, 29, 8		# mL
	vsldoi	29, 29, 24, 8		# mH
	vxor	23, 23, 26		# mL + L

	vsldoi	23, 23, 23, 8		# swap
	vxor	23, 23, 28

	vpmsumd	24, 14, 19		# H^4.H * X1.H
	vpmsumd	25, 11, 20
	vpmsumd	26, 8, 21
	vpmsumd	27, 5, 22

	vxor	24, 24, 25
	vxor	24, 24, 26
	vxor	24, 24, 27		# H

	vxor	24, 24, 29		# H + mH

	# sum hash and reduction with H Poly
	vsldoi	27, 23, 23, 8		# swap
	vpmsumd	23, 23, 2
	vxor	27, 27, 24
	vxor	23, 23, 27

	xxlor	32, 23+32, 23+32	# update hash

.endm

#
# Compute and update a single hash value
#
.macro ppc_update_hash_1x
	vxor	28, 28, 0

	vxor	19, 19, 19

	vpmsumd	22, 3, 28		# L
	vpmsumd	23, 4, 28		# M
	vpmsumd	24, 5, 28		# H

	vpmsumd	27, 22, 2		# reduction

	vsldoi	25, 23, 19, 8		# mL
	vsldoi	26, 19, 23, 8		# mH
	vxor	22, 22, 25		# L + mL
	vxor	24, 24, 26		# H + mH

	vsldoi	22, 22, 22, 8		# swap
	vxor	22, 22, 27

	vsldoi	20, 22, 22, 8		# swap
	vpmsumd	22, 22, 2		# reduction
	vxor	20, 20, 24
	vxor	22, 22, 20

	vmr	0, 22			# update hash

.endm

.macro SAVE_REGS
	stdu	1, -640(1)
	mflr	0

	std	14, 112(1)
	std	15, 120(1)
	std	16, 128(1)
	std	17, 136(1)
	std	18, 144(1)
	std	19, 152(1)
	std	20, 160(1)
	std	21, 168(1)
	li	9, 256
	stvx	20, 9, 1
	addi	9, 9, 16
	stvx	21, 9, 1
	addi	9, 9, 16
	stvx	22, 9, 1
	addi	9, 9, 16
	stvx	23, 9, 1
	addi	9, 9, 16
	stvx	24, 9, 1
	addi	9, 9, 16
	stvx	25, 9, 1
	addi	9, 9, 16
	stvx	26, 9, 1
	addi	9, 9, 16
	stvx	27, 9, 1
	addi	9, 9, 16
	stvx	28, 9, 1
	addi	9, 9, 16
	stvx	29, 9, 1
	addi	9, 9, 16
	stvx	30, 9, 1
	addi	9, 9, 16
	stvx	31, 9, 1
	stxv	14, 464(1)
	stxv	15, 480(1)
	stxv	16, 496(1)
	stxv	17, 512(1)
	stxv	18, 528(1)
	stxv	19, 544(1)
	stxv	20, 560(1)
	stxv	21, 576(1)
	stxv	22, 592(1)
	std	0, 656(1)		# save LR in the caller's frame
.endm

.macro RESTORE_REGS
	lxv	14, 464(1)
	lxv	15, 480(1)
	lxv	16, 496(1)
	lxv	17, 512(1)
	lxv	18, 528(1)
	lxv	19, 544(1)
	lxv	20, 560(1)
	lxv	21, 576(1)
	lxv	22, 592(1)
	li	9, 256
	lvx	20, 9, 1
	addi	9, 9, 16
	lvx	21, 9, 1
	addi	9, 9, 16
	lvx	22, 9, 1
	addi	9, 9, 16
	lvx	23, 9, 1
	addi	9, 9, 16
	lvx	24, 9, 1
	addi	9, 9, 16
	lvx	25, 9, 1
	addi	9, 9, 16
	lvx	26, 9, 1
	addi	9, 9, 16
	lvx	27, 9, 1
	addi	9, 9, 16
	lvx	28, 9, 1
	addi	9, 9, 16
	lvx	29, 9, 1
	addi	9, 9, 16
	lvx	30, 9, 1
	addi	9, 9, 16
	lvx	31, 9, 1

	ld	0, 656(1)
	ld	14, 112(1)
	ld	15, 120(1)
	ld	16, 128(1)
	ld	17, 136(1)
	ld	18, 144(1)
	ld	19, 152(1)
	ld	20, 160(1)
	ld	21, 168(1)

	mtlr	0
	addi	1, 1, 640
.endm
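#
# Stack frame used by SAVE_REGS/RESTORE_REGS, as implied by the offsets
# above (a 640-byte frame; LR is saved at 656(r1), the ABI LR save slot
# in the caller's frame):
#
#	112 - 168 : r14 - r21
#	256 - 447 : v20 - v31
#	464 - 607 : vs14 - vs22
#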
.macro LOAD_HASH_TABLE
	# Load Xi
	lxvb16x	32, 0, 8		# load Xi

	# load Hash - h^4, h^3, h^2, h
	li	10, 32
	lxvd2x	2+32, 10, 8		# H Poly
	li	10, 48
	lxvd2x	3+32, 10, 8		# Hl
	li	10, 64
	lxvd2x	4+32, 10, 8		# H
	li	10, 80
	lxvd2x	5+32, 10, 8		# Hh

	li	10, 96
	lxvd2x	6+32, 10, 8		# H^2l
	li	10, 112
	lxvd2x	7+32, 10, 8		# H^2
	li	10, 128
	lxvd2x	8+32, 10, 8		# H^2h

	li	10, 144
	lxvd2x	9+32, 10, 8		# H^3l
	li	10, 160
	lxvd2x	10+32, 10, 8		# H^3
	li	10, 176
	lxvd2x	11+32, 10, 8		# H^3h

	li	10, 192
	lxvd2x	12+32, 10, 8		# H^4l
	li	10, 208
	lxvd2x	13+32, 10, 8		# H^4
	li	10, 224
	lxvd2x	14+32, 10, 8		# H^4h
.endm
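#
# Byte layout of the hash table at r8, as implied by the loads above:
#
#	  0: Xi       32: H Poly   48: H.l     64: H      80: H.h
#	 96: H^2.l   112: H^2     128: H^2.h
#	144: H^3.l   160: H^3     176: H^3.h
#	192: H^4.l   208: H^4     224: H^4.h
#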
#
# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
#		const char *rk, unsigned char iv[16], void *Xip);
#
#    r3 - inp
#    r4 - out
#    r5 - len
#    r6 - AES round keys
#    r7 - iv and other data
#    r8 - Xi, H Poly, hash keys
#
#    rounds is at offset 240 in rk
#    Xi is at 0 in gcm_table (Xip).
#
_GLOBAL(aes_p10_gcm_encrypt)
.align 5

	SAVE_REGS

	LOAD_HASH_TABLE

	# initialize ICB: GHASH( IV ), IV - r7
	lxvb16x	30+32, 0, 7		# load IV - v30

	mr	12, 5			# length
	li	11, 0			# block index

	# counter 1
	vxor	31, 31, 31
	vspltisb 22, 1
	vsldoi	31, 31, 22, 1		# counter 1

	# load round keys to VSR
	lxv	0, 0(6)
	lxv	1, 0x10(6)
	lxv	2, 0x20(6)
	lxv	3, 0x30(6)
	lxv	4, 0x40(6)
	lxv	5, 0x50(6)
	lxv	6, 0x60(6)
	lxv	7, 0x70(6)
	lxv	8, 0x80(6)
	lxv	9, 0x90(6)
	lxv	10, 0xa0(6)

	# load rounds - 10 (128), 12 (192), 14 (256)
	lwz	9, 240(6)

	#
	# vxor	state, state, w	# addroundkey
	xxlor	32+29, 0, 0
	vxor	15, 30, 29		# IV + round key - add round key 0

	cmpdi	9, 10
	beq	Loop_aes_gcm_8x

	# load 2 more round keys (v11, v12)
	lxv	11, 0xb0(6)
	lxv	12, 0xc0(6)

	cmpdi	9, 12
	beq	Loop_aes_gcm_8x

	# load 2 more round keys (v13, v14)
	lxv	13, 0xd0(6)
	lxv	14, 0xe0(6)
	cmpdi	9, 14
	beq	Loop_aes_gcm_8x

	b	aes_gcm_out

.align 5
Loop_aes_gcm_8x:
	mr	14, 3
	mr	9, 4

	#
	# check partial block
	#
Continue_partial_check:
	ld	15, 56(7)
	cmpdi	15, 0
	beq	Continue
	bgt	Final_block
	cmpdi	15, 16
	blt	Final_block

Continue:
	# n blocks
	li	10, 128
	divdu	10, 12, 10		# n 128-byte blocks
	cmpdi	10, 0
	beq	Loop_last_block

	vaddudm	30, 30, 31		# IV + counter
	vxor	16, 30, 29
	vaddudm	30, 30, 31
	vxor	17, 30, 29
	vaddudm	30, 30, 31
	vxor	18, 30, 29
	vaddudm	30, 30, 31
	vxor	19, 30, 29
	vaddudm	30, 30, 31
	vxor	20, 30, 29
	vaddudm	30, 30, 31
	vxor	21, 30, 29
	vaddudm	30, 30, 31
	vxor	22, 30, 29

	mtctr	10

	li	15, 16
	li	16, 32
	li	17, 48
	li	18, 64
	li	19, 80
	li	20, 96
	li	21, 112

	lwz	10, 240(6)

Loop_8x_block:

	lxvb16x	15, 0, 14		# load block
	lxvb16x	16, 15, 14		# load block
	lxvb16x	17, 16, 14		# load block
	lxvb16x	18, 17, 14		# load block
	lxvb16x	19, 18, 14		# load block
	lxvb16x	20, 19, 14		# load block
	lxvb16x	21, 20, 14		# load block
	lxvb16x	22, 21, 14		# load block
	addi	14, 14, 128

	Loop_aes_middle8x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_ghash

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_ghash

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_ghash
	b	aes_gcm_out
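#
# The stitched 8x path (Loop_8x_block above, with the GHASH performed in
# Do_next_ghash below) is, in rough C terms (illustration only;
# aes_ctr_block() and ghash_4x() are stand-in names):
#
#	while (len >= 128) {
#		for (i = 0; i < 8; i++)		/* AES-CTR on 8 blocks */
#			out[i] = in[i] ^ aes_ctr_block(rk, ctr++);
#		Xi = ghash_4x(ghash_4x(Xi, &out[0]), &out[4]);
#		in += 8; out += 8; len -= 128;
#	}
#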
Do_next_ghash:

	#
	# last round
	vcipherlast	15, 15, 23
	vcipherlast	16, 16, 23

	xxlxor	47, 47, 15
	stxvb16x 47, 0, 9		# store output
	xxlxor	48, 48, 16
	stxvb16x 48, 15, 9		# store output

	vcipherlast	17, 17, 23
	vcipherlast	18, 18, 23

	xxlxor	49, 49, 17
	stxvb16x 49, 16, 9		# store output
	xxlxor	50, 50, 18
	stxvb16x 50, 17, 9		# store output

	vcipherlast	19, 19, 23
	vcipherlast	20, 20, 23

	xxlxor	51, 51, 19
	stxvb16x 51, 18, 9		# store output
	xxlxor	52, 52, 20
	stxvb16x 52, 19, 9		# store output

	vcipherlast	21, 21, 23
	vcipherlast	22, 22, 23

	xxlxor	53, 53, 21
	stxvb16x 53, 20, 9		# store output
	xxlxor	54, 54, 22
	stxvb16x 54, 21, 9		# store output

	addi	9, 9, 128

	# ghash here
	ppc_aes_gcm_ghash2_4x

	xxlor	27+32, 0, 0
	vaddudm	30, 30, 31		# IV + counter
	vmr	29, 30
	vxor	15, 30, 27		# add round key
	vaddudm	30, 30, 31
	vxor	16, 30, 27
	vaddudm	30, 30, 31
	vxor	17, 30, 27
	vaddudm	30, 30, 31
	vxor	18, 30, 27
	vaddudm	30, 30, 31
	vxor	19, 30, 27
	vaddudm	30, 30, 31
	vxor	20, 30, 27
	vaddudm	30, 30, 31
	vxor	21, 30, 27
	vaddudm	30, 30, 31
	vxor	22, 30, 27

	addi	12, 12, -128
	addi	11, 11, 128

	bdnz	Loop_8x_block

	vmr	30, 29
	stxvb16x 30+32, 0, 7		# update IV

Loop_last_block:
	cmpdi	12, 0
	beq	aes_gcm_out

	# loop last few blocks
	li	10, 16
	divdu	10, 12, 10

	mtctr	10

	lwz	10, 240(6)

	cmpdi	12, 16
	blt	Final_block

Next_rem_block:
	lxvb16x	15, 0, 14		# load block

	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_1x

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_1x

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_1x

Do_next_1x:
	vcipherlast	15, 15, 23

	xxlxor	47, 47, 15
	stxvb16x 47, 0, 9		# store output
	addi	14, 14, 16
	addi	9, 9, 16

	vmr	28, 15
	ppc_update_hash_1x

	addi	12, 12, -16
	addi	11, 11, 16
	xxlor	19+32, 0, 0
	vaddudm	30, 30, 31		# IV + counter
	vxor	15, 30, 19		# add round key

	bdnz	Next_rem_block

	li	15, 0
	std	15, 56(7)		# clear partial block count
	stxvb16x 30+32, 0, 7		# update IV
	cmpdi	12, 0
	beq	aes_gcm_out
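#
# Note on the context at r7 as used above: the 16-byte IV/counter block
# lives at 0(r7) and the partial-block byte count at 56(7); a non-zero
# count routes the next call through the partial-block path below.
#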
Final_block:
	lwz	10, 240(6)
	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_final_1x

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_final_1x

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_final_1x

Do_final_1x:
	vcipherlast	15, 15, 23

	# check partial block
	li	21, 0			# encrypt
	ld	15, 56(7)		# partial?
	cmpdi	15, 0
	beq	Normal_block
	bl	Do_partial_block

	cmpdi	12, 0
	ble	aes_gcm_out

	b	Continue_partial_check

Normal_block:
	lxvb16x	15, 0, 14		# load last block
	xxlxor	47, 47, 15

	# create partial block mask
	li	15, 16
	sub	15, 15, 12		# index to the mask

	vspltisb 16, -1			# first 16 bytes - 0xffff...ff
	vspltisb 17, 0			# second 16 bytes - 0x0000...00
	li	10, 192
	stvx	16, 10, 1
	addi	10, 10, 16
	stvx	17, 10, 1

	addi	10, 1, 192
	lxvb16x	16, 15, 10		# load partial block mask
	xxland	47, 47, 16

	vmr	28, 15
	ppc_update_hash_1x

	# store only the remaining bytes
	bl	Write_partial_block

	stxvb16x 30+32, 0, 7		# update IV
	std	12, 56(7)		# update partial block count
	li	16, 16

	stxvb16x 32, 0, 8		# write out Xi
	stxvb16x 32, 16, 8		# write out Xi
	b	aes_gcm_out

#
# Compute data mask
#
.macro GEN_MASK _mask _start _end
	vspltisb 16, -1			# first 16 bytes - 0xffff...ff
	vspltisb 17, 0			# second 16 bytes - 0x0000...00
	li	10, 192
	stxvb16x 17+32, 10, 1
	add	10, 10, \_start
	stxvb16x 16+32, 10, 1
	add	10, 10, \_end
	stxvb16x 17+32, 10, 1

	addi	10, 1, 192
	lxvb16x	\_mask, 0, 10		# load partial block mask
.endm
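#
# GEN_MASK in C terms (a sketch of the stores above; start/end are the
# macro arguments):
#
#	unsigned char buf[48];
#	memset(buf, 0, 16);			/* leading zero bytes */
#	memset(buf + start, 0xff, 16);		/* 0xff window */
#	memset(buf + start + end, 0, 16);	/* trailing zero bytes */
#	/* mask = buf[0..15]: `start` zeros, then `end` bytes of 0xff */
#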
#
# Handle multiple partial blocks for encrypt and decrypt
# operations.
#
SYM_FUNC_START_LOCAL(Do_partial_block)
	add	17, 15, 5
	cmpdi	17, 16
	bgt	Big_block
	GEN_MASK 18, 15, 5
	b	_Partial
SYM_FUNC_END(Do_partial_block)
Big_block:
	li	16, 16
	GEN_MASK 18, 15, 16

_Partial:
	lxvb16x	17+32, 0, 14		# load last block
	sldi	16, 15, 3
	mtvsrdd	32+16, 0, 16
	vsro	17, 17, 16
	xxlxor	47, 47, 17+32
	xxland	47, 47, 18

	vxor	0, 0, 0			# clear Xi
	vmr	28, 15

	cmpdi	21, 0			# encrypt/decrypt ops?
	beq	Skip_decrypt
	xxland	32+28, 32+17, 18

Skip_decrypt:

	ppc_update_hash_1x

	li	16, 16
	lxvb16x	32+29, 16, 8
	vxor	0, 0, 29
	stxvb16x 32, 0, 8		# save Xi
	stxvb16x 32, 16, 8		# save Xi

	# store partial block
	# loop the rest of the stream if any
	sldi	16, 15, 3
	mtvsrdd	32+16, 0, 16
	vslo	15, 15, 16
	#stxvb16x 15+32, 0, 9		# last block

	li	16, 16
	sub	17, 16, 15		# 16 - partial

	add	16, 15, 5
	cmpdi	16, 16
	bgt	Larger_16
	mr	17, 5
Larger_16:

	# write partial
	li	10, 192
	stxvb16x 15+32, 10, 1		# save current block

	addi	10, 9, -1
	addi	16, 1, 191
	mtctr	17			# move partial byte count

Write_last_partial:
	lbzu	18, 1(16)
	stbu	18, 1(10)
	bdnz	Write_last_partial
	# done writing the partial bytes

	add	14, 14, 17
	add	9, 9, 17
	sub	12, 12, 17
	add	11, 11, 17

	add	15, 15, 5
	cmpdi	15, 16
	blt	Save_partial

	vaddudm	30, 30, 31
	stxvb16x 30+32, 0, 7		# update IV
	xxlor	32+29, 0, 0
	vxor	15, 30, 29		# IV + round key - add round key 0
	li	15, 0
	std	15, 56(7)		# partial done - clear
	b	Partial_done
Save_partial:
	std	15, 56(7)		# partial

Partial_done:
	blr

#
# Write partial block
# r9 - output
# r12 - remaining bytes
# v15 - partial input data
#
SYM_FUNC_START_LOCAL(Write_partial_block)
	li	10, 192
	stxvb16x 15+32, 10, 1		# last block

	addi	10, 9, -1
	addi	16, 1, 191

	mtctr	12			# remaining bytes
	li	15, 0

Write_last_byte:
	lbzu	14, 1(16)
	stbu	14, 1(10)
	bdnz	Write_last_byte
	blr
SYM_FUNC_END(Write_partial_block)

aes_gcm_out:
	# out = state
	stxvb16x 32, 0, 8		# write out Xi
	add	3, 11, 12		# return count

	RESTORE_REGS
	blr
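#
# Both entry points share the C prototype documented above for the encrypt
# path (the decrypt signature is assumed to match); the processed byte
# count is returned in r3:
#
#	size_t aes_p10_gcm_encrypt(const void *inp, void *out, size_t len,
#				   const char *rk, unsigned char iv[16],
#				   void *Xip);
#	size_t aes_p10_gcm_decrypt(const void *inp, void *out, size_t len,
#				   const char *rk, unsigned char iv[16],
#				   void *Xip);
#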
#
# 8x Decrypt
#
_GLOBAL(aes_p10_gcm_decrypt)
.align 5

	SAVE_REGS

	LOAD_HASH_TABLE

	# initialize ICB: GHASH( IV ), IV - r7
	lxvb16x	30+32, 0, 7		# load IV - v30

	mr	12, 5			# length
	li	11, 0			# block index

	# counter 1
	vxor	31, 31, 31
	vspltisb 22, 1
	vsldoi	31, 31, 22, 1		# counter 1

	# load round keys to VSR
	lxv	0, 0(6)
	lxv	1, 0x10(6)
	lxv	2, 0x20(6)
	lxv	3, 0x30(6)
	lxv	4, 0x40(6)
	lxv	5, 0x50(6)
	lxv	6, 0x60(6)
	lxv	7, 0x70(6)
	lxv	8, 0x80(6)
	lxv	9, 0x90(6)
	lxv	10, 0xa0(6)

	# load rounds - 10 (128), 12 (192), 14 (256)
	lwz	9, 240(6)

	#
	# vxor	state, state, w	# addroundkey
	xxlor	32+29, 0, 0
	vxor	15, 30, 29		# IV + round key - add round key 0

	cmpdi	9, 10
	beq	Loop_aes_gcm_8x_dec

	# load 2 more round keys (v11, v12)
	lxv	11, 0xb0(6)
	lxv	12, 0xc0(6)

	cmpdi	9, 12
	beq	Loop_aes_gcm_8x_dec

	# load 2 more round keys (v13, v14)
	lxv	13, 0xd0(6)
	lxv	14, 0xe0(6)
	cmpdi	9, 14
	beq	Loop_aes_gcm_8x_dec

	b	aes_gcm_out

.align 5
Loop_aes_gcm_8x_dec:
	mr	14, 3
	mr	9, 4

	#
	# check partial block
	#
Continue_partial_check_dec:
	ld	15, 56(7)
	cmpdi	15, 0
	beq	Continue_dec
	bgt	Final_block_dec
	cmpdi	15, 16
	blt	Final_block_dec

Continue_dec:
	# n blocks
	li	10, 128
	divdu	10, 12, 10		# n 128-byte blocks
	cmpdi	10, 0
	beq	Loop_last_block_dec

	vaddudm	30, 30, 31		# IV + counter
	vxor	16, 30, 29
	vaddudm	30, 30, 31
	vxor	17, 30, 29
	vaddudm	30, 30, 31
	vxor	18, 30, 29
	vaddudm	30, 30, 31
	vxor	19, 30, 29
	vaddudm	30, 30, 31
	vxor	20, 30, 29
	vaddudm	30, 30, 31
	vxor	21, 30, 29
	vaddudm	30, 30, 31
	vxor	22, 30, 29

	mtctr	10

	li	15, 16
	li	16, 32
	li	17, 48
	li	18, 64
	li	19, 80
	li	20, 96
	li	21, 112

	lwz	10, 240(6)

Loop_8x_block_dec:

	lxvb16x	15, 0, 14		# load block
	lxvb16x	16, 15, 14		# load block
	lxvb16x	17, 16, 14		# load block
	lxvb16x	18, 17, 14		# load block
	lxvb16x	19, 18, 14		# load block
	lxvb16x	20, 19, 14		# load block
	lxvb16x	21, 20, 14		# load block
	lxvb16x	22, 21, 14		# load block
	addi	14, 14, 128

	Loop_aes_middle8x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_ghash_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_ghash_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_ghash_dec
	b	aes_gcm_out

Do_next_ghash_dec:

	#
	# last round
	vcipherlast	15, 15, 23
	vcipherlast	16, 16, 23

	xxlxor	47, 47, 15
	stxvb16x 47, 0, 9		# store output
	xxlxor	48, 48, 16
	stxvb16x 48, 15, 9		# store output

	vcipherlast	17, 17, 23
	vcipherlast	18, 18, 23

	xxlxor	49, 49, 17
	stxvb16x 49, 16, 9		# store output
	xxlxor	50, 50, 18
	stxvb16x 50, 17, 9		# store output

	vcipherlast	19, 19, 23
	vcipherlast	20, 20, 23

	xxlxor	51, 51, 19
	stxvb16x 51, 18, 9		# store output
	xxlxor	52, 52, 20
	stxvb16x 52, 19, 9		# store output

	vcipherlast	21, 21, 23
	vcipherlast	22, 22, 23

	xxlxor	53, 53, 21
	stxvb16x 53, 20, 9		# store output
	xxlxor	54, 54, 22
	stxvb16x 54, 21, 9		# store output

	addi	9, 9, 128

	# copy ciphertext blocks back to v15 - v22 so GHASH is
	# computed over the ciphertext
	xxlor	15+32, 15, 15
	xxlor	16+32, 16, 16
	xxlor	17+32, 17, 17
	xxlor	18+32, 18, 18
	xxlor	19+32, 19, 19
	xxlor	20+32, 20, 20
	xxlor	21+32, 21, 21
	xxlor	22+32, 22, 22

	# ghash here
	ppc_aes_gcm_ghash2_4x

	xxlor	27+32, 0, 0
	vaddudm	30, 30, 31		# IV + counter
	vmr	29, 30
	vxor	15, 30, 27		# add round key
	vaddudm	30, 30, 31
	vxor	16, 30, 27
	vaddudm	30, 30, 31
	vxor	17, 30, 27
	vaddudm	30, 30, 31
	vxor	18, 30, 27
	vaddudm	30, 30, 31
	vxor	19, 30, 27
	vaddudm	30, 30, 31
	vxor	20, 30, 27
	vaddudm	30, 30, 31
	vxor	21, 30, 27
	vaddudm	30, 30, 31
	vxor	22, 30, 27

	addi	12, 12, -128
	addi	11, 11, 128

	bdnz	Loop_8x_block_dec

	vmr	30, 29
	stxvb16x 30+32, 0, 7		# update IV

Loop_last_block_dec:
	cmpdi	12, 0
	beq	aes_gcm_out

	# loop last few blocks
	li	10, 16
	divdu	10, 12, 10

	mtctr	10

	lwz	10, 240(6)

	cmpdi	12, 16
	blt	Final_block_dec

Next_rem_block_dec:
	lxvb16x	15, 0, 14		# load block

	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_1x_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_1x_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_1x_dec

Do_next_1x_dec:
	vcipherlast	15, 15, 23

	xxlxor	47, 47, 15
	stxvb16x 47, 0, 9		# store output
	addi	14, 14, 16
	addi	9, 9, 16

	xxlor	28+32, 15, 15		# hash the ciphertext block
	#vmr	28, 15
	ppc_update_hash_1x

	addi	12, 12, -16
	addi	11, 11, 16
	xxlor	19+32, 0, 0
	vaddudm	30, 30, 31		# IV + counter
	vxor	15, 30, 19		# add round key

	bdnz	Next_rem_block_dec

	li	15, 0
	std	15, 56(7)		# clear partial block count
	stxvb16x 30+32, 0, 7		# update IV
	cmpdi	12, 0
	beq	aes_gcm_out

Final_block_dec:
	lwz	10, 240(6)
	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_final_1x_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_final_1x_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_final_1x_dec

Do_final_1x_dec:
	vcipherlast	15, 15, 23

	# check partial block
	li	21, 1			# decrypt
	ld	15, 56(7)		# partial?
	cmpdi	15, 0
	beq	Normal_block_dec
	bl	Do_partial_block
	cmpdi	12, 0
	ble	aes_gcm_out

	b	Continue_partial_check_dec

Normal_block_dec:
	lxvb16x	15, 0, 14		# load last block
	xxlxor	47, 47, 15

	# create partial block mask
	li	15, 16
	sub	15, 15, 12		# index to the mask

	vspltisb 16, -1			# first 16 bytes - 0xffff...ff
	vspltisb 17, 0			# second 16 bytes - 0x0000...00
	li	10, 192
	stvx	16, 10, 1
	addi	10, 10, 16
	stvx	17, 10, 1

	addi	10, 1, 192
	lxvb16x	16, 15, 10		# load partial block mask
	xxland	47, 47, 16

	xxland	32+28, 15, 16		# hash the masked ciphertext
	#vmr	28, 15
	ppc_update_hash_1x

	# store only the remaining bytes
	bl	Write_partial_block

	stxvb16x 30+32, 0, 7		# update IV
	std	12, 56(7)		# update partial block count
	li	16, 16

	stxvb16x 32, 0, 8		# write out Xi
	stxvb16x 32, 16, 8		# write out Xi
	b	aes_gcm_out