1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * Accelerated GHASH implementation with ARMv8 4 * 5 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.b 6 */ 7 8 #include <linux/linkage.h> 9 #include <linux/cfi_types.h> 10 #include <asm/assembler.h> 11 12 SHASH .req v0 13 SHASH2 .req v1 14 T1 .req v2 15 T2 .req v3 16 MASK .req v4 17 XM .req v5 18 XL .req v6 19 XH .req v7 20 IN1 .req v7 21 22 k00_16 .req v8 23 k32_48 .req v9 24 25 t3 .req v10 26 t4 .req v11 27 t5 .req v12 28 t6 .req v13 29 t7 .req v14 30 t8 .req v15 31 t9 .req v16 32 33 perm1 .req v17 34 perm2 .req v18 35 perm3 .req v19 36 37 sh1 .req v20 38 sh2 .req v21 39 sh3 .req v22 40 sh4 .req v23 41 42 ss1 .req v24 43 ss2 .req v25 44 ss3 .req v26 45 ss4 .req v27 46 47 XL2 .req v8 48 XM2 .req v9 49 XH2 .req v10 50 XL3 .req v11 51 XM3 .req v12 52 XH3 .req v13 53 TT3 .req v14 54 TT4 .req v15 55 HH .req v16 56 HH3 .req v17 57 HH4 .req v18 58 HH34 .req v19 59 60 .text 61 .arch armv8-a+crypto 62 63 .macro __pmull_p64, rd, rn, r 64 pmull \rd\().1q, \rn\().1d, 65 .endm 66 67 .macro __pmull2_p64, rd, rn, 68 pmull2 \rd\().1q, \rn\().2d, 69 .endm 70 71 .macro __pmull_p8, rq, ad, bd 72 ext t3.8b, \ad\().8b, \ad\ 73 ext t5.8b, \ad\().8b, \ad\ 74 ext t7.8b, \ad\().8b, \ad\ 75 76 __pmull_p8_\bd \rq, \ad 77 .endm 78 79 .macro __pmull2_p8, rq, ad, b 80 tbl t3.16b, {\ad\().16b}, 81 tbl t5.16b, {\ad\().16b}, 82 tbl t7.16b, {\ad\().16b}, 83 84 __pmull2_p8_\bd \rq, \ad 85 .endm 86 87 .macro __pmull_p8_SHASH, rq, 88 __pmull_p8_tail \rq, \ad\().8b, SHASH. 89 .endm 90 91 .macro __pmull_p8_SHASH2, rq, 92 __pmull_p8_tail \rq, \ad\().8b, SHASH2 93 .endm 94 95 .macro __pmull2_p8_SHASH, rq, 96 __pmull_p8_tail \rq, \ad\().16b, SHASH 97 .endm 98 99 .macro __pmull_p8_tail, rq, a 100 pmull\t t3.8h, t3.\nb, \bd 101 pmull\t t4.8h, \ad, \b1\().\nb 102 pmull\t t5.8h, t5.\nb, \bd 103 pmull\t t6.8h, \ad, \b2\().\nb 104 pmull\t t7.8h, t7.\nb, \bd 105 pmull\t t8.8h, \ad, \b3\().\nb 106 pmull\t t9.8h, \ad, \b4\().\nb 107 pmull\t \rq\().8h, \ad, \bd 108 109 eor t3.16b, t3.16b, t4.16b 110 eor t5.16b, t5.16b, t6.16b 111 eor t7.16b, t7.16b, t8.16b 112 113 uzp1 t4.2d, t3.2d, t5.2d 114 uzp2 t3.2d, t3.2d, t5.2d 115 uzp1 t6.2d, t7.2d, t9.2d 116 uzp2 t7.2d, t7.2d, t9.2d 117 118 // t3 = (L) (P0 + P1) << 8 119 // t5 = (M) (P2 + P3) << 16 120 eor t4.16b, t4.16b, t3.16b 121 and t3.16b, t3.16b, k32_48 122 123 // t7 = (N) (P4 + P5) << 24 124 // t9 = (K) (P6 + P7) << 32 125 eor t6.16b, t6.16b, t7.16b 126 and t7.16b, t7.16b, k00_16 127 128 eor t4.16b, t4.16b, t3.16b 129 eor t6.16b, t6.16b, t7.16b 130 131 zip2 t5.2d, t4.2d, t3.2d 132 zip1 t3.2d, t4.2d, t3.2d 133 zip2 t9.2d, t6.2d, t7.2d 134 zip1 t7.2d, t6.2d, t7.2d 135 136 ext t3.16b, t3.16b, t3.16b 137 ext t5.16b, t5.16b, t5.16b 138 ext t7.16b, t7.16b, t7.16b 139 ext t9.16b, t9.16b, t9.16b 140 141 eor t3.16b, t3.16b, t5.16b 142 eor t7.16b, t7.16b, t9.16b 143 eor \rq\().16b, \rq\().16b 144 eor \rq\().16b, \rq\().16b 145 .endm 146 147 .macro __pmull_pre_p64 148 add x8, x3, #16 149 ld1 {HH.2d-HH4.2d}, [x8] 150 151 trn1 SHASH2.2d, SHASH.2d, H 152 trn2 T1.2d, SHASH.2d, HH.2d 153 eor SHASH2.16b, SHASH2.16b 154 155 trn1 HH34.2d, HH3.2d, HH4.2 156 trn2 T1.2d, HH3.2d, HH4.2d 157 eor HH34.16b, HH34.16b, T1 158 159 movi MASK.16b, #0xe1 160 shl MASK.2d, MASK.2d, #57 161 .endm 162 163 .macro __pmull_pre_p8 164 ext SHASH2.16b, SHASH.16b, 165 eor SHASH2.16b, SHASH2.16b 166 167 // k00_16 := 0x0000000000000000_000000 168 // k32_48 := 0x00000000ffffffff_0000ff 169 movi k32_48.2d, #0xffffffff 170 mov k32_48.h[2], k32_48.h[ 171 ushr k00_16.2d, k32_48.2d, 172 173 // prepare the permutation vectors 174 mov_q x5, 0x080f0e0d0c0b0a09 175 movi T1.8b, #8 176 dup perm1.2d, x5 177 eor perm1.16b, perm1.16b, 178 ushr perm2.2d, perm1.2d, #8 179 ushr perm3.2d, perm1.2d, #1 180 ushr T1.2d, perm1.2d, #24 181 sli perm2.2d, perm1.2d, #5 182 sli perm3.2d, perm1.2d, #4 183 sli T1.2d, perm1.2d, #40 184 185 // precompute loop invariants 186 tbl sh1.16b, {SHASH.16b}, 187 tbl sh2.16b, {SHASH.16b}, 188 tbl sh3.16b, {SHASH.16b}, 189 tbl sh4.16b, {SHASH.16b}, 190 ext ss1.8b, SHASH2.8b, SHA 191 ext ss2.8b, SHASH2.8b, SHA 192 ext ss3.8b, SHASH2.8b, SHA 193 ext ss4.8b, SHASH2.8b, SHA 194 .endm 195 196 // 197 // PMULL (64x64->128) based reduction 198 // it in a single instruction. 199 // 200 .macro __pmull_reduce_p64 201 pmull T2.1q, XL.1d, MASK.1d 202 eor XM.16b, XM.16b, T1.16b 203 204 mov XH.d[0], XM.d[1] 205 mov XM.d[1], XL.d[0] 206 207 eor XL.16b, XM.16b, T2.16b 208 ext T2.16b, XL.16b, XL.16b 209 pmull XL.1q, XL.1d, MASK.1d 210 .endm 211 212 // 213 // Alternative reduction for CPUs that 214 // 64x64->128 PMULL instruction 215 // 216 .macro __pmull_reduce_p8 217 eor XM.16b, XM.16b, T1.16b 218 219 mov XL.d[1], XM.d[0] 220 mov XH.d[0], XM.d[1] 221 222 shl T1.2d, XL.2d, #57 223 shl T2.2d, XL.2d, #62 224 eor T2.16b, T2.16b, T1.16b 225 shl T1.2d, XL.2d, #63 226 eor T2.16b, T2.16b, T1.16b 227 ext T1.16b, XL.16b, XH.16b 228 eor T2.16b, T2.16b, T1.16b 229 230 mov XL.d[1], T2.d[0] 231 mov XH.d[0], T2.d[1] 232 233 ushr T2.2d, XL.2d, #1 234 eor XH.16b, XH.16b, XL.16b 235 eor XL.16b, XL.16b, T2.16b 236 ushr T2.2d, T2.2d, #6 237 ushr XL.2d, XL.2d, #1 238 .endm 239 240 .macro __pmull_ghash, pn 241 ld1 {SHASH.2d}, [x3] 242 ld1 {XL.2d}, [x1] 243 244 __pmull_pre_\pn 245 246 /* do the head block first, if supplie 247 cbz x4, 0f 248 ld1 {T1.2d}, [x4] 249 mov x4, xzr 250 b 3f 251 252 0: .ifc \pn, p64 253 tbnz w0, #0, 2f 254 tbnz w0, #1, 2f 255 256 1: ld1 {XM3.16b-TT4.16b}, [x2 257 258 sub w0, w0, #4 259 260 rev64 T1.16b, XM3.16b 261 rev64 T2.16b, XH3.16b 262 rev64 TT4.16b, TT4.16b 263 rev64 TT3.16b, TT3.16b 264 265 ext IN1.16b, TT4.16b, TT4. 266 ext XL3.16b, TT3.16b, TT3. 267 268 eor TT4.16b, TT4.16b, IN1. 269 pmull2 XH2.1q, SHASH.2d, IN1. 270 pmull XL2.1q, SHASH.1d, IN1. 271 pmull XM2.1q, SHASH2.1d, TT4 272 273 eor TT3.16b, TT3.16b, XL3. 274 pmull2 XH3.1q, HH.2d, XL3.2d 275 pmull XL3.1q, HH.1d, XL3.1d 276 pmull2 XM3.1q, SHASH2.2d, TT3 277 278 ext IN1.16b, T2.16b, T2.16 279 eor XL2.16b, XL2.16b, XL3. 280 eor XH2.16b, XH2.16b, XH3. 281 eor XM2.16b, XM2.16b, XM3. 282 283 eor T2.16b, T2.16b, IN1.16 284 pmull2 XH3.1q, HH3.2d, IN1.2d 285 pmull XL3.1q, HH3.1d, IN1.1d 286 pmull XM3.1q, HH34.1d, T2.1d 287 288 eor XL2.16b, XL2.16b, XL3. 289 eor XH2.16b, XH2.16b, XH3. 290 eor XM2.16b, XM2.16b, XM3. 291 292 ext IN1.16b, T1.16b, T1.16 293 ext TT3.16b, XL.16b, XL.16 294 eor XL.16b, XL.16b, IN1.16 295 eor T1.16b, T1.16b, TT3.16 296 297 pmull2 XH.1q, HH4.2d, XL.2d 298 eor T1.16b, T1.16b, XL.16b 299 pmull XL.1q, HH4.1d, XL.1d 300 pmull2 XM.1q, HH34.2d, T1.2d 301 302 eor XL.16b, XL.16b, XL2.16 303 eor XH.16b, XH.16b, XH2.16 304 eor XM.16b, XM.16b, XM2.16 305 306 eor T2.16b, XL.16b, XH.16b 307 ext T1.16b, XL.16b, XH.16b 308 eor XM.16b, XM.16b, T2.16b 309 310 __pmull_reduce_p64 311 312 eor T2.16b, T2.16b, XH.16b 313 eor XL.16b, XL.16b, T2.16b 314 315 cbz w0, 5f 316 b 1b 317 .endif 318 319 2: ld1 {T1.2d}, [x2], #16 320 sub w0, w0, #1 321 322 3: /* multiply XL by SHASH in GF(2^128) * 323 CPU_LE( rev64 T1.16b, T1.16b ) 324 325 ext T2.16b, XL.16b, XL.16b 326 ext IN1.16b, T1.16b, T1.16 327 eor T1.16b, T1.16b, T2.16b 328 eor XL.16b, XL.16b, IN1.16 329 330 __pmull2_\pn XH, XL, SHASH 331 eor T1.16b, T1.16b, XL.16b 332 __pmull_\pn XL, XL, SHASH 333 __pmull_\pn XM, T1, SHASH2 334 335 4: eor T2.16b, XL.16b, XH.16b 336 ext T1.16b, XL.16b, XH.16b 337 eor XM.16b, XM.16b, T2.16b 338 339 __pmull_reduce_\pn 340 341 eor T2.16b, T2.16b, XH.16b 342 eor XL.16b, XL.16b, T2.16b 343 344 cbnz w0, 0b 345 346 5: st1 {XL.2d}, [x1] 347 ret 348 .endm 349 350 /* 351 * void pmull_ghash_update(int blocks, 352 * struct ghas 353 */ 354 SYM_TYPED_FUNC_START(pmull_ghash_update_p64) 355 __pmull_ghash p64 356 SYM_FUNC_END(pmull_ghash_update_p64) 357 358 SYM_TYPED_FUNC_START(pmull_ghash_update_p8) 359 __pmull_ghash p8 360 SYM_FUNC_END(pmull_ghash_update_p8) 361 362 KS0 .req v8 363 KS1 .req v9 364 KS2 .req v10 365 KS3 .req v11 366 367 INP0 .req v21 368 INP1 .req v22 369 INP2 .req v23 370 INP3 .req v24 371 372 K0 .req v25 373 K1 .req v26 374 K2 .req v27 375 K3 .req v28 376 K4 .req v12 377 K5 .req v13 378 K6 .req v4 379 K7 .req v5 380 K8 .req v14 381 K9 .req v15 382 KK .req v29 383 KL .req v30 384 KM .req v31 385 386 .macro load_round_keys, round 387 add \tmp, \rk, #64 388 ld1 {K0.4s-K3.4s}, [\rk] 389 ld1 {K4.4s-K5.4s}, [\tmp] 390 add \tmp, \rk, \rounds, ls 391 sub \tmp, \tmp, #32 392 ld1 {KK.4s-KM.4s}, [\tmp] 393 .endm 394 395 .macro enc_round, state, key 396 aese \state\().16b, \key\() 397 aesmc \state\().16b, \state\ 398 .endm 399 400 .macro enc_qround, s0, s1, s2 401 enc_round \s0, \key 402 enc_round \s1, \key 403 enc_round \s2, \key 404 enc_round \s3, \key 405 .endm 406 407 .macro enc_block, state, roun 408 add \tmp, \rk, #96 409 ld1 {K6.4s-K7.4s}, [\tmp], 410 .irp key, K0, K1, K2, K3, K 411 enc_round \state, \key 412 .endr 413 414 tbnz \rounds, #2, .Lnot128_ 415 .Lout256_\@: 416 enc_round \state, K6 417 enc_round \state, K7 418 419 .Lout192_\@: 420 enc_round \state, KK 421 aese \state\().16b, KL.16b 422 eor \state\().16b, \state\ 423 424 .subsection 1 425 .Lnot128_\@: 426 ld1 {K8.4s-K9.4s}, [\tmp], 427 enc_round \state, K6 428 enc_round \state, K7 429 ld1 {K6.4s-K7.4s}, [\tmp] 430 enc_round \state, K8 431 enc_round \state, K9 432 tbz \rounds, #1, .Lout192_ 433 b .Lout256_\@ 434 .previous 435 .endm 436 437 .align 6 438 .macro pmull_gcm_do_crypt, en 439 frame_push 1 440 441 load_round_keys x7, x6, x8 442 443 ld1 {SHASH.2d}, [x3], #16 444 ld1 {HH.2d-HH4.2d}, [x3] 445 446 trn1 SHASH2.2d, SHASH.2d, H 447 trn2 T1.2d, SHASH.2d, HH.2d 448 eor SHASH2.16b, SHASH2.16b 449 450 trn1 HH34.2d, HH3.2d, HH4.2 451 trn2 T1.2d, HH3.2d, HH4.2d 452 eor HH34.16b, HH34.16b, T1 453 454 ld1 {XL.2d}, [x4] 455 456 cbz x0, 3f 457 458 ldr w8, [x5, #12] 459 CPU_LE( rev w8, w8 ) 460 461 0: mov w9, #4 462 add x10, x0, #0xf 463 lsr x10, x10, #4 464 465 subs x0, x0, #64 466 csel w9, w10, w9, mi 467 add w8, w8, w9 468 469 bmi 1f 470 ld1 {INP0.16b-INP3.16b}, [ 471 .subsection 1 472 /* 473 * Populate the four input registers r 474 * of data, using overlapping loads to 475 * 476 * INP0 INP1 IN 477 * 1 byte | | | 478 * 16 bytes | | | 479 * 17 bytes | | |xxxx 480 * 47 bytes | |xxxxxxxx|xxxx 481 * etc etc 482 * 483 * Note that this code may read up to 484 * the input. It is up to the calling 485 * this happens in the first iteration 486 * input size is < 16 bytes) 487 */ 488 1: mov x15, #16 489 ands x19, x0, #0xf 490 csel x19, x19, x15, ne 491 adr_l x17, .Lpermute_table + 492 493 sub x11, x15, x19 494 add x12, x17, x11 495 sub x17, x17, x11 496 ld1 {T1.16b}, [x12] 497 sub x10, x1, x11 498 sub x11, x2, x11 499 500 cmp x0, #-16 501 csel x14, x15, xzr, gt 502 cmp x0, #-32 503 csel x15, x15, xzr, gt 504 cmp x0, #-48 505 csel x16, x19, xzr, gt 506 csel x1, x1, x10, gt 507 csel x2, x2, x11, gt 508 509 ld1 {INP0.16b}, [x2], x14 510 ld1 {INP1.16b}, [x2], x15 511 ld1 {INP2.16b}, [x2], x16 512 ld1 {INP3.16b}, [x2] 513 tbl INP3.16b, {INP3.16b}, 514 b 2f 515 .previous 516 517 2: .if \enc == 0 518 bl pmull_gcm_ghash_4x 519 .endif 520 521 bl pmull_gcm_enc_4x 522 523 tbnz x0, #63, 6f 524 st1 {INP0.16b-INP3.16b}, [ 525 .if \enc == 1 526 bl pmull_gcm_ghash_4x 527 .endif 528 bne 0b 529 530 3: ldr x10, [sp, #.Lframe_loc 531 cbz x10, 5f 532 533 ld1 {INP3.16b}, [x10] 534 mov w9, #1 535 bl pmull_gcm_ghash_4x 536 537 mov w11, #(0x1 << 24) 538 ld1 {KS0.16b}, [x5] 539 mov KS0.s[3], w11 540 541 enc_block KS0, x7, x6, x12 542 543 ext XL.16b, XL.16b, XL.16b 544 rev64 XL.16b, XL.16b 545 eor XL.16b, XL.16b, KS0.16 546 547 .if \enc == 1 548 st1 {XL.16b}, [x10] 549 .else 550 ldp x11, x12, [sp, #40] 551 adr_l x17, .Lpermute_table 552 ld1 {KS0.16b}, [x11] 553 add x17, x17, x12 554 ld1 {KS1.16b}, [x17] 555 556 cmeq XL.16b, XL.16b, KS0.16 557 mvn XL.16b, XL.16b 558 tbl XL.16b, {XL.16b}, KS1. 559 sminv b0, XL.16b 560 smov w0, v0.b[0] 561 .endif 562 563 4: frame_pop 564 ret 565 566 5: 567 CPU_LE( rev w8, w8 ) 568 str w8, [x5, #12] 569 st1 {XL.2d}, [x4] 570 b 4b 571 572 6: ld1 {T1.16b-T2.16b}, [x17] 573 sub x17, x17, x19, lsl #1 574 575 cmp w9, #1 576 beq 7f 577 .subsection 1 578 7: ld1 {INP2.16b}, [x1] 579 tbx INP2.16b, {INP3.16b}, 580 mov INP3.16b, INP2.16b 581 b 8f 582 .previous 583 584 st1 {INP0.16b}, [x1], x14 585 st1 {INP1.16b}, [x1], x15 586 st1 {INP2.16b}, [x1], x16 587 tbl INP3.16b, {INP3.16b}, 588 tbx INP3.16b, {INP2.16b}, 589 8: st1 {INP3.16b}, [x1] 590 591 .if \enc == 1 592 ld1 {T1.16b}, [x17] 593 tbl INP3.16b, {INP3.16b}, 594 bl pmull_gcm_ghash_4x 595 .endif 596 b 3b 597 .endm 598 599 /* 600 * void pmull_gcm_encrypt(int blocks, 601 * struct ghash 602 * int rounds, 603 */ 604 SYM_FUNC_START(pmull_gcm_encrypt) 605 pmull_gcm_do_crypt 1 606 SYM_FUNC_END(pmull_gcm_encrypt) 607 608 /* 609 * void pmull_gcm_decrypt(int blocks, 610 * struct ghash 611 * int rounds, 612 */ 613 SYM_FUNC_START(pmull_gcm_decrypt) 614 pmull_gcm_do_crypt 0 615 SYM_FUNC_END(pmull_gcm_decrypt) 616 617 SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x) 618 movi MASK.16b, #0xe1 619 shl MASK.2d, MASK.2d, #57 620 621 rev64 T1.16b, INP0.16b 622 rev64 T2.16b, INP1.16b 623 rev64 TT3.16b, INP2.16b 624 rev64 TT4.16b, INP3.16b 625 626 ext XL.16b, XL.16b, XL.16b 627 628 tbz w9, #2, 0f 629 .subsection 1 630 0: movi XH2.16b, #0 631 movi XM2.16b, #0 632 movi XL2.16b, #0 633 634 tbz w9, #0, 1f 635 tbz w9, #1, 2f 636 637 eor T2.16b, T2.16b, XL.16b 638 ext T1.16b, T2.16b, T2.16b 639 b .Lgh3 640 641 1: eor TT3.16b, TT3.16b, XL.1 642 ext T2.16b, TT3.16b, TT3.1 643 b .Lgh2 644 645 2: eor TT4.16b, TT4.16b, XL.1 646 ext IN1.16b, TT4.16b, TT4. 647 b .Lgh1 648 .previous 649 650 eor T1.16b, T1.16b, XL.16b 651 ext IN1.16b, T1.16b, T1.16 652 653 pmull2 XH2.1q, HH4.2d, IN1.2d 654 eor T1.16b, T1.16b, IN1.16 655 pmull XL2.1q, HH4.1d, IN1.1d 656 pmull2 XM2.1q, HH34.2d, T1.2d 657 658 ext T1.16b, T2.16b, T2.16b 659 .Lgh3: eor T2.16b, T2.16b, T1.16b 660 pmull2 XH.1q, HH3.2d, T1.2d 661 pmull XL.1q, HH3.1d, T1.1d 662 pmull XM.1q, HH34.1d, T2.1d 663 664 eor XH2.16b, XH2.16b, XH.1 665 eor XL2.16b, XL2.16b, XL.1 666 eor XM2.16b, XM2.16b, XM.1 667 668 ext T2.16b, TT3.16b, TT3.1 669 .Lgh2: eor TT3.16b, TT3.16b, T2.1 670 pmull2 XH.1q, HH.2d, T2.2d 671 pmull XL.1q, HH.1d, T2.1d 672 pmull2 XM.1q, SHASH2.2d, TT3. 673 674 eor XH2.16b, XH2.16b, XH.1 675 eor XL2.16b, XL2.16b, XL.1 676 eor XM2.16b, XM2.16b, XM.1 677 678 ext IN1.16b, TT4.16b, TT4. 679 .Lgh1: eor TT4.16b, TT4.16b, IN1. 680 pmull XL.1q, SHASH.1d, IN1.1 681 pmull2 XH.1q, SHASH.2d, IN1.2 682 pmull XM.1q, SHASH2.1d, TT4. 683 684 eor XH.16b, XH.16b, XH2.16 685 eor XL.16b, XL.16b, XL2.16 686 eor XM.16b, XM.16b, XM2.16 687 688 eor T2.16b, XL.16b, XH.16b 689 ext T1.16b, XL.16b, XH.16b 690 eor XM.16b, XM.16b, T2.16b 691 692 __pmull_reduce_p64 693 694 eor T2.16b, T2.16b, XH.16b 695 eor XL.16b, XL.16b, T2.16b 696 697 ret 698 SYM_FUNC_END(pmull_gcm_ghash_4x) 699 700 SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x) 701 ld1 {KS0.16b}, [x5] 702 sub w10, w8, #4 703 sub w11, w8, #3 704 sub w12, w8, #2 705 sub w13, w8, #1 706 rev w10, w10 707 rev w11, w11 708 rev w12, w12 709 rev w13, w13 710 mov KS1.16b, KS0.16b 711 mov KS2.16b, KS0.16b 712 mov KS3.16b, KS0.16b 713 ins KS0.s[3], w10 714 ins KS1.s[3], w11 715 ins KS2.s[3], w12 716 ins KS3.s[3], w13 717 718 add x10, x6, #96 719 ld1 {K6.4s-K7.4s}, [x10], 720 .irp key, K0, K1, K2, K3, K 721 enc_qround KS0, KS1, KS2, KS3, \k 722 .endr 723 724 tbnz x7, #2, .Lnot128 725 .subsection 1 726 .Lnot128: 727 ld1 {K8.4s-K9.4s}, [x10], 728 .irp key, K6, K7 729 enc_qround KS0, KS1, KS2, KS3, \k 730 .endr 731 ld1 {K6.4s-K7.4s}, [x10] 732 .irp key, K8, K9 733 enc_qround KS0, KS1, KS2, KS3, \k 734 .endr 735 tbz x7, #1, .Lout192 736 b .Lout256 737 .previous 738 739 .Lout256: 740 .irp key, K6, K7 741 enc_qround KS0, KS1, KS2, KS3, \k 742 .endr 743 744 .Lout192: 745 enc_qround KS0, KS1, KS2, KS3, KK 746 747 aese KS0.16b, KL.16b 748 aese KS1.16b, KL.16b 749 aese KS2.16b, KL.16b 750 aese KS3.16b, KL.16b 751 752 eor KS0.16b, KS0.16b, KM.1 753 eor KS1.16b, KS1.16b, KM.1 754 eor KS2.16b, KS2.16b, KM.1 755 eor KS3.16b, KS3.16b, KM.1 756 757 eor INP0.16b, INP0.16b, KS 758 eor INP1.16b, INP1.16b, KS 759 eor INP2.16b, INP2.16b, KS 760 eor INP3.16b, INP3.16b, KS 761 762 ret 763 SYM_FUNC_END(pmull_gcm_enc_4x) 764 765 .section ".rodata", "a" 766 .align 6 767 .Lpermute_table: 768 .byte 0xff, 0xff, 0xff, 0xff 769 .byte 0xff, 0xff, 0xff, 0xff 770 .byte 0x0, 0x1, 0x2, 0x3 771 .byte 0x8, 0x9, 0xa, 0xb 772 .byte 0xff, 0xff, 0xff, 0xff 773 .byte 0xff, 0xff, 0xff, 0xff 774 .byte 0x0, 0x1, 0x2, 0x3 775 .byte 0x8, 0x9, 0xa, 0xb 776 .previous
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.