/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif

SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, v7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, v7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, v7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, v7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

AES_FUNC_START(aes_ecb_encrypt)
	frame_push	0

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	frame_pop
	ret
AES_FUNC_END(aes_ecb_encrypt)


AES_FUNC_START(aes_ecb_decrypt)
	frame_push	0

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	frame_pop
	ret
AES_FUNC_END(aes_ecb_decrypt)
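
	/*
	 * Note on the ESSIV variants below: the caller passes a second key
	 * (rk2, typically derived as a hash of the main key) with which the
	 * IV is first encrypted as a single AES-256 block, hence the fixed
	 * round count of 14 before switching to the regular CBC key schedule.
	 */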

	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 */

AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b		.Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)

AES_FUNC_START(aes_essiv_cbc_decrypt)
	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b		.Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	frame_push	0
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	frame_pop
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */

AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)

	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
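
	/*
	 * The 48-byte permute table above is indexed at an offset derived from
	 * the length of the final partial block.  tbl returns zero for the
	 * 0xff (out-of-range) indices and tbx leaves those destination bytes
	 * untouched, which lets the CTS code shift and merge the final two
	 * blocks using plain vector instructions and overlapping loads/stores
	 * rather than byte-wise copies.
	 */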

	/*
	 * This macro generates the code for CTR and XCTR mode.
	 */
.macro ctr_encrypt xctr
	// Arguments
	OUT		.req x0
	IN		.req x1
	KEY		.req x2
	ROUNDS_W	.req w3
	BYTES_W		.req w4
	IV		.req x5
	BYTE_CTR_W	.req w6		// XCTR only
	// Intermediate values
	CTR_W		.req w11	// XCTR only
	CTR		.req x11	// XCTR only
	IV_PART		.req x12
	BLOCKS		.req x13
	BLOCKS_W	.req w13

	frame_push	0

	enc_prepare	ROUNDS_W, KEY, IV_PART
	ld1		{vctr.16b}, [IV]

	/*
	 * Keep 64 bits of the IV in a register.  For CTR mode this lets us
	 * easily increment the IV.  For XCTR mode this lets us efficiently XOR
	 * the 64-bit counter with the IV.
	 */
	.if \xctr
		umov		IV_PART, vctr.d[0]
		lsr		CTR_W, BYTE_CTR_W, #4
	.else
		umov		IV_PART, vctr.d[1]
		rev		IV_PART, IV_PART
	.endif

.LctrloopNx\xctr:
	add		BLOCKS_W, BYTES_W, #15
	sub		BYTES_W, BYTES_W, #MAX_STRIDE << 4
	lsr		BLOCKS_W, BLOCKS_W, #4
	mov		w8, #MAX_STRIDE
	cmp		BLOCKS_W, w8
	csel		BLOCKS_W, BLOCKS_W, w8, lt

	/*
	 * Set up the counter values in v0-v{MAX_STRIDE-1}.
	 *
	 * If we are encrypting less than MAX_STRIDE blocks, the tail block
	 * handling code expects the last keystream block to be in
	 * v{MAX_STRIDE-1}.  For example: if encrypting two blocks with
	 * MAX_STRIDE=5, then v3 and v4 should have the next two counter
	 * blocks.
	 */
	.if \xctr
		add		CTR, CTR, BLOCKS
	.else
		adds		IV_PART, IV_PART, BLOCKS
	.endif
	mov		v0.16b, vctr.16b
	mov		v1.16b, vctr.16b
	mov		v2.16b, vctr.16b
	mov		v3.16b, vctr.16b
ST5(	mov		v4.16b, vctr.16b		)
	.if \xctr
		sub		x6, CTR, #MAX_STRIDE - 1
		sub		x7, CTR, #MAX_STRIDE - 2
		sub		x8, CTR, #MAX_STRIDE - 3
		sub		x9, CTR, #MAX_STRIDE - 4
ST5(		sub		x10, CTR, #MAX_STRIDE - 5	)
		eor		x6, x6, IV_PART
		eor		x7, x7, IV_PART
		eor		x8, x8, IV_PART
		eor		x9, x9, IV_PART
ST5(		eor		x10, x10, IV_PART		)
		mov		v0.d[0], x6
		mov		v1.d[0], x7
		mov		v2.d[0], x8
		mov		v3.d[0], x9
ST5(		mov		v4.d[0], x10			)
	.else
		bcs		0f
		.subsection	1
		/*
		 * This subsection handles carries.
		 *
		 * Conditional branching here is allowed with respect to time
		 * invariance since the branches are dependent on the IV instead
		 * of the plaintext or key.  This code is rarely executed in
		 * practice anyway.
		 */

		/* Apply carry to outgoing counter. */
0:		umov		x8, vctr.d[0]
		rev		x8, x8
		add		x8, x8, #1
		rev		x8, x8
		ins		vctr.d[0], x8

		/*
		 * Apply carry to counter blocks if needed.
		 *
		 * Since the carry flag was set, we know 0 <= IV_PART <
		 * MAX_STRIDE.  Using the value of IV_PART we can determine how
		 * many counter blocks need to be updated.
		 */
		cbz		IV_PART, 2f
		adr		x16, 1f
		sub		x16, x16, IV_PART, lsl #3
		br		x16
		bti		c
		mov		v0.d[0], vctr.d[0]
		bti		c
		mov		v1.d[0], vctr.d[0]
		bti		c
		mov		v2.d[0], vctr.d[0]
		bti		c
		mov		v3.d[0], vctr.d[0]
ST5(		bti		c				)
ST5(		mov		v4.d[0], vctr.d[0]		)
1:		b		2f
		.previous

2:		rev		x7, IV_PART
		ins		vctr.d[1], x7
		sub		x7, IV_PART, #MAX_STRIDE - 1
		sub		x8, IV_PART, #MAX_STRIDE - 2
		sub		x9, IV_PART, #MAX_STRIDE - 3
		rev		x7, x7
		rev		x8, x8
		mov		v1.d[1], x7
		rev		x9, x9
ST5(		sub		x10, IV_PART, #MAX_STRIDE - 4	)
		mov		v2.d[1], x8
ST5(		rev		x10, x10			)
		mov		v3.d[1], x9
ST5(		mov		v4.d[1], x10			)
	.endif
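
	/*
	 * Note on the carry subsection above: each "bti c; mov vN.d[0], ..."
	 * pair occupies 8 bytes, so branching back IV_PART * 8 bytes from
	 * label 1 executes only the last IV_PART entries of that list, i.e.
	 * only the counter blocks that actually wrapped pick up the
	 * incremented upper 64 bits.
	 */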

	/*
	 * If there are at least MAX_STRIDE blocks left, XOR the data with
	 * keystream and store.  Otherwise jump to tail handling.
	 */
	tbnz		BYTES_W, #31, .Lctrtail\xctr
	ld1		{v5.16b-v7.16b}, [IN], #48
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [IN], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [OUT], #64
ST5(	st1		{v4.16b}, [OUT], #16		)
	cbz		BYTES_W, .Lctrout\xctr
	b		.LctrloopNx\xctr

.Lctrout\xctr:
	.if !\xctr
		st1		{vctr.16b}, [IV]	/* return next CTR value */
	.endif
	frame_pop
	ret

.Lctrtail\xctr:
	/*
	 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
	 *
	 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
	 * For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
	 * v4 should have the next two counter blocks.
	 *
	 * This allows us to store the ciphertext by writing to overlapping
	 * regions of memory.  Any invalid ciphertext blocks get overwritten by
	 * correctly computed blocks.  This approach greatly simplifies the
	 * logic for storing the ciphertext.
	 */
	mov		x16, #16
	ands		w7, BYTES_W, #0xf
	csel		x13, x7, x16, ne

ST5(	cmp		BYTES_W, #64 - (MAX_STRIDE << 4))
ST5(	csel		x14, x16, xzr, gt		)
	cmp		BYTES_W, #48 - (MAX_STRIDE << 4)
	csel		x15, x16, xzr, gt
	cmp		BYTES_W, #32 - (MAX_STRIDE << 4)
	csel		x16, x16, xzr, gt
	cmp		BYTES_W, #16 - (MAX_STRIDE << 4)

	adr_l		x9, .Lcts_permute_table
	add		x9, x9, x13
	ble		.Lctrtail1x\xctr

ST5(	ld1		{v5.16b}, [IN], x14		)
	ld1		{v6.16b}, [IN], x15
	ld1		{v7.16b}, [IN], x16

ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)

	ld1		{v8.16b}, [IN], x13
	ld1		{v9.16b}, [IN]
	ld1		{v10.16b}, [x9]

ST4(	eor		v6.16b, v6.16b, v0.16b		)
ST4(	eor		v7.16b, v7.16b, v1.16b		)
ST4(	tbl		v3.16b, {v3.16b}, v10.16b	)
ST4(	eor		v8.16b, v8.16b, v2.16b		)
ST4(	eor		v9.16b, v9.16b, v3.16b		)

ST5(	eor		v5.16b, v5.16b, v0.16b		)
ST5(	eor		v6.16b, v6.16b, v1.16b		)
ST5(	tbl		v4.16b, {v4.16b}, v10.16b	)
ST5(	eor		v7.16b, v7.16b, v2.16b		)
ST5(	eor		v8.16b, v8.16b, v3.16b		)
ST5(	eor		v9.16b, v9.16b, v4.16b		)

ST5(	st1		{v5.16b}, [OUT], x14		)
	st1		{v6.16b}, [OUT], x15
	st1		{v7.16b}, [OUT], x16
	add		x13, x13, OUT
	st1		{v9.16b}, [x13]			// overlapping stores
	st1		{v8.16b}, [OUT]
	b		.Lctrout\xctr

.Lctrtail1x\xctr:
	/*
	 * Handle <= 16 bytes of plaintext
	 *
	 * This code always reads and writes 16 bytes.  To avoid out of bounds
	 * accesses, XCTR and CTR modes must use a temporary buffer when
	 * encrypting/decrypting less than 16 bytes.
	 *
	 * This code is unusual in that it loads the input and stores the
	 * output relative to the end of the buffers rather than relative to
	 * the start.  This causes unusual behaviour when encrypting/decrypting
	 * less than 16 bytes; the end of the data is expected to be at the end
	 * of the temporary buffer rather than the start of the data being at
	 * the start of the temporary buffer.
	 */
	sub		x8, x7, #16
	csel		x7, x7, x8, eq
	add		IN, IN, x7
	add		OUT, OUT, x7
	ld1		{v5.16b}, [IN]
	ld1		{v6.16b}, [OUT]
ST5(	mov		v3.16b, v4.16b			)
	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
	ld1		{v10.16b-v11.16b}, [x9]
	tbl		v3.16b, {v3.16b}, v10.16b
	sshr		v11.16b, v11.16b, #7
	eor		v5.16b, v5.16b, v3.16b
	bif		v5.16b, v6.16b, v11.16b
	st1		{v5.16b}, [OUT]
	b		.Lctrout\xctr

	// Arguments
	.unreq OUT
	.unreq IN
	.unreq KEY
	.unreq ROUNDS_W
	.unreq BYTES_W
	.unreq IV
	.unreq BYTE_CTR_W	// XCTR only
	// Intermediate values
	.unreq CTR_W		// XCTR only
	.unreq CTR		// XCTR only
	.unreq IV_PART
	.unreq BLOCKS
	.unreq BLOCKS_W
.endm
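
	/*
	 * The two instantiations below share the macro above: plain CTR mode
	 * big-endian increments the counter held in the IV, while XCTR (the
	 * counter mode used by HCTR2) instead XORs a little-endian block
	 * counter, derived from the byte offset passed in w6, into the first
	 * 64 bits of the IV.
	 */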

	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int bytes, u8 ctr[])
	 *
	 * The input and output buffers must always be at least 16 bytes even
	 * if encrypting/decrypting less than 16 bytes.  Otherwise out of
	 * bounds accesses will occur.  The data to be encrypted/decrypted is
	 * expected to be at the end of this 16-byte temporary buffer rather
	 * than the start.
	 */

AES_FUNC_START(aes_ctr_encrypt)
	ctr_encrypt 0
AES_FUNC_END(aes_ctr_encrypt)

	/*
	 * aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		    int bytes, u8 const iv[], int byte_ctr)
	 *
	 * The input and output buffers must always be at least 16 bytes even
	 * if encrypting/decrypting less than 16 bytes.  Otherwise out of
	 * bounds accesses will occur.  The data to be encrypted/decrypted is
	 * expected to be at the end of this 16-byte temporary buffer rather
	 * than the start.
	 */

AES_FUNC_START(aes_xctr_encrypt)
	ctr_encrypt 1
AES_FUNC_END(aes_xctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 */

	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

AES_FUNC_START(aes_xts_encrypt)
	frame_push	0

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #64
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsencret
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #64
	beq		.Lxtsencout
	subs		w4, w4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	ld1		{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	cbz		w4, .Lxtsencout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	bmi		.Lxtsenccts
	st1		{v0.16b}, [x0], #16
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v0.16b}, [x0]
.Lxtsencret:
	st1		{v4.16b}, [x6]
	frame_pop
	ret

.LxtsencctsNx:
	mov		v0.16b, v3.16b
	sub		x0, x0, #16
.Lxtsenccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)
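
	/*
	 * The decrypt path below handles ciphertext stealing slightly
	 * differently from the encrypt path above: when the length is not a
	 * multiple of the block size, 16 is subtracted from the byte count up
	 * front so the main loop stops one block early; that last full block
	 * is then decrypted with the *next* tweak (v5) while the reassembled
	 * final block reuses the current tweak (v4).
	 */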
AES_FUNC_START(aes_xts_decrypt)
	frame_push	0

	/* subtract 16 bytes if we are doing CTS */
	sub		w8, w4, #0x10
	tst		w4, #0xf
	csel		w4, w4, w8, eq

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #64
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #64
	beq		.Lxtsdecout
	subs		w4, w4, #16
.Lxtsdecloop:
	ld1		{v0.16b}, [x1], #16
	bmi		.Lxtsdeccts
.Lxtsdecctsout:
	eor		v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	cbz		w4, .Lxtsdecout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	frame_pop
	ret

.Lxtsdeccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	next_tweak	v5, v4, v8

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	eor		v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v5.16b

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b

	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_FUNC_START(aes_mac_update)
	ld1		{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w3, w3, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt blocks */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w3, wzr
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w2, x1, x7, w8
	st1		{v0.16b}, [x4]			/* return dg */
	cond_yield	.Lmacout, x7, x8
	b		.Lmacloop4x
.Lmac1x:
	add		w3, w3, #4
.Lmacloop:
	cbz		w3, .Lmacout
	ld1		{v1.16b}, [x0], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w3, w3, #1
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w2, x1, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x4]			/* return dg */
	mov		w0, w3
	ret
AES_FUNC_END(aes_mac_update)