/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * Scalar-free AES rounds built from NEON tbl/tbx lookups into the full
 * 256-byte S-box (preloaded into v16-v31).  This file only provides the
 * per-block primitives; the actual mode drivers (ECB/CBC/CTR/XTS) come
 * from aes-modes.S, included at the bottom.
 *
 * Fixed register conventions (shared with aes-modes.S):
 *   v12       - 0x1b in every byte (GF(2^8) reduction polynomial)
 *   v13       - ShiftRows / InvShiftRows byte permutation
 *   v14       - rotate-each-32-bit-word-by-8 byte permutation
 *   v15       - current round key, also reused as #0x40 tbl offset
 *   v16-v31   - the 256-byte (inverse) S-box
 *   v8-v11    - scratch
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)

	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* special case for the neon-bs driver calling into this one for CTS */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

	/*
	 * Apply SubBytes using the preloaded Sbox in v16-v31: tbl covers
	 * indices 0-63, then each tbx handles the next 64 after the index
	 * is rebased by subtracting 0x40 (kept in v15 at this point).
	 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* apply MixColumns transformation */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by 'x' */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.La\@:	eor		\in\().16b, \in\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	cbz		\i, .Lb\@
	mix_columns	\in, \enc
	b		.La\@
.Lb\@:	eor		\in\().16b, \in\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to AES states of 4x in parallel.
	 */

	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by 'x' */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.La\@:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b
	eor		\in2\().16b, \in2\().16b, v15.16b
	eor		\in3\().16b, \in3\().16b, v15.16b
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b
	tbl		\in2\().16b, {\in2\().16b}, v13.16b
	tbl		\in3\().16b, {\in3\().16b}, v13.16b
	sub_bytes_4x	\in0, \in1, \in2, \in3
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	cbz		\i, .Lb\@
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		.La\@
.Lb\@:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b
	eor		\in2\().16b, \in2\().16b, v15.16b
	eor		\in3\().16b, \in3\().16b, v15.16b
	.endm

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"

	.section	".rodata", "a"
	.align		4
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.