~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/crypto/aes-neon.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-only */
  2 /*
  3  * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
  4  *
  5  * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  6  */
  7 
  8 #include <linux/linkage.h>
  9 #include <asm/assembler.h>
 10 
 11 #define AES_FUNC_START(func)            SYM_FUNC_START(neon_ ## func)   /* prefix the shared mode entry points (aes-modes.S) with "neon_" */
 12 #define AES_FUNC_END(func)              SYM_FUNC_END(neon_ ## func)
 13 
 14         xtsmask         .req    v7      /* XTS tweak mask; aliases cbciv, so XTS and CBC state share v7 */
 15         cbciv           .req    v7      /* CBC chaining value / IV */
 16         vctr            .req    v4      /* CTR mode counter block */
 17 
 18         .macro          xts_reload_mask, tmp    /* refresh xtsmask (v7) using \tmp as scratch */
 19         xts_load_mask   \tmp                    /* xts_load_mask is not defined in this file - presumably provided by aes-modes.S; verify there */
 20         .endm
 21 
 22         /* special case for the neon-bs driver calling into this one for CTS */
 23         .macro          xts_cts_skip_tw, reg, lbl
 24         tbnz            \reg, #1, \lbl          /* branch to \lbl (skip tweak handling) when bit 1 of \reg is set */
 25         .endm
 26 
 27         /* multiply by polynomial 'x' in GF(2^8) */
 28         .macro          mul_by_x, out, in, temp, const
 29         sshr            \temp, \in, #7          /* temp = 0xff in each lane whose top bit is set, else 0 */
 30         shl             \out, \in, #1           /* out = in << 1, dropping the overflowing x^8 term */
 31         and             \temp, \temp, \const    /* \const holds the reduction constant 0x1b (set up in 'prepare') */
 32         eor             \out, \out, \temp       /* fold the overflow back in: reduce mod x^8+x^4+x^3+x+1 */
 33         .endm
 34 
 35         /* multiply by polynomial 'x^2' in GF(2^8) */
 36         .macro          mul_by_x2, out, in, temp, const
 37         ushr            \temp, \in, #6          /* temp = top two bits of each lane (the x^8/x^9 overflow) */
 38         shl             \out, \in, #2           /* out = in << 2, dropping both overflow terms */
 39         pmul            \temp, \temp, \const    /* carryless multiply by 0x1b reduces both overflow bits at once */
 40         eor             \out, \out, \temp       /* fold the reduced overflow back in */
 41         .endm
 42 
 43         /* preload the entire Sbox */
 44         .macro          prepare, sbox, shiftrows, temp
 45         movi            v12.16b, #0x1b          /* v12 = GF(2^8) reduction constant used by mul_by_x/mul_by_x2 */
 46         ldr_l           q13, \shiftrows, \temp  /* v13 = (Inv)ShiftRows tbl permutation vector */
 47         ldr_l           q14, .Lror32by8, \temp  /* v14 = rotate-each-32-bit-word-by-8-bits tbl permutation */
 48         adr_l           \temp, \sbox
 49         ld1             {v16.16b-v19.16b}, [\temp], #64         /* entire 256-byte (inverse) Sbox -> v16..v31 */
 50         ld1             {v20.16b-v23.16b}, [\temp], #64
 51         ld1             {v24.16b-v27.16b}, [\temp], #64
 52         ld1             {v28.16b-v31.16b}, [\temp]
 53         .endm
 54 
 55         /* do preload for encryption */
 56         .macro          enc_prepare, ignore0, ignore1, temp     /* first two args unused; kept for a uniform interface */
 57         prepare         crypto_aes_sbox, .LForward_ShiftRows, \temp
 58         .endm
 59 
 60         .macro          enc_switch_key, ignore0, ignore1, temp
 61         /* do nothing - do_block reloads the round keys from memory on every block, so there is no cached key state to refresh */
 62         .endm
 63 
 64         /* do preload for decryption */
 65         .macro          dec_prepare, ignore0, ignore1, temp     /* first two args unused; kept for a uniform interface */
 66         prepare         crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
 67         .endm
 68 
 69         /* apply SubBytes transformation using the preloaded Sbox */
 70         .macro          sub_bytes, in           /* expects v15 == 0x40 (set by do_block before invoking this) */
 71         sub             v9.16b, \in\().16b, v15.16b                     /* rebase indices for Sbox bytes 0x40-0x7f */
 72         tbl             \in\().16b, {v16.16b-v19.16b}, \in\().16b       /* lookup 0x00-0x3f; out-of-range indices yield 0 */
 73         sub             v10.16b, v9.16b, v15.16b                        /* rebase for 0x80-0xbf */
 74         tbx             \in\().16b, {v20.16b-v23.16b}, v9.16b           /* tbx leaves prior hits intact, merges 0x40-0x7f */
 75         sub             v11.16b, v10.16b, v15.16b                       /* rebase for 0xc0-0xff */
 76         tbx             \in\().16b, {v24.16b-v27.16b}, v10.16b          /* merge 0x80-0xbf */
 77         tbx             \in\().16b, {v28.16b-v31.16b}, v11.16b          /* merge 0xc0-0xff */
 78         .endm
 79 
 80         /* apply MixColumns transformation */
 81         .macro          mix_columns, in, enc    /* clobbers v8, v9; uses v12 (0x1b) and v14 (ror32by8) */
 82         .if             \enc == 0
 83         /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
 84         mul_by_x2       v8.16b, \in\().16b, v9.16b, v12.16b     /* v8 = 4.in */
 85         eor             \in\().16b, \in\().16b, v8.16b          /* in ^= 4.in */
 86         rev32           v8.8h, v8.8h                            /* rotate 4.in by 16 bits within each column word */
 87         eor             \in\().16b, \in\().16b, v8.16b          /* pre-multiplied; fall through to forward MixColumns */
 88         .endif
 89 
 90         mul_by_x        v9.16b, \in\().16b, v8.16b, v12.16b     /* v9 = 2.in */
 91         rev32           v8.8h, \in\().8h                        /* v8 = in rotated by 16 bits within each word */
 92         eor             v8.16b, v8.16b, v9.16b                  /* v8 = rot16(in) ^ 2.in */
 93         eor             \in\().16b, \in\().16b, v8.16b
 94         tbl             \in\().16b, {\in\().16b}, v14.16b       /* rotate each 32-bit word by 8 bits */
 95         eor             \in\().16b, \in\().16b, v8.16b
 96         .endm
 97 
 98         .macro          do_block, enc, in, rounds, rk, rkp, i   /* one full AES block: \enc=1 encrypt, 0 decrypt */
 99         ld1             {v15.4s}, [\rk]                         /* v15 = first round key */
100         add             \rkp, \rk, #16                          /* \rkp walks the remaining key schedule */
101         mov             \i, \rounds
102 .La\@:  eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
103         movi            v15.16b, #0x40                          /* v15 = 0x40: index offset consumed by sub_bytes */
104         tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
105         sub_bytes       \in
106         sub             \i, \i, #1
107         ld1             {v15.4s}, [\rkp], #16                   /* load next round key */
108         cbz             \i, .Lb\@                               /* final round omits MixColumns */
109         mix_columns     \in, \enc
110         b               .La\@
111 .Lb\@:  eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
112         .endm
113 
114         .macro          encrypt_block, in, rounds, rk, rkp, i   /* single-block encrypt entry used by aes-modes.S */
115         do_block        1, \in, \rounds, \rk, \rkp, \i
116         .endm
117 
118         .macro          decrypt_block, in, rounds, rk, rkp, i   /* single-block decrypt entry used by aes-modes.S */
119         do_block        0, \in, \rounds, \rk, \rkp, \i
120         .endm
121 
122         /*
123          * Interleaved versions: functionally equivalent to the
124          * ones above, but applied to AES states in parallel.
125          */
126 
127         .macro          sub_bytes_4x, in0, in1, in2, in3        /* SubBytes on 4 states; expects v15 == 0x40; interleaved to hide tbl/tbx latency */
128         sub             v8.16b, \in0\().16b, v15.16b            /* v8-v11 hold the rebased indices, one per state */
129         tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b     /* lookup Sbox bytes 0x00-0x3f */
130         sub             v9.16b, \in1\().16b, v15.16b
131         tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
132         sub             v10.16b, \in2\().16b, v15.16b
133         tbl             \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
134         sub             v11.16b, \in3\().16b, v15.16b
135         tbl             \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
136         tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b          /* merge bytes 0x40-0x7f */
137         tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
138         sub             v8.16b, v8.16b, v15.16b                         /* rebase for the next 64-byte chunk */
139         tbx             \in2\().16b, {v20.16b-v23.16b}, v10.16b
140         sub             v9.16b, v9.16b, v15.16b
141         tbx             \in3\().16b, {v20.16b-v23.16b}, v11.16b
142         sub             v10.16b, v10.16b, v15.16b
143         tbx             \in0\().16b, {v24.16b-v27.16b}, v8.16b          /* merge bytes 0x80-0xbf */
144         sub             v11.16b, v11.16b, v15.16b
145         tbx             \in1\().16b, {v24.16b-v27.16b}, v9.16b
146         sub             v8.16b, v8.16b, v15.16b
147         tbx             \in2\().16b, {v24.16b-v27.16b}, v10.16b
148         sub             v9.16b, v9.16b, v15.16b
149         tbx             \in3\().16b, {v24.16b-v27.16b}, v11.16b
150         sub             v10.16b, v10.16b, v15.16b
151         tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b          /* merge bytes 0xc0-0xff */
152         sub             v11.16b, v11.16b, v15.16b
153         tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
154         tbx             \in2\().16b, {v28.16b-v31.16b}, v10.16b
155         tbx             \in3\().16b, {v28.16b-v31.16b}, v11.16b
156         .endm
157 
158         .macro          mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const    /* two interleaved copies of mul_by_x */
159         sshr            \tmp0\().16b, \in0\().16b, #7           /* 0xff in lanes with the top bit set */
160         shl             \out0\().16b, \in0\().16b, #1
161         sshr            \tmp1\().16b, \in1\().16b, #7
162         and             \tmp0\().16b, \tmp0\().16b, \const\().16b       /* \const = 0x1b reduction constant */
163         shl             \out1\().16b, \in1\().16b, #1
164         and             \tmp1\().16b, \tmp1\().16b, \const\().16b
165         eor             \out0\().16b, \out0\().16b, \tmp0\().16b        /* reduce mod the AES polynomial */
166         eor             \out1\().16b, \out1\().16b, \tmp1\().16b
167         .endm
168 
169         .macro          mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const   /* two interleaved copies of mul_by_x2 */
170         ushr            \tmp0\().16b, \in0\().16b, #6           /* top two bits = x^8/x^9 overflow */
171         shl             \out0\().16b, \in0\().16b, #2
172         ushr            \tmp1\().16b, \in1\().16b, #6
173         pmul            \tmp0\().16b, \tmp0\().16b, \const\().16b       /* carryless multiply by 0x1b reduces both bits */
174         shl             \out1\().16b, \in1\().16b, #2
175         pmul            \tmp1\().16b, \tmp1\().16b, \const\().16b
176         eor             \out0\().16b, \out0\().16b, \tmp0\().16b
177         eor             \out1\().16b, \out1\().16b, \tmp1\().16b
178         .endm
179 
180         .macro          mix_columns_2x, in0, in1, enc   /* MixColumns on two states; clobbers v8-v11; uses v12, v14 */
181         .if             \enc == 0
182         /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
183         mul_by_x2_2x    v8, v9, \in0, \in1, v10, v11, v12       /* v8/v9 = 4.in0 / 4.in1 */
184         eor             \in0\().16b, \in0\().16b, v8.16b
185         rev32           v8.8h, v8.8h                            /* rotate by 16 bits within each column word */
186         eor             \in1\().16b, \in1\().16b, v9.16b
187         rev32           v9.8h, v9.8h
188         eor             \in0\().16b, \in0\().16b, v8.16b        /* pre-multiplied; fall through to forward MixColumns */
189         eor             \in1\().16b, \in1\().16b, v9.16b
190         .endif
191 
192         mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v12       /* v8/v9 = 2.in0 / 2.in1 */
193         rev32           v10.8h, \in0\().8h                      /* v10/v11 = inputs rotated by 16 bits per word */
194         rev32           v11.8h, \in1\().8h
195         eor             v10.16b, v10.16b, v8.16b                /* rot16(in) ^ 2.in */
196         eor             v11.16b, v11.16b, v9.16b
197         eor             \in0\().16b, \in0\().16b, v10.16b
198         eor             \in1\().16b, \in1\().16b, v11.16b
199         tbl             \in0\().16b, {\in0\().16b}, v14.16b     /* rotate each 32-bit word by 8 bits */
200         tbl             \in1\().16b, {\in1\().16b}, v14.16b
201         eor             \in0\().16b, \in0\().16b, v10.16b
202         eor             \in1\().16b, \in1\().16b, v11.16b
203         .endm
204 
205         .macro          do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i        /* 4 AES blocks in parallel; \enc=1 encrypt, 0 decrypt */
206         ld1             {v15.4s}, [\rk]                         /* v15 = first round key */
207         add             \rkp, \rk, #16                          /* \rkp walks the remaining key schedule */
208         mov             \i, \rounds
209 .La\@:  eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
210         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
211         eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
212         eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
213         movi            v15.16b, #0x40                          /* v15 = 0x40: index offset consumed by sub_bytes_4x */
214         tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
215         tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
216         tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
217         tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
218         sub_bytes_4x    \in0, \in1, \in2, \in3
219         sub             \i, \i, #1
220         ld1             {v15.4s}, [\rkp], #16                   /* load next round key */
221         cbz             \i, .Lb\@                               /* final round omits MixColumns */
222         mix_columns_2x  \in0, \in1, \enc
223         mix_columns_2x  \in2, \in3, \enc
224         b               .La\@
225 .Lb\@:  eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
226         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
227         eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
228         eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
229         .endm
230 
231         .macro          encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i /* 4-way encrypt entry used by aes-modes.S */
232         do_block_4x     1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
233         .endm
234 
235         .macro          decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i /* 4-way decrypt entry used by aes-modes.S */
236         do_block_4x     0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
237         .endm
238 
239 #include "aes-modes.S"
240 
241         .section        ".rodata", "a"
242         .align          4
243 .LForward_ShiftRows:    /* tbl index vector implementing AES ShiftRows (loaded into v13 by enc_prepare) */
244         .octa           0x0b06010c07020d08030e09040f0a0500
245 
246 .LReverse_ShiftRows:    /* tbl index vector implementing AES InvShiftRows (loaded into v13 by dec_prepare) */
247         .octa           0x0306090c0f0205080b0e0104070a0d00
248 
249 .Lror32by8:             /* tbl index vector rotating each 32-bit word by 8 bits (loaded into v14 by prepare) */
250         .octa           0x0c0f0e0d080b0a090407060500030201

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php