~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm/crypto/aes-ce-core.S

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-only */
  2 /*
  3  * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
  4  *
  5  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
  6  */
  7 
  8 #include <linux/linkage.h>
  9 #include <asm/assembler.h>
 10 
 11         .text
 12         .arch           armv8-a
 13         .fpu            crypto-neon-fp-armv8
 14         .align          3
 15 
 16         .macro          enc_round, state, key
            @ One full AES encryption round, applied in place to the
            @ register passed as state, using the round key in key.
 17         aese.8          \state, \key            @ AddRoundKey + SubBytes + ShiftRows
 18         aesmc.8         \state, \state          @ MixColumns
 19         .endm
 20 
 21         .macro          dec_round, state, key
            @ One full AES decryption round, applied in place to the
            @ register passed as state, using the round key in key.
 22         aesd.8          \state, \key            @ AddRoundKey + InvShiftRows + InvSubBytes
 23         aesimc.8        \state, \state          @ InvMixColumns
 24         .endm
 25 
 26         .macro          enc_dround, key1, key2
            @ Two consecutive encryption rounds on the single block in q0,
            @ first with key1 and then with key2.
 27         enc_round       q0, \key1
 28         enc_round       q0, \key2
 29         .endm
 30 
 31         .macro          dec_dround, key1, key2
            @ Two consecutive decryption rounds on the single block in q0,
            @ first with key1 and then with key2.
 32         dec_round       q0, \key1
 33         dec_round       q0, \key2
 34         .endm
 35 
 36         .macro          enc_fround, key1, key2, key3
            @ Last two encryption rounds on the single block in q0.
            @ The final round has no MixColumns step, so it is a bare aese
            @ followed by an explicit XOR with the last round key (key3).
 37         enc_round       q0, \key1
 38         aese.8          q0, \key2
 39         veor            q0, q0, \key3
 40         .endm
 41 
 42         .macro          dec_fround, key1, key2, key3
            @ Last two decryption rounds on the single block in q0.
            @ The final round has no InvMixColumns step, so it is a bare
            @ aesd followed by an explicit XOR with key3. Callers may fold
            @ extra data into key3 (see the 1x loop of ce_aes_cbc_decrypt,
            @ which passes the previous ciphertext XORed into q14).
 43         dec_round       q0, \key1
 44         aesd.8          q0, \key2
 45         veor            q0, q0, \key3
 46         .endm
 47 
 48         .macro          enc_dround_4x, key1, key2
            @ Two encryption rounds on the four blocks in q0-q3. Each round
            @ is issued for all four blocks before the next round starts,
            @ so the four independent instruction streams are interleaved.
 49         enc_round       q0, \key1
 50         enc_round       q1, \key1
 51         enc_round       q2, \key1
 52         enc_round       q3, \key1
 53         enc_round       q0, \key2
 54         enc_round       q1, \key2
 55         enc_round       q2, \key2
 56         enc_round       q3, \key2
 57         .endm
 58 
 59         .macro          dec_dround_4x, key1, key2
            @ Two decryption rounds on the four blocks in q0-q3. Each round
            @ is issued for all four blocks before the next round starts,
            @ so the four independent instruction streams are interleaved.
 60         dec_round       q0, \key1
 61         dec_round       q1, \key1
 62         dec_round       q2, \key1
 63         dec_round       q3, \key1
 64         dec_round       q0, \key2
 65         dec_round       q1, \key2
 66         dec_round       q2, \key2
 67         dec_round       q3, \key2
 68         .endm
 69 
 70         .macro          enc_fround_4x, key1, key2, key3
            @ Last two encryption rounds on the four blocks in q0-q3:
            @ a full round with key1, a round without MixColumns with key2,
            @ then an XOR with the last round key key3.
 71         enc_round       q0, \key1
 72         enc_round       q1, \key1
 73         enc_round       q2, \key1
 74         enc_round       q3, \key1
 75         aese.8          q0, \key2
 76         aese.8          q1, \key2
 77         aese.8          q2, \key2
 78         aese.8          q3, \key2
 79         veor            q0, q0, \key3
 80         veor            q1, q1, \key3
 81         veor            q2, q2, \key3
 82         veor            q3, q3, \key3
 83         .endm
 84 
 85         .macro          dec_fround_4x, key1, key2, key3
            @ Last two decryption rounds on the four blocks in q0-q3:
            @ a full round with key1, a round without InvMixColumns with
            @ key2, then an XOR with the last round key key3.
 86         dec_round       q0, \key1
 87         dec_round       q1, \key1
 88         dec_round       q2, \key1
 89         dec_round       q3, \key1
 90         aesd.8          q0, \key2
 91         aesd.8          q1, \key2
 92         aesd.8          q2, \key2
 93         aesd.8          q3, \key2
 94         veor            q0, q0, \key3
 95         veor            q1, q1, \key3
 96         veor            q2, q2, \key3
 97         veor            q3, q3, \key3
 98         .endm
 99 
100         .macro          do_block, dround, fround
            @ Body shared by the four core transforms: runs every AES round
            @ using the macros passed as dround (two rounds) and fround
            @ (final two rounds plus last key), then returns with bx lr.
            @
            @ Expects: r3  = number of rounds
            @          ip  = address of the 3rd round key
            @          q8, q9 = first two round keys
            @          q14 = last round key
            @ Clobbers: q10-q13, ip and the condition flags, plus whatever
            @ state registers the round macros write.
            @
            @ Round keys are loaded in pairs, alternating between q10-q11
            @ and q12-q13, one step ahead of the rounds that consume them.
            @ The flags set by the cmp are consumed by blo/beq further down,
            @ none of the instructions in between write the flags.
101         cmp             r3, #12                 @ which key size?
102         vld1.32         {q10-q11}, [ip]!
103         \dround         q8, q9
104         vld1.32         {q12-q13}, [ip]!
105         \dround         q10, q11
106         vld1.32         {q10-q11}, [ip]!
107         \dround         q12, q13
108         vld1.32         {q12-q13}, [ip]!
109         \dround         q10, q11
110         blo             0f                      @ AES-128: 10 rounds
111         vld1.32         {q10-q11}, [ip]!
112         \dround         q12, q13
113         beq             1f                      @ AES-192: 12 rounds
114         vld1.32         {q12-q13}, [ip]         @ rounds > 12 (AES-256: 14 rounds)
115         \dround         q10, q11
116 0:      \fround         q12, q13, q14
117         bx              lr
118 
119 1:      \fround         q10, q11, q14
120         bx              lr
121         .endm
122 
123         /*
124          * Internal, non-AAPCS compliant functions that implement the core AES
125          * transforms. These should preserve all registers except q0 - q3,
             * q10 - q13 (used as scratch for the round keys), ip and the
             * condition flags. q1 - q3 are only written by the _4x versions.
             * All four return with 'bx lr', which is emitted by do_block.
126          * Arguments:
127          *   q0        : first in/output block
128          *   q1        : second in/output block (_4x version only)
129          *   q2        : third in/output block (_4x version only)
130          *   q3        : fourth in/output block (_4x version only)
131          *   q8        : first round key
132          *   q9        : second round key
133          *   q14       : final round key
134          *   r2        : address of round key array
135          *   r3        : number of rounds
136          */
137         .align          6
138 aes_encrypt:
139         add             ip, r2, #32             @ 3rd round key
140 .Laes_encrypt_tweak:
            @ Secondary entry point used by ce_aes_xts_init, which sets up
            @ ip, q8, q9 and q14 from the second key itself instead of r2.
141         do_block        enc_dround, enc_fround
142 ENDPROC(aes_encrypt)
143 
144         .align          6
145 aes_decrypt:
            @ Decrypt the single block in q0 in place. Internal calling
            @ convention: r2 = round key array, r3 = number of rounds,
            @ q8/q9 = first two round keys, q14 = value XORed in last.
            @ Clobbers q10-q13, ip and the flags. Returns via the bx lr
            @ emitted by do_block.
146         add             ip, r2, #32             @ 3rd round key
147         do_block        dec_dround, dec_fround
148 ENDPROC(aes_decrypt)
149 
150         .align          6
151 aes_encrypt_4x:
            @ Encrypt the four blocks in q0-q3 in place. Internal calling
            @ convention: r2 = round key array, r3 = number of rounds,
            @ q8/q9 = first two round keys, q14 = last round key.
            @ Clobbers q10-q13, ip and the flags. Returns via the bx lr
            @ emitted by do_block.
152         add             ip, r2, #32             @ 3rd round key
153         do_block        enc_dround_4x, enc_fround_4x
154 ENDPROC(aes_encrypt_4x)
155 
156         .align          6
157 aes_decrypt_4x:
            @ Decrypt the four blocks in q0-q3 in place. Internal calling
            @ convention: r2 = round key array, r3 = number of rounds,
            @ q8/q9 = first two round keys, q14 = last round key.
            @ Clobbers q10-q13, ip and the flags. Returns via the bx lr
            @ emitted by do_block.
158         add             ip, r2, #32             @ 3rd round key
159         do_block        dec_dround_4x, dec_fround_4x
160 ENDPROC(aes_decrypt_4x)
161 
162         .macro          prepare_key, rk, rounds
            @ Preload the round keys that stay resident in registers:
            @ q8/q9 = first two round keys, q14 = last round key.
            @ rk is the register holding the round key array address and
            @ rounds the register holding the number of rounds. Clobbers ip.
163         add             ip, \rk, \rounds, lsl #4        @ ip = rk + 16 * rounds
164         vld1.32         {q8-q9}, [\rk]          @ load first 2 round keys
165         vld1.32         {q14}, [ip]             @ load last round key
166         .endm
167 
168         /*
169          * aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
170          *                 int blocks)
171          * aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
172          *                 int blocks)
             *
             * The symbols defined below carry a ce_ prefix.
             * Register use: r0 = out, r1 = in, r2 = rk, r3 = rounds,
             *               r4 = blocks (fifth argument, read from the stack).
             * Blocks are handled four at a time while at least four remain,
             * then one at a time.
173          */
174 ENTRY(ce_aes_ecb_encrypt)
175         push            {r4, lr}
176         ldr             r4, [sp, #8]            @ blocks, just above the 2 pushed regs
177         prepare_key     r2, r3
178 .Lecbencloop4x:
179         subs            r4, r4, #4
180         bmi             .Lecbenc1x              @ fewer than 4 blocks left
181         vld1.8          {q0-q1}, [r1]!
182         vld1.8          {q2-q3}, [r1]!
183         bl              aes_encrypt_4x
184         vst1.8          {q0-q1}, [r0]!
185         vst1.8          {q2-q3}, [r0]!
186         b               .Lecbencloop4x
187 .Lecbenc1x:
188         adds            r4, r4, #4              @ undo the subtraction: 0..3 blocks left
189         beq             .Lecbencout
190 .Lecbencloop:
191         vld1.8          {q0}, [r1]!
192         bl              aes_encrypt
193         vst1.8          {q0}, [r0]!
194         subs            r4, r4, #1
195         bne             .Lecbencloop
196 .Lecbencout:
197         pop             {r4, pc}
198 ENDPROC(ce_aes_ecb_encrypt)
199 
200 ENTRY(ce_aes_ecb_decrypt)
            @ ECB decryption of r4 blocks from r1 (in) to r0 (out).
            @ r2 = round key array, r3 = rounds, blocks is the fifth
            @ argument and is read from the stack. Blocks are handled four
            @ at a time while at least four remain, then one at a time.
201         push            {r4, lr}
202         ldr             r4, [sp, #8]            @ blocks, just above the 2 pushed regs
203         prepare_key     r2, r3
204 .Lecbdecloop4x:
205         subs            r4, r4, #4
206         bmi             .Lecbdec1x              @ fewer than 4 blocks left
207         vld1.8          {q0-q1}, [r1]!
208         vld1.8          {q2-q3}, [r1]!
209         bl              aes_decrypt_4x
210         vst1.8          {q0-q1}, [r0]!
211         vst1.8          {q2-q3}, [r0]!
212         b               .Lecbdecloop4x
213 .Lecbdec1x:
214         adds            r4, r4, #4              @ undo the subtraction: 0..3 blocks left
215         beq             .Lecbdecout
216 .Lecbdecloop:
217         vld1.8          {q0}, [r1]!
218         bl              aes_decrypt
219         vst1.8          {q0}, [r0]!
220         subs            r4, r4, #1
221         bne             .Lecbdecloop
222 .Lecbdecout:
223         pop             {r4, pc}
224 ENDPROC(ce_aes_ecb_decrypt)
225 
226         /*
227          * aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
228          *                 int blocks, u8 iv[])
229          * aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
230          *                 int blocks, u8 iv[])
             *
             * The symbols defined below carry a ce_ prefix.
             * Register use: r0 = out, r1 = in, r2 = rk, r3 = rounds,
             *               r4 = blocks, r5 = iv (both read from the stack).
             * On return the buffer at iv holds the chaining value for the next
             * call.
231          */
232 ENTRY(ce_aes_cbc_encrypt)
            @ CBC encryption is inherently serial, so there is only a
            @ one-block loop. q0 carries the chaining value: the iv at
            @ first, then each ciphertext block. r6 is saved and restored
            @ but not otherwise used here.
            @ NOTE(review): the loop body runs before the count is tested,
            @ so this assumes blocks >= 1 - confirm against the callers.
233         push            {r4-r6, lr}
234         ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = iv
235         vld1.8          {q0}, [r5]
236         prepare_key     r2, r3
237 .Lcbcencloop:
238         vld1.8          {q1}, [r1]!             @ get next pt block
239         veor            q0, q0, q1              @ ..and xor with iv
240         bl              aes_encrypt
241         vst1.8          {q0}, [r0]!
242         subs            r4, r4, #1
243         bne             .Lcbcencloop
244         vst1.8          {q0}, [r5]              @ return last ct block as next iv
245         pop             {r4-r6, pc}
246 ENDPROC(ce_aes_cbc_encrypt)
247 
248 ENTRY(ce_aes_cbc_decrypt)
            @ CBC decryption of r4 blocks from r1 (in) to r0 (out), with
            @ r2 = round key array, r3 = rounds and r5 = iv. blocks and iv
            @ are read from the stack. Blocks are handled four at a time
            @ while at least four remain, then one at a time. q15 always
            @ holds the previous ciphertext block (the iv at first) and is
            @ written back to the iv buffer on exit.
249         push            {r4-r6, lr}
250         ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = iv
251         vld1.8          {q15}, [r5]             @ keep iv in q15
252         prepare_key     r2, r3
253 .Lcbcdecloop4x:
254         subs            r4, r4, #4
255         bmi             .Lcbcdec1x              @ fewer than 4 blocks left
256         vld1.8          {q0-q1}, [r1]!
257         vld1.8          {q2-q3}, [r1]!
258         vmov            q4, q0                  @ keep the ciphertext: each block
259         vmov            q5, q1                  @ is the chaining value for the
260         vmov            q6, q2                  @ block that follows it
261         vmov            q7, q3
262         bl              aes_decrypt_4x
263         veor            q0, q0, q15
264         veor            q1, q1, q4
265         veor            q2, q2, q5
266         veor            q3, q3, q6
267         vmov            q15, q7                 @ last ct block chains into next group
268         vst1.8          {q0-q1}, [r0]!
269         vst1.8          {q2-q3}, [r0]!
270         b               .Lcbcdecloop4x
271 .Lcbcdec1x:
272         adds            r4, r4, #4              @ undo the subtraction: 0..3 blocks left
273         beq             .Lcbcdecout
274         vmov            q6, q14                 @ preserve last round key
275 .Lcbcdecloop:
            @ The final XOR in aes_decrypt uses q14, so folding the
            @ previous ciphertext into q14 performs the last AddRoundKey
            @ and the CBC chaining XOR in one instruction.
276         vld1.8          {q0}, [r1]!             @ get next ct block
277         veor            q14, q15, q6            @ combine prev ct with last key
278         vmov            q15, q0
279         bl              aes_decrypt
280         vst1.8          {q0}, [r0]!
281         subs            r4, r4, #1
282         bne             .Lcbcdecloop
283 .Lcbcdecout:
284         vst1.8          {q15}, [r5]             @ return last ct block as next iv
285         pop             {r4-r6, pc}
286 ENDPROC(ce_aes_cbc_decrypt)
287 
288 
289         /*
290          * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
291          *                        int rounds, int bytes, u8 const iv[])
292          * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
293          *                        int rounds, int bytes, u8 const iv[])
             *
             * Handle the last two blocks of a CBC ciphertext stealing operation:
             * one full block followed by a final block of (bytes - 16) bytes.
             * Register use: r0 = out, r1 = in, r2 = rk, r3 = rounds,
             *               r4 = bytes, r5 = iv (both read from the stack).
             * The iv buffer is only read. Input and output are each accessed
             * as two overlapping 16-byte vectors.
             * NOTE(review): the table indexing below looks like it expects
             * 16 < bytes <= 32 - confirm against the callers.
294          */
295 
296 ENTRY(ce_aes_cbc_cts_encrypt)
297         push            {r4-r6, lr}
298         ldrd            r4, r5, [sp, #16]       @ r4 = bytes, r5 = iv
299 
300         movw            ip, :lower16:.Lcts_permute_table
301         movt            ip, :upper16:.Lcts_permute_table
302         sub             r4, r4, #16             @ r4 = length of the final block
303         add             lr, ip, #32
304         add             ip, ip, r4
305         sub             lr, lr, r4
306         vld1.8          {q5}, [ip]              @ permute vector at table + r4
307         vld1.8          {q6}, [lr]              @ permute vector at table + 32 - r4
308 
309         add             ip, r1, r4
310         vld1.8          {q0}, [r1]                      @ overlapping loads
311         vld1.8          {q3}, [ip]                      @ last 16 bytes of input
312 
313         vld1.8          {q1}, [r5]                      @ get iv
314         prepare_key     r2, r3
315 
316         veor            q0, q0, q1                      @ xor with iv
317         bl              aes_encrypt
318 
            @ q2 = leading r4 bytes of the first ciphertext block, moved to
            @      the top of the vector (zero below). These become the
            @      final, short output block.
            @ q1 = trailing r4 bytes of the input, moved to the bottom of
            @      the vector and zero padded.
319         vtbl.8          d4, {d0-d1}, d10
320         vtbl.8          d5, {d0-d1}, d11
321         vtbl.8          d2, {d6-d7}, d12
322         vtbl.8          d3, {d6-d7}, d13
323 
324         veor            q0, q0, q1
325         bl              aes_encrypt
326 
            @ q2 is stored first. Its low bytes are then overwritten by
            @ the full block stored at out.
327         add             r4, r0, r4
328         vst1.8          {q2}, [r4]                      @ overlapping stores
329         vst1.8          {q0}, [r0]
330 
331         pop             {r4-r6, pc}
332 ENDPROC(ce_aes_cbc_cts_encrypt)
333 
334 ENTRY(ce_aes_cbc_cts_decrypt)
            @ Decrypt the last two blocks of a CBC ciphertext stealing
            @ operation: one full block followed by a final block of
            @ (bytes - 16) bytes. r0 = out, r1 = in, r2 = round key array,
            @ r3 = rounds, r4 = bytes and r5 = iv (both read from the
            @ stack). The iv buffer is only read.
            @ NOTE(review): the table indexing below looks like it expects
            @ 16 < bytes <= 32 - confirm against the callers.
335         push            {r4-r6, lr}
336         ldrd            r4, r5, [sp, #16]       @ r4 = bytes, r5 = iv
337 
338         movw            ip, :lower16:.Lcts_permute_table
339         movt            ip, :upper16:.Lcts_permute_table
340         sub             r4, r4, #16             @ r4 = length of the final block
341         add             lr, ip, #32
342         add             ip, ip, r4
343         sub             lr, lr, r4
344         vld1.8          {q5}, [ip]              @ permute vector at table + r4
345         vld1.8          {q6}, [lr]              @ permute vector at table + 32 - r4
346 
347         add             ip, r1, r4
348         vld1.8          {q0}, [r1]                      @ overlapping loads
349         vld1.8          {q1}, [ip]                      @ last 16 bytes of input
350 
351         vld1.8          {q3}, [r5]                      @ get iv
352         prepare_key     r2, r3
353 
354         bl              aes_decrypt
355 
            @ q2 = leading r4 bytes of the decrypted block, moved to the
            @      top of the vector (zero below).
            @ q0 = its leading r4 bytes replaced by the trailing r4 bytes
            @      of the input. vtbx leaves the other bytes of q0 as they
            @      were.
356         vtbl.8          d4, {d0-d1}, d10
357         vtbl.8          d5, {d0-d1}, d11
358         vtbx.8          d0, {d2-d3}, d12
359         vtbx.8          d1, {d2-d3}, d13
360 
361         veor            q1, q1, q2              @ top r4 bytes = final short pt block
362         bl              aes_decrypt
363         veor            q0, q0, q3                      @ xor with iv
364 
            @ q1 is stored first. Its low bytes are then overwritten by
            @ the full block stored at out.
365         add             r4, r0, r4
366         vst1.8          {q1}, [r4]                      @ overlapping stores
367         vst1.8          {q0}, [r0]
368 
369         pop             {r4-r6, pc}
370 ENDPROC(ce_aes_cbc_cts_decrypt)
371 
372 
373         /*
374          * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
375          *                 int blocks, u8 ctr[])
             *
             * The symbol defined below carries a ce_ prefix.
             * Register use: r0 = out, r1 = in, r2 = rk, r3 = rounds,
             *               r4 = blocks, r5 = ctr (both read from the stack),
             *               r6 = byte-swapped copy of the last 32-bit word of
             *                    the counter block,
             *               q7 = current counter block.
             * The 4x loop only increments that last word, so it is used only
             * when adding blocks to r6 does not carry out of 32 bits. Otherwise
             * everything goes through the 1x loop, which propagates the carry
             * into the other three words. The next counter value is written
             * back to ctr on return.
376          */
377 ENTRY(ce_aes_ctr_encrypt)
378         push            {r4-r6, lr}
379         ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = ctr
380         vld1.8          {q7}, [r5]              @ load ctr
381         prepare_key     r2, r3
382         vmov            r6, s31                 @ keep swabbed ctr in r6
383         rev             r6, r6
384         cmn             r6, r4                  @ 32 bit overflow?
385         bcs             .Lctrloop
386 .Lctrloop4x:
387         subs            r4, r4, #4
388         bmi             .Lctr1x                 @ fewer than 4 blocks left
389 
390         /*
391          * NOTE: the sequence below has been carefully tweaked to avoid
392          * a silicon erratum that exists in Cortex-A57 (#1742098) and
393          * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
394          * may produce an incorrect result if they take their input from a
395          * register of which a single 32-bit lane has been updated the last
396          * time it was modified. To work around this, the lanes of registers
397          * q0-q3 below are not manipulated individually, and the different
398          * counter values are prepared by successive manipulations of q7.
399          */
400         add             ip, r6, #1
401         vmov            q0, q7
402         rev             ip, ip
403         add             lr, r6, #2
404         vmov            s31, ip                 @ set lane 3 of q1 via q7
405         add             ip, r6, #3
406         rev             lr, lr
407         vmov            q1, q7
408         vmov            s31, lr                 @ set lane 3 of q2 via q7
409         rev             ip, ip
410         vmov            q2, q7
411         vmov            s31, ip                 @ set lane 3 of q3 via q7
412         add             r6, r6, #4
413         vmov            q3, q7
414 
415         vld1.8          {q4-q5}, [r1]!
416         vld1.8          {q6}, [r1]!
417         vld1.8          {q15}, [r1]!
418         bl              aes_encrypt_4x
419         veor            q0, q0, q4
420         veor            q1, q1, q5
421         veor            q2, q2, q6
422         veor            q3, q3, q15
423         rev             ip, r6
424         vst1.8          {q0-q1}, [r0]!
425         vst1.8          {q2-q3}, [r0]!
426         vmov            s31, ip                 @ q7 = counter for the next group
427         b               .Lctrloop4x
428 .Lctr1x:
429         adds            r4, r4, #4              @ undo the subtraction: 0..3 blocks left
430         beq             .Lctrout
431 .Lctrloop:
432         vmov            q0, q7
433         bl              aes_encrypt
434 
435         adds            r6, r6, #1              @ increment BE ctr
436         rev             ip, r6
437         vmov            s31, ip
438         bcs             .Lctrcarry
439 
440 .Lctrcarrydone:
441         subs            r4, r4, #1
442         bmi             .Lctrtailblock          @ blocks < 0 means tail block
443         vld1.8          {q3}, [r1]!
444         veor            q3, q0, q3
445         vst1.8          {q3}, [r0]!
446         bne             .Lctrloop               @ flags still from the subs above
447 
448 .Lctrout:
449         vst1.8          {q7}, [r5]              @ return next CTR value
450         pop             {r4-r6, pc}
451 
452 .Lctrtailblock:
            @ No input is read here: the raw key stream block is written
            @ to out, presumably for the caller to apply to a partial
            @ block - verify against the caller.
453         vst1.8          {q0}, [r0, :64]         @ return the key stream
454         b               .Lctrout
455 
456 .Lctrcarry:
            @ The last word wrapped around: ripple the carry through the
            @ remaining three big-endian words, stopping as soon as one of
            @ them does not wrap.
457         .irp            sreg, s30, s29, s28
458         vmov            ip, \sreg               @ load next word of ctr
459         rev             ip, ip                  @ ... to handle the carry
460         adds            ip, ip, #1
461         rev             ip, ip
462         vmov            \sreg, ip
463         bcc             .Lctrcarrydone
464         .endr
465         b               .Lctrcarrydone
466 ENDPROC(ce_aes_ctr_encrypt)
467 
468         /*
469          * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
470          *                 int bytes, u8 iv[], u32 const rk2[], int first)
471          * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
472          *                 int bytes, u8 iv[], u32 const rk2[], int first)
             *
             * The symbols defined below carry a ce_ prefix.
             * Register use: r0 = out, r1 = in, r2 = rk1, r3 = rounds,
             *               r4 = bytes, r5 = iv, r6 = first (read from the stack),
             *               q4 = current tweak, q15 = tweak mask vector.
473          */
474 
475         .macro          next_tweak, out, in, const, tmp
            @ Derive the next XTS tweak: out = in multiplied by x in
            @ GF(2^128). const must be the mask vector composed in
            @ ce_aes_xts_init (low 64-bit lane = 1, high lane = 0x87).
            @ tmp is a scratch q register. out and in may be the same.
            @ Does not touch the condition flags.
476         vshr.s64        \tmp, \in, #63          @ all-ones in each lane whose msb is set
477         vand            \tmp, \tmp, \const
478         vadd.u64        \out, \in, \in          @ shift both 64-bit lanes left by 1
479         vext.8          \tmp, \tmp, \tmp, #8    @ swap the two lanes of tmp
480         veor            \out, \out, \tmp        @ carry into high lane, 0x87 into low
481         .endm
482 
483 ce_aes_xts_init:
            @ Prologue shared by ce_aes_xts_encrypt and ce_aes_xts_decrypt.
            @ Both call it with bl right after push {r4-r6, lr}, so the
            @ stacked arguments start at sp + 16.
            @ On return: q15 = tweak mask vector, r4 = bytes, r5 = iv,
            @ q0 = the iv, encrypted with key 2 only when first == 1.
            @ r6 = first on the early return, or the key 2 address
            @ otherwise. The callers test r6 against 0 afterwards.
            @ On the first == 1 path q8, q9, q14, ip and everything
            @ aes_encrypt clobbers are overwritten as well.
484         vmov.i32        d30, #0x87              @ compose tweak mask vector
485         vmovl.u32       q15, d30                @ widen: both 64-bit lanes = 0x87
486         vshr.u64        d30, d31, #7            @ low lane = 0x87 >> 7 = 1
487 
488         ldrd            r4, r5, [sp, #16]       @ load args
489         ldr             r6, [sp, #28]           @ r6 = first
490         vld1.8          {q0}, [r5]              @ load iv
491         teq             r6, #1                  @ start of a block?
492         bxne            lr
493 
494         @ Encrypt the IV in q0 with the second AES key. This should only
495         @ be done at the start of a block.
496         ldr             r6, [sp, #24]           @ load AES key 2
497         prepare_key     r6, r3
498         add             ip, r6, #32             @ 3rd round key of key 2
499         b               .Laes_encrypt_tweak     @ tail call
500 ENDPROC(ce_aes_xts_init)
501 
502 ENTRY(ce_aes_xts_encrypt)
            @ XTS encryption of r4 bytes from r1 (in) to r0 (out) with data
            @ key r2 and r3 rounds. bytes, iv, rk2 and first are read from
            @ the stack by ce_aes_xts_init. Full blocks are handled four at
            @ a time while at least 64 bytes remain, then one at a time. A
            @ trailing partial block is handled with ciphertext stealing.
            @ q4 holds the current tweak. The tweak of the last block
            @ processed is written back to iv on return.
503         push            {r4-r6, lr}
504 
505         bl              ce_aes_xts_init         @ run shared prologue
506         prepare_key     r2, r3
507         vmov            q4, q0
508 
            @ After ce_aes_xts_init r6 is nonzero when first == 1 (it holds
            @ the key 2 address), so the freshly encrypted iv is used as is.
            @ With first == 0 the stored tweak is advanced before use.
509         teq             r6, #0                  @ start of a block?
510         bne             .Lxtsenc4x
511 
512 .Lxtsencloop4x:
513         next_tweak      q4, q4, q15, q10
514 .Lxtsenc4x:
515         subs            r4, r4, #64
516         bmi             .Lxtsenc1x              @ fewer than 64 bytes left
517         vld1.8          {q0-q1}, [r1]!          @ get 4 pt blocks
518         vld1.8          {q2-q3}, [r1]!
519         next_tweak      q5, q4, q15, q10
520         veor            q0, q0, q4
521         next_tweak      q6, q5, q15, q10
522         veor            q1, q1, q5
523         next_tweak      q7, q6, q15, q10
524         veor            q2, q2, q6
525         veor            q3, q3, q7
526         bl              aes_encrypt_4x
527         veor            q0, q0, q4
528         veor            q1, q1, q5
529         veor            q2, q2, q6
530         veor            q3, q3, q7
531         vst1.8          {q0-q1}, [r0]!          @ write 4 ct blocks
532         vst1.8          {q2-q3}, [r0]!
533         vmov            q4, q7                  @ q4 = last tweak used
534         teq             r4, #0
535         beq             .Lxtsencret
536         b               .Lxtsencloop4x
537 .Lxtsenc1x:
            @ NOTE(review): r4 can only be 0 here if bytes was 0 on entry.
            @ In that case .Lxtsencout still stores q0 to out - confirm the
            @ callers never pass 0.
538         adds            r4, r4, #64             @ undo the subtraction
539         beq             .Lxtsencout
540         subs            r4, r4, #16
541         bmi             .LxtsencctsNx           @ only a partial block is left
542 .Lxtsencloop:
543         vld1.8          {q0}, [r1]!
544 .Lxtsencctsout:
545         veor            q0, q0, q4
546         bl              aes_encrypt
547         veor            q0, q0, q4
548         teq             r4, #0
549         beq             .Lxtsencout
550         subs            r4, r4, #16
551         next_tweak      q4, q4, q15, q6         @ leaves the flags from subs intact
552         bmi             .Lxtsenccts             @ less than a full block left
553         vst1.8          {q0}, [r0]!
554         b               .Lxtsencloop
555 .Lxtsencout:
556         vst1.8          {q0}, [r0]
557 .Lxtsencret:
558         vst1.8          {q4}, [r5]
559         pop             {r4-r6, pc}
560 
561 .LxtsencctsNx:
            @ The partial block directly follows a group of four: steal
            @ from the last ciphertext block of that group, which is still
            @ in q3, and step the output pointer back onto it.
562         vmov            q0, q3
563         sub             r0, r0, #16
564 .Lxtsenccts:
            @ Ciphertext stealing. q0 = ciphertext of the last full block,
            @ r4 = (length of the partial block) - 16, q4 = next tweak.
565         movw            ip, :lower16:.Lcts_permute_table
566         movt            ip, :upper16:.Lcts_permute_table
567 
568         add             r1, r1, r4              @ rewind input pointer
569         add             r4, r4, #16             @ # bytes in final block
570         add             lr, ip, #32
571         add             ip, ip, r4
572         sub             lr, lr, r4
573         add             r4, r0, r4              @ output address of final block
574 
575         vld1.8          {q1}, [r1]              @ load final partial block
576         vld1.8          {q2}, [ip]
577         vld1.8          {q3}, [lr]
578 
            @ q2 = leading bytes of q0 moved to the top of the vector.
            @      These become the final, short output block.
            @ q0 = its leading bytes replaced by the partial input block.
            @      vtbx leaves the remaining bytes of q0 as they were.
579         vtbl.8          d4, {d0-d1}, d4
580         vtbl.8          d5, {d0-d1}, d5
581         vtbx.8          d0, {d2-d3}, d6
582         vtbx.8          d1, {d2-d3}, d7
583 
584         vst1.8          {q2}, [r4]              @ overlapping stores
585         mov             r4, #0                  @ makes .Lxtsencctsout exit after one block
586         b               .Lxtsencctsout
587 ENDPROC(ce_aes_xts_encrypt)
588 
589 
590 ENTRY(ce_aes_xts_decrypt)
            @ XTS decryption of r4 bytes from r1 (in) to r0 (out) with data
            @ key r2 and r3 rounds. bytes, iv, rk2 and first are read from
            @ the stack by ce_aes_xts_init. Full blocks are handled four at
            @ a time while at least 64 bytes remain, then one at a time. A
            @ trailing partial block is handled with ciphertext stealing.
            @ q4 holds the current tweak and is written back to iv on
            @ return.
591         push            {r4-r6, lr}
592 
593         bl              ce_aes_xts_init         @ run shared prologue
594         prepare_key     r2, r3
595         vmov            q4, q0
596 
            /*
             * With a partial final block, the last full block must not go
             * through the regular loops: it is decrypted in .Lxtsdeccts.
             */
597         /* subtract 16 bytes if we are doing CTS */
598         tst             r4, #0xf
599         subne           r4, r4, #0x10
600 
            @ After ce_aes_xts_init r6 is nonzero when first == 1 (it holds
            @ the key 2 address), so the freshly encrypted iv is used as is.
            @ With first == 0 the stored tweak is advanced before use.
601         teq             r6, #0                  @ start of a block?
602         bne             .Lxtsdec4x
603 
604 .Lxtsdecloop4x:
605         next_tweak      q4, q4, q15, q10
606 .Lxtsdec4x:
607         subs            r4, r4, #64
608         bmi             .Lxtsdec1x              @ fewer than 64 bytes left
609         vld1.8          {q0-q1}, [r1]!          @ get 4 ct blocks
610         vld1.8          {q2-q3}, [r1]!
611         next_tweak      q5, q4, q15, q10
612         veor            q0, q0, q4
613         next_tweak      q6, q5, q15, q10
614         veor            q1, q1, q5
615         next_tweak      q7, q6, q15, q10
616         veor            q2, q2, q6
617         veor            q3, q3, q7
618         bl              aes_decrypt_4x
619         veor            q0, q0, q4
620         veor            q1, q1, q5
621         veor            q2, q2, q6
622         veor            q3, q3, q7
623         vst1.8          {q0-q1}, [r0]!          @ write 4 pt blocks
624         vst1.8          {q2-q3}, [r0]!
625         vmov            q4, q7                  @ q4 = last tweak used
626         teq             r4, #0
627         beq             .Lxtsdecout
628         b               .Lxtsdecloop4x
629 .Lxtsdec1x:
630         adds            r4, r4, #64             @ undo the subtraction
631         beq             .Lxtsdecout
632         subs            r4, r4, #16
633 .Lxtsdecloop:
634         vld1.8          {q0}, [r1]!
635         bmi             .Lxtsdeccts             @ flags from the preceding subs
636 .Lxtsdecctsout:
637         veor            q0, q0, q4
638         bl              aes_decrypt
639         veor            q0, q0, q4
640         vst1.8          {q0}, [r0]!
641         teq             r4, #0
642         beq             .Lxtsdecout
643         subs            r4, r4, #16
644         next_tweak      q4, q4, q15, q6         @ leaves the flags from subs intact
645         b               .Lxtsdecloop
646 .Lxtsdecout:
647         vst1.8          {q4}, [r5]
648         pop             {r4-r6, pc}
649 
650 .Lxtsdeccts:
            @ Ciphertext stealing. q0 = last full ciphertext block,
            @ r4 = (length of the partial block) - 16, q4 = current tweak.
            @ The last full block is decrypted with the tweak after q4
            @ (q5). The recombined block is then decrypted with q4 itself
            @ at .Lxtsdecctsout.
651         movw            ip, :lower16:.Lcts_permute_table
652         movt            ip, :upper16:.Lcts_permute_table
653 
654         add             r1, r1, r4              @ rewind input pointer
655         add             r4, r4, #16             @ # bytes in final block
656         add             lr, ip, #32
657         add             ip, ip, r4
658         sub             lr, lr, r4
659         add             r4, r0, r4              @ output address of final block
660 
661         next_tweak      q5, q4, q15, q6
662 
663         vld1.8          {q1}, [r1]              @ load final partial block
664         vld1.8          {q2}, [ip]
665         vld1.8          {q3}, [lr]
666 
667         veor            q0, q0, q5
668         bl              aes_decrypt
669         veor            q0, q0, q5
670 
            @ q2 = leading bytes of q0 moved to the top of the vector.
            @      These become the final, short output block.
            @ q0 = its leading bytes replaced by the partial input block.
            @      vtbx leaves the remaining bytes of q0 as they were.
671         vtbl.8          d4, {d0-d1}, d4
672         vtbl.8          d5, {d0-d1}, d5
673         vtbx.8          d0, {d2-d3}, d6
674         vtbx.8          d1, {d2-d3}, d7
675 
676         vst1.8          {q2}, [r4]              @ overlapping stores
677         mov             r4, #0                  @ makes .Lxtsdecctsout exit after one block
678         b               .Lxtsdecctsout
679 ENDPROC(ce_aes_xts_decrypt)
680 
681         /*
682          * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
683          *                             AES sbox substitution on each byte in
684          *                             'input'
             *
             * The input word is replicated into all four columns, so the
             * ShiftRows step of aese has no visible effect, and the state is
             * zero, so its AddRoundKey step just passes the input through.
             * Clobbers q0 and q1.
685          */
686 ENTRY(ce_aes_sub)
687         vdup.32         q1, r0                  @ input in every 32-bit lane
688         veor            q0, q0, q0              @ all-zero state
689         aese.8          q0, q1
690         vmov            r0, s0                  @ return the first lane
691         bx              lr
692 ENDPROC(ce_aes_sub)
693 
694         /*
695          * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
696          *                                        operation on round key *src
             *
             * Reads 16 bytes from src (r1) and writes the transformed 16 bytes to
             * dst (r0). Clobbers q0.
697          */
698 ENTRY(ce_aes_invert)
699         vld1.32         {q0}, [r1]
700         aesimc.8        q0, q0
701         vst1.32         {q0}, [r0]
702         bx              lr
703 ENDPROC(ce_aes_invert)
704 
705         .section        ".rodata", "a"
706         .align          6
707 .Lcts_permute_table:
            @ Index vectors for the vtbl/vtbx byte shuffles in the
            @ ciphertext stealing code: 16 bytes of 0xff, the identity
            @ permutation 0..15, then 16 more bytes of 0xff. A 16-byte load
            @ at table + n or table + 32 - n gives a vector that shifts a
            @ block by n bytes. The 0xff entries are out of range, so vtbl
            @ writes zero for them and vtbx leaves the destination byte
            @ unchanged.
708         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
709         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
710         .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
711         .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
712         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
713         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php