TOMOYO Linux Cross Reference
Linux/arch/arm/crypto/ghash-ce-core.S


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
 *
 * Copyright (C) 2015 - 2017 Linaro Ltd.
 * Copyright (C) 2023 Google LLC. <ardb@google.com>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

        .arch           armv8-a
        .fpu            crypto-neon-fp-armv8

        SHASH           .req    q0
        T1              .req    q1
        XL              .req    q2
        XM              .req    q3
        XH              .req    q4
        IN1             .req    q4

        SHASH_L         .req    d0
        SHASH_H         .req    d1
        T1_L            .req    d2
        T1_H            .req    d3
        XL_L            .req    d4
        XL_H            .req    d5
        XM_L            .req    d6
        XM_H            .req    d7
        XH_L            .req    d8

        t0l             .req    d10
        t0h             .req    d11
        t1l             .req    d12
        t1h             .req    d13
        t2l             .req    d14
        t2h             .req    d15
        t3l             .req    d16
        t3h             .req    d17
        t4l             .req    d18
        t4h             .req    d19

        t0q             .req    q5
        t1q             .req    q6
        t2q             .req    q7
        t3q             .req    q8
        t4q             .req    q9
        XH2             .req    q9

        s1l             .req    d20
        s1h             .req    d21
        s2l             .req    d22
        s2h             .req    d23
        s3l             .req    d24
        s3h             .req    d25
        s4l             .req    d26
        s4h             .req    d27

        MASK            .req    d28
        SHASH2_p8       .req    d28

        k16             .req    d29
        k32             .req    d30
        k48             .req    d31
        SHASH2_p64      .req    d31

        HH              .req    q10
        HH3             .req    q11
        HH4             .req    q12
        HH34            .req    q13

        HH_L            .req    d20
        HH_H            .req    d21
        HH3_L           .req    d22
        HH3_H           .req    d23
        HH4_L           .req    d24
        HH4_H           .req    d25
        HH34_L          .req    d26
        HH34_H          .req    d27
        SHASH2_H        .req    d29

        XL2             .req    q5
        XM2             .req    q6
        T2              .req    q7
        T3              .req    q8

        XL2_L           .req    d10
        XL2_H           .req    d11
        XM2_L           .req    d12
        XM2_H           .req    d13
        T3_L            .req    d16
        T3_H            .req    d17

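        /*
         * Note: several of the aliases above intentionally map onto the
         * same physical registers (e.g. IN1 and XH both name q4, XH2 and
         * t4q both name q9, and XL2/XM2/T2/T3 overlap t0q..t3q).  The
         * overlapping names appear to belong to different phases of the
         * computation, so the values they carry are never live at the
         * same time.
         */
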
        .text

        .macro          __pmull_p64, rd, rn, rm, b1, b2, b3, b4
        vmull.p64       \rd, \rn, \rm
        .endm

        /*
         * This implementation of 64x64 -> 128 bit polynomial multiplication
         * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
         * "Fast Software Polynomial Multiplication on ARM Processors Using
         * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
         * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
         *
         * It has been slightly tweaked for in-order performance, and to allow
         * 'rq' to overlap with 'ad' or 'bd'.
         */
        .macro          __pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
        vext.8          t0l, \ad, \ad, #1       @ A1
        .ifc            \b1, t4l
        vext.8          t4l, \bd, \bd, #1       @ B1
        .endif
        vmull.p8        t0q, t0l, \bd           @ F = A1*B
        vext.8          t1l, \ad, \ad, #2       @ A2
        vmull.p8        t4q, \ad, \b1           @ E = A*B1
        .ifc            \b2, t3l
        vext.8          t3l, \bd, \bd, #2       @ B2
        .endif
        vmull.p8        t1q, t1l, \bd           @ H = A2*B
        vext.8          t2l, \ad, \ad, #3       @ A3
        vmull.p8        t3q, \ad, \b2           @ G = A*B2
        veor            t0q, t0q, t4q           @ L = E + F
        .ifc            \b3, t4l
        vext.8          t4l, \bd, \bd, #3       @ B3
        .endif
        vmull.p8        t2q, t2l, \bd           @ J = A3*B
        veor            t0l, t0l, t0h           @ t0 = (L) (P0 + P1) << 8
        veor            t1q, t1q, t3q           @ M = G + H
        .ifc            \b4, t3l
        vext.8          t3l, \bd, \bd, #4       @ B4
        .endif
        vmull.p8        t4q, \ad, \b3           @ I = A*B3
        veor            t1l, t1l, t1h           @ t1 = (M) (P2 + P3) << 16
        vmull.p8        t3q, \ad, \b4           @ K = A*B4
        vand            t0h, t0h, k48
        vand            t1h, t1h, k32
        veor            t2q, t2q, t4q           @ N = I + J
        veor            t0l, t0l, t0h
        veor            t1l, t1l, t1h
        veor            t2l, t2l, t2h           @ t2 = (N) (P4 + P5) << 24
        vand            t2h, t2h, k16
        veor            t3l, t3l, t3h           @ t3 = (K) (P6 + P7) << 32
        vmov.i64        t3h, #0
        vext.8          t0q, t0q, t0q, #15
        veor            t2l, t2l, t2h
        vext.8          t1q, t1q, t1q, #14
        vmull.p8        \rq, \ad, \bd           @ D = A*B
        vext.8          t2q, t2q, t2q, #13
        vext.8          t3q, t3q, t3q, #12
        veor            t0q, t0q, t1q
        veor            t2q, t2q, t3q
        veor            \rq, \rq, t0q
        veor            \rq, \rq, t2q
        .endm
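
        /*
         * Putting the annotations above together, the final combination
         * computed by this macro is
         *
         *     A*B = D ^ t0 ^ t1 ^ t2 ^ t3
         *
         * where t0..t3 already hold (P0+P1) << 8, (P2+P3) << 16,
         * (P4+P5) << 24 and (P6+P7) << 32 respectively.  As a rough
         * illustration only (not part of the build), each lane of a
         * vmull.p8 performs a carry-less 8x8 -> 16 bit multiplication,
         * i.e. something like the following C model:
         *
         *     static uint16_t clmul8(uint8_t a, uint8_t b)
         *     {
         *             uint16_t r = 0;
         *             int i;
         *
         *             for (i = 0; i < 8; i++)
         *                     if (b & (1 << i))
         *                             r ^= (uint16_t)a << i;
         *             return r;
         *     }
         */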

        //
        // PMULL (64x64->128) based reduction for CPUs that can do
        // it in a single instruction.
        //
        .macro          __pmull_reduce_p64
        vmull.p64       T1, XL_L, MASK

        veor            XH_L, XH_L, XM_H
        vext.8          T1, T1, T1, #8
        veor            XL_H, XL_H, XM_L
        veor            T1, T1, XL

        vmull.p64       XL, T1_H, MASK
        .endm
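
        /*
         * GHASH works in GF(2^128) with the reduction polynomial
         * x^128 + x^7 + x^2 + x + 1.  The callers below set up MASK with
         * vmov.i8 MASK, #0xe1 followed by vshl.u64 MASK, MASK, #57:
         * shifting the repeated 0xe1 byte pattern left by 57 within each
         * 64-bit lane leaves 0xc200000000000000, a constant derived from
         * the low-order terms of that polynomial, which the two
         * vmull.p64 instructions above use to fold the 256-bit product
         * back down to 128 bits.
         */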

        //
        // Alternative reduction for CPUs that lack support for the
        // 64x64->128 PMULL instruction
        //
        .macro          __pmull_reduce_p8
        veor            XL_H, XL_H, XM_L
        veor            XH_L, XH_L, XM_H

        vshl.i64        T1, XL, #57
        vshl.i64        T2, XL, #62
        veor            T1, T1, T2
        vshl.i64        T2, XL, #63
        veor            T1, T1, T2
        veor            XL_H, XL_H, T1_L
        veor            XH_L, XH_L, T1_H

        vshr.u64        T1, XL, #1
        veor            XH, XH, XL
        veor            XL, XL, T1
        vshr.u64        T1, T1, #6
        vshr.u64        XL, XL, #1
        .endm

        .macro          ghash_update, pn, enc, aggregate=1, head=1
        vld1.64         {XL}, [r1]

        .if             \head
        /* do the head block first, if supplied */
        ldr             ip, [sp]
        teq             ip, #0
        beq             0f
        vld1.64         {T1}, [ip]
        teq             r0, #0
        b               3f
        .endif

0:      .ifc            \pn, p64
        .if             \aggregate
        tst             r0, #3                  // skip until #blocks is a
        bne             2f                      // round multiple of 4

        vld1.8          {XL2-XM2}, [r2]!
1:      vld1.8          {T2-T3}, [r2]!

        .ifnb           \enc
        \enc\()_4x      XL2, XM2, T2, T3

        add             ip, r3, #16
        vld1.64         {HH}, [ip, :128]!
        vld1.64         {HH3-HH4}, [ip, :128]

        veor            SHASH2_p64, SHASH_L, SHASH_H
        veor            SHASH2_H, HH_L, HH_H
        veor            HH34_L, HH3_L, HH3_H
        veor            HH34_H, HH4_L, HH4_H

        vmov.i8         MASK, #0xe1
        vshl.u64        MASK, MASK, #57
        .endif

        vrev64.8        XL2, XL2
        vrev64.8        XM2, XM2

        subs            r0, r0, #4

        vext.8          T1, XL2, XL2, #8
        veor            XL2_H, XL2_H, XL_L
        veor            XL, XL, T1

        vrev64.8        T1, T3
        vrev64.8        T3, T2

        vmull.p64       XH, HH4_H, XL_H                 // a1 * b1
        veor            XL2_H, XL2_H, XL_H
        vmull.p64       XL, HH4_L, XL_L                 // a0 * b0
        vmull.p64       XM, HH34_H, XL2_H               // (a1 + a0)(b1 + b0)

        vmull.p64       XH2, HH3_H, XM2_L               // a1 * b1
        veor            XM2_L, XM2_L, XM2_H
        vmull.p64       XL2, HH3_L, XM2_H               // a0 * b0
        vmull.p64       XM2, HH34_L, XM2_L              // (a1 + a0)(b1 + b0)

        veor            XH, XH, XH2
        veor            XL, XL, XL2
        veor            XM, XM, XM2

        vmull.p64       XH2, HH_H, T3_L                 // a1 * b1
        veor            T3_L, T3_L, T3_H
        vmull.p64       XL2, HH_L, T3_H                 // a0 * b0
        vmull.p64       XM2, SHASH2_H, T3_L             // (a1 + a0)(b1 + b0)

        veor            XH, XH, XH2
        veor            XL, XL, XL2
        veor            XM, XM, XM2

        vmull.p64       XH2, SHASH_H, T1_L              // a1 * b1
        veor            T1_L, T1_L, T1_H
        vmull.p64       XL2, SHASH_L, T1_H              // a0 * b0
        vmull.p64       XM2, SHASH2_p64, T1_L           // (a1 + a0)(b1 + b0)

        veor            XH, XH, XH2
        veor            XL, XL, XL2
        veor            XM, XM, XM2

        beq             4f

        vld1.8          {XL2-XM2}, [r2]!

        veor            T1, XL, XH
        veor            XM, XM, T1

        __pmull_reduce_p64

        veor            T1, T1, XH
        veor            XL, XL, T1

        b               1b
        .endif
        .endif

2:      vld1.8          {T1}, [r2]!

        .ifnb           \enc
        \enc\()_1x      T1
        veor            SHASH2_p64, SHASH_L, SHASH_H
        vmov.i8         MASK, #0xe1
        vshl.u64        MASK, MASK, #57
        .endif

        subs            r0, r0, #1

3:      /* multiply XL by SHASH in GF(2^128) */
        vrev64.8        T1, T1

        vext.8          IN1, T1, T1, #8
        veor            T1_L, T1_L, XL_H
        veor            XL, XL, IN1

        __pmull_\pn     XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h   @ a1 * b1
        veor            T1, T1, XL
        __pmull_\pn     XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l   @ a0 * b0
        __pmull_\pn     XM, T1_L, SHASH2_\pn                    @ (a1+a0)(b1+b0)

4:      veor            T1, XL, XH
        veor            XM, XM, T1

        __pmull_reduce_\pn

        veor            T1, T1, XH
        veor            XL, XL, T1

        bne             0b
        .endm
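
        /*
         * At the C level this macro implements the usual GHASH
         * recurrence over the input blocks.  As a rough model only (the
         * helper names below are placeholders, not real kernel APIs):
         *
         *     while (blocks--) {
         *             X ^= load_be128(src);   // byte-reversed block load
         *             X  = gf128_mul(X, H);   // the Karatsuba pmull/reduce above
         *             src += 16;
         *     }
         *
         * An optional head block (passed on the stack) is hashed first.
         * With \aggregate set and the block count a multiple of four,
         * four blocks are folded per iteration using the precomputed key
         * powers in SHASH/HH/HH3/HH4 (presumably H, H^2, H^3 and H^4),
         * so only one reduction is needed per four blocks.
         */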

        /*
         * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
         *                         struct ghash_key const *k, const char *head)
         */
ENTRY(pmull_ghash_update_p64)
        vld1.64         {SHASH}, [r3]!
        vld1.64         {HH}, [r3]!
        vld1.64         {HH3-HH4}, [r3]

        veor            SHASH2_p64, SHASH_L, SHASH_H
        veor            SHASH2_H, HH_L, HH_H
        veor            HH34_L, HH3_L, HH3_H
        veor            HH34_H, HH4_L, HH4_H

        vmov.i8         MASK, #0xe1
        vshl.u64        MASK, MASK, #57

        ghash_update    p64
        vst1.64         {XL}, [r1]

        bx              lr
ENDPROC(pmull_ghash_update_p64)

ENTRY(pmull_ghash_update_p8)
        vld1.64         {SHASH}, [r3]
        veor            SHASH2_p8, SHASH_L, SHASH_H

        vext.8          s1l, SHASH_L, SHASH_L, #1
        vext.8          s2l, SHASH_L, SHASH_L, #2
        vext.8          s3l, SHASH_L, SHASH_L, #3
        vext.8          s4l, SHASH_L, SHASH_L, #4
        vext.8          s1h, SHASH_H, SHASH_H, #1
        vext.8          s2h, SHASH_H, SHASH_H, #2
        vext.8          s3h, SHASH_H, SHASH_H, #3
        vext.8          s4h, SHASH_H, SHASH_H, #4

        vmov.i64        k16, #0xffff
        vmov.i64        k32, #0xffffffff
        vmov.i64        k48, #0xffffffffffff

        ghash_update    p8
        vst1.64         {XL}, [r1]

        bx              lr
ENDPROC(pmull_ghash_update_p8)
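
        /*
         * Both entry points take the precomputed key material via r3.
         * The p64 variant also loads HH and HH3/HH4 (presumably the
         * higher powers of H used for aggregation), while the p8 variant
         * only needs H itself plus the byte-rotated copies of its halves
         * (s1l..s4h) consumed by __pmull_p8.  SHASH2_* caches the XOR of
         * the two 64-bit halves of H so that the Karatsuba middle
         * product (a1 + a0)(b1 + b0) costs no extra work per block.
         */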

        e0              .req    q9
        e1              .req    q10
        e2              .req    q11
        e3              .req    q12
        e0l             .req    d18
        e0h             .req    d19
        e2l             .req    d22
        e2h             .req    d23
        e3l             .req    d24
        e3h             .req    d25
        ctr             .req    q13
        ctr0            .req    d26
        ctr1            .req    d27

        ek0             .req    q14
        ek1             .req    q15

        .macro          round, rk:req, regs:vararg
        .irp            r, \regs
        aese.8          \r, \rk
        aesmc.8         \r, \r
        .endr
        .endm

        .macro          aes_encrypt, rkp, rounds, regs:vararg
        vld1.8          {ek0-ek1}, [\rkp, :128]!
        cmp             \rounds, #12
        blt             .L\@                    // AES-128

        round           ek0, \regs
        vld1.8          {ek0}, [\rkp, :128]!
        round           ek1, \regs
        vld1.8          {ek1}, [\rkp, :128]!

        beq             .L\@                    // AES-192

        round           ek0, \regs
        vld1.8          {ek0}, [\rkp, :128]!
        round           ek1, \regs
        vld1.8          {ek1}, [\rkp, :128]!

.L\@:   .rept           4
        round           ek0, \regs
        vld1.8          {ek0}, [\rkp, :128]!
        round           ek1, \regs
        vld1.8          {ek1}, [\rkp, :128]!
        .endr

        round           ek0, \regs
        vld1.8          {ek0}, [\rkp, :128]

        .irp            r, \regs
        aese.8          \r, ek1
        .endr
        .irp            r, \regs
        veor            \r, \r, ek0
        .endr
        .endm
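
        /*
         * \rounds follows the usual AES convention: 10, 12 or 14 rounds
         * for AES-128/192/256.  The two conditional branches above skip
         * the extra round pairs that only the larger key sizes need, and
         * the tail performs the final round as AESE without AESMC
         * followed by an XOR with the last round key.
         */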

pmull_aes_encrypt:
        add             ip, r5, #4
        vld1.8          {ctr0}, [r5]            // load 12 byte IV
        vld1.8          {ctr1}, [ip]
        rev             r8, r7
        vext.8          ctr1, ctr1, ctr1, #4
        add             r7, r7, #1
        vmov.32         ctr1[1], r8
        vmov            e0, ctr

        add             ip, r3, #64
        aes_encrypt     ip, r6, e0
        bx              lr
ENDPROC(pmull_aes_encrypt)
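
        /*
         * The counter block is assembled as in GCM's CTR mode: the
         * 12-byte IV occupies the first 96 bits and a 32-bit big-endian
         * block counter (passed in r7 and incremented per generated
         * block) fills the last four bytes; the vext/vmov.32 shuffle
         * above drops the byte-swapped counter into the final word of
         * the ctr register.
         */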

pmull_aes_encrypt_4x:
        add             ip, r5, #4
        vld1.8          {ctr0}, [r5]
        vld1.8          {ctr1}, [ip]
        rev             r8, r7
        vext.8          ctr1, ctr1, ctr1, #4
        add             r7, r7, #1
        vmov.32         ctr1[1], r8
        rev             ip, r7
        vmov            e0, ctr
        add             r7, r7, #1
        vmov.32         ctr1[1], ip
        rev             r8, r7
        vmov            e1, ctr
        add             r7, r7, #1
        vmov.32         ctr1[1], r8
        rev             ip, r7
        vmov            e2, ctr
        add             r7, r7, #1
        vmov.32         ctr1[1], ip
        vmov            e3, ctr

        add             ip, r3, #64
        aes_encrypt     ip, r6, e0, e1, e2, e3
        bx              lr
ENDPROC(pmull_aes_encrypt_4x)

pmull_aes_encrypt_final:
        add             ip, r5, #4
        vld1.8          {ctr0}, [r5]
        vld1.8          {ctr1}, [ip]
        rev             r8, r7
        vext.8          ctr1, ctr1, ctr1, #4
        mov             r7, #1 << 24            // BE #1 for the tag
        vmov.32         ctr1[1], r8
        vmov            e0, ctr
        vmov.32         ctr1[1], r7
        vmov            e1, ctr

        add             ip, r3, #64
        aes_encrypt     ip, r6, e0, e1
        bx              lr
ENDPROC(pmull_aes_encrypt_final)
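
        /*
         * With a 96-bit IV, GCM reserves counter value 1 (the J0 block)
         * for encrypting the authentication tag, which is why this
         * helper forces a big-endian #1 into the counter word; the bulk
         * data blocks use counter values 2 and up.
         */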

        .macro          enc_1x, in0
        bl              pmull_aes_encrypt
        veor            \in0, \in0, e0
        vst1.8          {\in0}, [r4]!
        .endm

        .macro          dec_1x, in0
        bl              pmull_aes_encrypt
        veor            e0, e0, \in0
        vst1.8          {e0}, [r4]!
        .endm

        .macro          enc_4x, in0, in1, in2, in3
        bl              pmull_aes_encrypt_4x

        veor            \in0, \in0, e0
        veor            \in1, \in1, e1
        veor            \in2, \in2, e2
        veor            \in3, \in3, e3

        vst1.8          {\in0-\in1}, [r4]!
        vst1.8          {\in2-\in3}, [r4]!
        .endm

        .macro          dec_4x, in0, in1, in2, in3
        bl              pmull_aes_encrypt_4x

        veor            e0, e0, \in0
        veor            e1, e1, \in1
        veor            e2, e2, \in2
        veor            e3, e3, \in3

        vst1.8          {e0-e1}, [r4]!
        vst1.8          {e2-e3}, [r4]!
        .endm
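
        /*
         * These macros are the \enc hooks expanded inside ghash_update:
         * they call the CTR helpers above to produce key stream, XOR it
         * with the input and store the result via the dst pointer in r4.
         * In both directions the registers left for GHASH to consume
         * hold the ciphertext: the enc variants overwrite the loaded
         * plaintext in place, while the dec variants XOR into the key
         * stream registers and leave the loaded ciphertext untouched.
         */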

        /*
         * void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
         *                        struct gcm_key const *k, char *dst,
         *                        char *iv, int rounds, u32 counter)
         */
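        /*
         * Per the AAPCS the first four arguments arrive in r0-r3
         * (blocks, dg, src, k); the remaining stack arguments are pulled
         * into r4-r7 (dst, iv, rounds, counter) by the ldrd pairs below,
         * after the callee-saved registers have been pushed.
         */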
ENTRY(pmull_gcm_encrypt)
        push            {r4-r8, lr}
        ldrd            r4, r5, [sp, #24]
        ldrd            r6, r7, [sp, #32]

        vld1.64         {SHASH}, [r3]

        ghash_update    p64, enc, head=0
        vst1.64         {XL}, [r1]

        pop             {r4-r8, pc}
ENDPROC(pmull_gcm_encrypt)

        /*
         * void pmull_gcm_decrypt(int blocks, u64 dg[], const char *src,
         *                        struct gcm_key const *k, char *dst,
         *                        char *iv, int rounds, u32 counter)
         */
ENTRY(pmull_gcm_decrypt)
        push            {r4-r8, lr}
        ldrd            r4, r5, [sp, #24]
        ldrd            r6, r7, [sp, #32]

        vld1.64         {SHASH}, [r3]

        ghash_update    p64, dec, head=0
        vst1.64         {XL}, [r1]

        pop             {r4-r8, pc}
ENDPROC(pmull_gcm_decrypt)

        /*
         * void pmull_gcm_enc_final(int bytes, u64 dg[], char *tag,
         *                          struct gcm_key const *k, char *head,
         *                          char *iv, int rounds, u32 counter)
         */
ENTRY(pmull_gcm_enc_final)
        push            {r4-r8, lr}
        ldrd            r4, r5, [sp, #24]
        ldrd            r6, r7, [sp, #32]

        bl              pmull_aes_encrypt_final

        cmp             r0, #0
        beq             .Lenc_final

        mov_l           ip, .Lpermute
        sub             r4, r4, #16
        add             r8, ip, r0
        add             ip, ip, #32
        add             r4, r4, r0
        sub             ip, ip, r0

        vld1.8          {e3}, [r8]              // permute vector for key stream
        vld1.8          {e2}, [ip]              // permute vector for ghash input

        vtbl.8          e3l, {e0}, e3l
        vtbl.8          e3h, {e0}, e3h

        vld1.8          {e0}, [r4]              // encrypt tail block
        veor            e0, e0, e3
        vst1.8          {e0}, [r4]

        vtbl.8          T1_L, {e0}, e2l
        vtbl.8          T1_H, {e0}, e2h

        vld1.64         {XL}, [r1]
.Lenc_final:
        vld1.64         {SHASH}, [r3, :128]
        vmov.i8         MASK, #0xe1
        veor            SHASH2_p64, SHASH_L, SHASH_H
        vshl.u64        MASK, MASK, #57
        mov             r0, #1
        bne             3f                      // process head block first
        ghash_update    p64, aggregate=0, head=0

        vrev64.8        XL, XL
        vext.8          XL, XL, XL, #8
        veor            XL, XL, e1

        sub             r2, r2, #16             // rewind src pointer
        vst1.8          {XL}, [r2]              // store tag

        pop             {r4-r8, pc}
ENDPROC(pmull_gcm_enc_final)

        /*
         * int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
         *                         struct gcm_key const *k, char *head,
         *                         char *iv, int rounds, u32 counter,
         *                         const char *otag, int authsize)
         */
ENTRY(pmull_gcm_dec_final)
        push            {r4-r8, lr}
        ldrd            r4, r5, [sp, #24]
        ldrd            r6, r7, [sp, #32]

        bl              pmull_aes_encrypt_final

        cmp             r0, #0
        beq             .Ldec_final

        mov_l           ip, .Lpermute
        sub             r4, r4, #16
        add             r8, ip, r0
        add             ip, ip, #32
        add             r4, r4, r0
        sub             ip, ip, r0

        vld1.8          {e3}, [r8]              // permute vector for key stream
        vld1.8          {e2}, [ip]              // permute vector for ghash input

        vtbl.8          e3l, {e0}, e3l
        vtbl.8          e3h, {e0}, e3h

        vld1.8          {e0}, [r4]

        vtbl.8          T1_L, {e0}, e2l
        vtbl.8          T1_H, {e0}, e2h

        veor            e0, e0, e3
        vst1.8          {e0}, [r4]

        vld1.64         {XL}, [r1]
.Ldec_final:
        vld1.64         {SHASH}, [r3]
        vmov.i8         MASK, #0xe1
        veor            SHASH2_p64, SHASH_L, SHASH_H
        vshl.u64        MASK, MASK, #57
        mov             r0, #1
        bne             3f                      // process head block first
        ghash_update    p64, aggregate=0, head=0

        vrev64.8        XL, XL
        vext.8          XL, XL, XL, #8
        veor            XL, XL, e1

        mov_l           ip, .Lpermute
        ldrd            r2, r3, [sp, #40]       // otag and authsize
        vld1.8          {T1}, [r2]
        add             ip, ip, r3
        vceq.i8         T1, T1, XL              // compare tags
        vmvn            T1, T1                  // 0 for eq, -1 for ne

        vld1.8          {e0}, [ip]
        vtbl.8          XL_L, {T1}, e0l         // keep authsize bytes only
        vtbl.8          XL_H, {T1}, e0h

        vpmin.s8        XL_L, XL_L, XL_H        // take the minimum s8 across the vector
        vpmin.s8        XL_L, XL_L, XL_L
        vmov.32         r0, XL_L[0]             // fail if != 0x0

        pop             {r4-r8, pc}
ENDPROC(pmull_gcm_dec_final)

        .section        ".rodata", "a", %progbits
        .align          5
.Lpermute:
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
        .byte           0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
