
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

.arch   armv8-a+crypto

.irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31
        .set .Lv\b\().4s, \b
.endr

.macro sm4e, vd, vn
        .inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm
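
/*
 * The .irp/.set block above maps each vN.4s operand name to its
 * register number so that the sm4e macro can emit SM4E as a raw
 * .inst word (base opcode 0xcec08400 | Rn << 5 | Rd). This keeps
 * the file assembling on toolchains whose assembler does not know
 * the SM4 instructions.
 */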

/* Register macros */

/* Used for both encryption and decryption */
#define RHASH   v21
#define RRCONST v22
#define RZERO   v23

/* Helper macros. */

/*
 * input: m0, m1
 * output: r0:r1 (low 128-bits in r0, high in r1)
 */
#define PMUL_128x128(r0, r1, m0, m1, T0, T1)                    \
                ext             T0.16b, m1.16b, m1.16b, #8;     \
                pmull           r0.1q, m0.1d, m1.1d;            \
                pmull           T1.1q, m0.1d, T0.1d;            \
                pmull2          T0.1q, m0.2d, T0.2d;            \
                pmull2          r1.1q, m0.2d, m1.2d;            \
                eor             T0.16b, T0.16b, T1.16b;         \
                ext             T1.16b, RZERO.16b, T0.16b, #8;  \
                ext             T0.16b, T0.16b, RZERO.16b, #8;  \
                eor             r0.16b, r0.16b, T1.16b;         \
                eor             r1.16b, r1.16b, T0.16b;

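/*
 * For reference, a minimal C model of PMUL_128x128 (a documentation
 * sketch, not part of the build; clmul64() is a hypothetical bit-by-bit
 * stand-in for the PMULL/PMULL2 instructions, and <stdint.h> is
 * assumed):
 *
 *	// carry-less 64x64 -> 128-bit multiply (reference model)
 *	static void clmul64(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
 *	{
 *		uint64_t l = 0, h = 0;
 *		int i;
 *
 *		for (i = 0; i < 64; i++) {
 *			if ((b >> i) & 1) {
 *				l ^= a << i;
 *				if (i)
 *					h ^= a >> (64 - i);
 *			}
 *		}
 *		*lo = l;
 *		*hi = h;
 *	}
 *
 *	// 128x128 -> 256: four 64x64 products; the two cross products
 *	// are XORed together and folded into the middle 128 bits,
 *	// exactly what the ext/eor tail of the macro does with RZERO.
 *	static void pmul_128x128(const uint64_t m0[2], const uint64_t m1[2],
 *				 uint64_t r0[2], uint64_t r1[2])
 *	{
 *		uint64_t lo[2], hi[2], c0[2], c1[2];
 *
 *		clmul64(m0[0], m1[0], &lo[0], &lo[1]);	// pmull  -> r0
 *		clmul64(m0[1], m1[1], &hi[0], &hi[1]);	// pmull2 -> r1
 *		clmul64(m0[0], m1[1], &c0[0], &c0[1]);	// cross (T1)
 *		clmul64(m0[1], m1[0], &c1[0], &c1[1]);	// cross (T0)
 *
 *		c0[0] ^= c1[0];
 *		c0[1] ^= c1[1];
 *		r0[0] = lo[0];
 *		r0[1] = lo[1] ^ c0[0];
 *		r1[0] = hi[0] ^ c0[1];
 *		r1[1] = hi[1];
 *	}
 */
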
#define PMUL_128x128_4x(r0, r1, m0, m1, T0, T1,                 \
                        r2, r3, m2, m3, T2, T3,                 \
                        r4, r5, m4, m5, T4, T5,                 \
                        r6, r7, m6, m7, T6, T7)                 \
                ext             T0.16b, m1.16b, m1.16b, #8;     \
                ext             T2.16b, m3.16b, m3.16b, #8;     \
                ext             T4.16b, m5.16b, m5.16b, #8;     \
                ext             T6.16b, m7.16b, m7.16b, #8;     \
                pmull           r0.1q, m0.1d, m1.1d;            \
                pmull           r2.1q, m2.1d, m3.1d;            \
                pmull           r4.1q, m4.1d, m5.1d;            \
                pmull           r6.1q, m6.1d, m7.1d;            \
                pmull           T1.1q, m0.1d, T0.1d;            \
                pmull           T3.1q, m2.1d, T2.1d;            \
                pmull           T5.1q, m4.1d, T4.1d;            \
                pmull           T7.1q, m6.1d, T6.1d;            \
                pmull2          T0.1q, m0.2d, T0.2d;            \
                pmull2          T2.1q, m2.2d, T2.2d;            \
                pmull2          T4.1q, m4.2d, T4.2d;            \
                pmull2          T6.1q, m6.2d, T6.2d;            \
                pmull2          r1.1q, m0.2d, m1.2d;            \
                pmull2          r3.1q, m2.2d, m3.2d;            \
                pmull2          r5.1q, m4.2d, m5.2d;            \
                pmull2          r7.1q, m6.2d, m7.2d;            \
                eor             T0.16b, T0.16b, T1.16b;         \
                eor             T2.16b, T2.16b, T3.16b;         \
                eor             T4.16b, T4.16b, T5.16b;         \
                eor             T6.16b, T6.16b, T7.16b;         \
                ext             T1.16b, RZERO.16b, T0.16b, #8;  \
                ext             T3.16b, RZERO.16b, T2.16b, #8;  \
                ext             T5.16b, RZERO.16b, T4.16b, #8;  \
                ext             T7.16b, RZERO.16b, T6.16b, #8;  \
                ext             T0.16b, T0.16b, RZERO.16b, #8;  \
                ext             T2.16b, T2.16b, RZERO.16b, #8;  \
                ext             T4.16b, T4.16b, RZERO.16b, #8;  \
                ext             T6.16b, T6.16b, RZERO.16b, #8;  \
                eor             r0.16b, r0.16b, T1.16b;         \
                eor             r2.16b, r2.16b, T3.16b;         \
                eor             r4.16b, r4.16b, T5.16b;         \
                eor             r6.16b, r6.16b, T7.16b;         \
                eor             r1.16b, r1.16b, T0.16b;         \
                eor             r3.16b, r3.16b, T2.16b;         \
                eor             r5.16b, r5.16b, T4.16b;         \
                eor             r7.16b, r7.16b, T6.16b;

/*
 * input: r0:r1 (low 128-bits in r0, high in r1)
 * output: a
 */
#define REDUCTION(a, r0, r1, rconst, T0, T1)                    \
                pmull2          T0.1q, r1.2d, rconst.2d;        \
                ext             T1.16b, T0.16b, RZERO.16b, #8;  \
                ext             T0.16b, RZERO.16b, T0.16b, #8;  \
                eor             r1.16b, r1.16b, T1.16b;         \
                eor             r0.16b, r0.16b, T0.16b;         \
                pmull           T0.1q, r1.1d, rconst.1d;        \
                eor             a.16b, r0.16b, T0.16b;

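/*
 * A matching C model of REDUCTION (documentation sketch; clmul64() is
 * the hypothetical helper from the PMUL_128x128 sketch above). Since
 * every value is bit-reversed with rbit beforehand, the GHASH field
 * polynomial x^128 + x^7 + x^2 + x + 1 becomes an ordinary reduction
 * in which x^128 folds down to the constant 0x87:
 *
 *	// reduce the 256-bit product r1:r0 modulo the field polynomial;
 *	// r0/r1 are clobbered, the 128-bit result lands in a[]
 *	static void reduction(uint64_t a[2], uint64_t r0[2], uint64_t r1[2])
 *	{
 *		uint64_t t[2];
 *
 *		clmul64(r1[1], 0x87, &t[0], &t[1]);	// fold bits 192..255
 *		r1[0] ^= t[1];
 *		r0[1] ^= t[0];
 *		clmul64(r1[0], 0x87, &t[0], &t[1]);	// fold bits 128..191
 *		a[0] = r0[0] ^ t[0];
 *		a[1] = r0[1] ^ t[1];
 *	}
 */
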
#define SM4_CRYPT_PMUL_128x128_BLK(b0, r0, r1, m0, m1, T0, T1)  \
        rev32                   b0.16b, b0.16b;                 \
                ext             T0.16b, m1.16b, m1.16b, #8;     \
        sm4e                    b0.4s, v24.4s;                  \
                pmull           r0.1q, m0.1d, m1.1d;            \
        sm4e                    b0.4s, v25.4s;                  \
                pmull           T1.1q, m0.1d, T0.1d;            \
        sm4e                    b0.4s, v26.4s;                  \
                pmull2          T0.1q, m0.2d, T0.2d;            \
        sm4e                    b0.4s, v27.4s;                  \
                pmull2          r1.1q, m0.2d, m1.2d;            \
        sm4e                    b0.4s, v28.4s;                  \
                eor             T0.16b, T0.16b, T1.16b;         \
        sm4e                    b0.4s, v29.4s;                  \
                ext             T1.16b, RZERO.16b, T0.16b, #8;  \
        sm4e                    b0.4s, v30.4s;                  \
                ext             T0.16b, T0.16b, RZERO.16b, #8;  \
        sm4e                    b0.4s, v31.4s;                  \
                eor             r0.16b, r0.16b, T1.16b;         \
        rev64                   b0.4s, b0.4s;                   \
                eor             r1.16b, r1.16b, T0.16b;         \
        ext                     b0.16b, b0.16b, b0.16b, #8;     \
        rev32                   b0.16b, b0.16b;

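/*
 * SM4_CRYPT_PMUL_128x128_BLK (and the 3-block variant below)
 * interleaves the eight SM4E instructions (four SM4 rounds each) with
 * the PMULL-based GHASH multiply so the two instruction streams can
 * overlap in the pipeline; the trailing rev64/ext/rev32 sequence
 * reverses the word order of the SM4 state after the final round.
 */
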
#define SM4_CRYPT_PMUL_128x128_BLK3(b0, b1, b2,                 \
                                    r0, r1, m0, m1, T0, T1,     \
                                    r2, r3, m2, m3, T2, T3,     \
                                    r4, r5, m4, m5, T4, T5)     \
        rev32                   b0.16b, b0.16b;                 \
        rev32                   b1.16b, b1.16b;                 \
        rev32                   b2.16b, b2.16b;                 \
                ext             T0.16b, m1.16b, m1.16b, #8;     \
                ext             T2.16b, m3.16b, m3.16b, #8;     \
                ext             T4.16b, m5.16b, m5.16b, #8;     \
        sm4e                    b0.4s, v24.4s;                  \
        sm4e                    b1.4s, v24.4s;                  \
        sm4e                    b2.4s, v24.4s;                  \
                pmull           r0.1q, m0.1d, m1.1d;            \
                pmull           r2.1q, m2.1d, m3.1d;            \
                pmull           r4.1q, m4.1d, m5.1d;            \
        sm4e                    b0.4s, v25.4s;                  \
        sm4e                    b1.4s, v25.4s;                  \
        sm4e                    b2.4s, v25.4s;                  \
                pmull           T1.1q, m0.1d, T0.1d;            \
                pmull           T3.1q, m2.1d, T2.1d;            \
                pmull           T5.1q, m4.1d, T4.1d;            \
        sm4e                    b0.4s, v26.4s;                  \
        sm4e                    b1.4s, v26.4s;                  \
        sm4e                    b2.4s, v26.4s;                  \
                pmull2          T0.1q, m0.2d, T0.2d;            \
                pmull2          T2.1q, m2.2d, T2.2d;            \
                pmull2          T4.1q, m4.2d, T4.2d;            \
        sm4e                    b0.4s, v27.4s;                  \
        sm4e                    b1.4s, v27.4s;                  \
        sm4e                    b2.4s, v27.4s;                  \
                pmull2          r1.1q, m0.2d, m1.2d;            \
                pmull2          r3.1q, m2.2d, m3.2d;            \
                pmull2          r5.1q, m4.2d, m5.2d;            \
        sm4e                    b0.4s, v28.4s;                  \
        sm4e                    b1.4s, v28.4s;                  \
        sm4e                    b2.4s, v28.4s;                  \
                eor             T0.16b, T0.16b, T1.16b;         \
                eor             T2.16b, T2.16b, T3.16b;         \
                eor             T4.16b, T4.16b, T5.16b;         \
        sm4e                    b0.4s, v29.4s;                  \
        sm4e                    b1.4s, v29.4s;                  \
        sm4e                    b2.4s, v29.4s;                  \
                ext             T1.16b, RZERO.16b, T0.16b, #8;  \
                ext             T3.16b, RZERO.16b, T2.16b, #8;  \
                ext             T5.16b, RZERO.16b, T4.16b, #8;  \
        sm4e                    b0.4s, v30.4s;                  \
        sm4e                    b1.4s, v30.4s;                  \
        sm4e                    b2.4s, v30.4s;                  \
                ext             T0.16b, T0.16b, RZERO.16b, #8;  \
                ext             T2.16b, T2.16b, RZERO.16b, #8;  \
                ext             T4.16b, T4.16b, RZERO.16b, #8;  \
        sm4e                    b0.4s, v31.4s;                  \
        sm4e                    b1.4s, v31.4s;                  \
        sm4e                    b2.4s, v31.4s;                  \
                eor             r0.16b, r0.16b, T1.16b;         \
                eor             r2.16b, r2.16b, T3.16b;         \
                eor             r4.16b, r4.16b, T5.16b;         \
        rev64                   b0.4s, b0.4s;                   \
        rev64                   b1.4s, b1.4s;                   \
        rev64                   b2.4s, b2.4s;                   \
                eor             r1.16b, r1.16b, T0.16b;         \
                eor             r3.16b, r3.16b, T2.16b;         \
                eor             r5.16b, r5.16b, T4.16b;         \
        ext                     b0.16b, b0.16b, b0.16b, #8;     \
        ext                     b1.16b, b1.16b, b1.16b, #8;     \
        ext                     b2.16b, b2.16b, b2.16b, #8;     \
                eor             r0.16b, r0.16b, r2.16b;         \
                eor             r1.16b, r1.16b, r3.16b;         \
        rev32                   b0.16b, b0.16b;                 \
        rev32                   b1.16b, b1.16b;                 \
        rev32                   b2.16b, b2.16b;                 \
                eor             r0.16b, r0.16b, r4.16b;         \
                eor             r1.16b, r1.16b, r5.16b;

#define inc32_le128(vctr)                                       \
                mov             vctr.d[1], x9;                  \
                add             w6, w9, #1;                     \
                mov             vctr.d[0], x8;                  \
                bfi             x9, x6, #0, #32;                \
                rev64           vctr.16b, vctr.16b;

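/*
 * GCM increments only the low 32 bits of the 128-bit big-endian
 * counter block; x8/x9 hold the byte-swapped counter halves, the
 * add/bfi pair bumps the low 32 bits of x9 for the next block, and
 * rev64 restores big-endian byte order within each vector lane.
 * A C model of the counter step (sketch only, <stdint.h> assumed):
 *
 *	// bump the low 32 bits of a big-endian 16-byte counter block
 *	static void inc32(uint8_t ctr[16])
 *	{
 *		uint32_t c = ((uint32_t)ctr[12] << 24) |
 *			     ((uint32_t)ctr[13] << 16) |
 *			     ((uint32_t)ctr[14] << 8) | ctr[15];
 *
 *		c++;
 *		ctr[12] = c >> 24;
 *		ctr[13] = c >> 16;
 *		ctr[14] = c >> 8;
 *		ctr[15] = c;
 *	}
 */
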
#define GTAG_HASH_LENGTHS(vctr0, vlen)                                  \
                ld1             {vlen.16b}, [x7];                       \
                /* construct CTR0 */                                    \
                /* the lower 32 bits of the initial IV are always be32(1) */ \
                mov             x6, #0x1;                               \
                bfi             x9, x6, #0, #32;                        \
                mov             vctr0.d[0], x8;                         \
                mov             vctr0.d[1], x9;                         \
                rbit            vlen.16b, vlen.16b;                     \
                rev64           vctr0.16b, vctr0.16b;                   \
                /* authtag = GCTR(CTR0, GHASH) */                       \
                eor             RHASH.16b, RHASH.16b, vlen.16b;         \
                SM4_CRYPT_PMUL_128x128_BLK(vctr0, RR0, RR1, RHASH, RH1, \
                                           RTMP0, RTMP1);               \
                REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3);      \
                rbit            RHASH.16b, RHASH.16b;                   \
                eor             RHASH.16b, RHASH.16b, vctr0.16b;

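/*
 * GTAG_HASH_LENGTHS absorbs the lengths block (the 64-bit bit counts
 * of AAD and ciphertext, loaded from x7) into the GHASH state, then
 * encrypts counter block 0 to mask the result. In C terms, with
 * hypothetical ghash_mul() and sm4_encrypt() helpers:
 *
 *	ghash = ghash_mul(ghash ^ lengths, H);	// final GHASH block
 *	tag   = sm4_encrypt(key, ctr0) ^ ghash;	// tag = E(K, J0) ^ GHASH
 */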

/* Register macros for encrypt and ghash */

/* can be the same as input v0-v3 */
#define RR1     v0
#define RR3     v1
#define RR5     v2
#define RR7     v3

#define RR0     v4
#define RR2     v5
#define RR4     v6
#define RR6     v7

#define RTMP0   v8
#define RTMP1   v9
#define RTMP2   v10
#define RTMP3   v11
#define RTMP4   v12
#define RTMP5   v13
#define RTMP6   v14
#define RTMP7   v15

#define RH1     v16
#define RH2     v17
#define RH3     v18
#define RH4     v19

.align 3
SYM_FUNC_START(sm4_ce_pmull_ghash_setup)
        /* input:
         *   x0: round key array, CTX
         *   x1: ghash table
         */
        SM4_PREPARE(x0)

        adr_l           x2, .Lghash_rconst
        ld1r            {RRCONST.2d}, [x2]

        eor             RZERO.16b, RZERO.16b, RZERO.16b

        /* H = E(K, 0^128) */
        rev32           v0.16b, RZERO.16b
        SM4_CRYPT_BLK_BE(v0)

        /* H ^ 1 */
        rbit            RH1.16b, v0.16b

        /* H ^ 2 */
        PMUL_128x128(RR0, RR1, RH1, RH1, RTMP0, RTMP1)
        REDUCTION(RH2, RR0, RR1, RRCONST, RTMP2, RTMP3)

        /* H ^ 3 */
        PMUL_128x128(RR0, RR1, RH2, RH1, RTMP0, RTMP1)
        REDUCTION(RH3, RR0, RR1, RRCONST, RTMP2, RTMP3)

        /* H ^ 4 */
        PMUL_128x128(RR0, RR1, RH2, RH2, RTMP0, RTMP1)
        REDUCTION(RH4, RR0, RR1, RRCONST, RTMP2, RTMP3)

        st1             {RH1.16b-RH4.16b}, [x1]

        ret
SYM_FUNC_END(sm4_ce_pmull_ghash_setup)

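/*
 * The C glue is expected to declare these entry points to match the
 * register comments in this file, roughly as follows (a sketch, not
 * copied verbatim from the glue code):
 *
 *	asmlinkage void sm4_ce_pmull_ghash_setup(const u32 *rkey_enc,
 *						 u8 *ghash_table);
 *	asmlinkage void pmull_ghash_update(const u8 *ghash_table,
 *					   u8 *result, const u8 *src,
 *					   unsigned int nblocks);
 *	asmlinkage void sm4_ce_pmull_gcm_enc(const u32 *rkey_enc, u8 *dst,
 *					     const u8 *src, u8 *iv,
 *					     unsigned int nbytes, u8 *ghash,
 *					     const u8 *ghash_table,
 *					     const u8 *lengths);
 *
 * sm4_ce_pmull_gcm_dec takes the same arguments as the encrypt entry.
 * The table written above holds H^1..H^4 so the bulk loops can hash
 * several blocks per reduction.
 */
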
.align 3
SYM_FUNC_START(pmull_ghash_update)
        /* input:
         *   x0: ghash table
         *   x1: ghash result
         *   x2: src
         *   w3: nblocks
         */
        ld1             {RH1.16b-RH4.16b}, [x0]

        ld1             {RHASH.16b}, [x1]
        rbit            RHASH.16b, RHASH.16b

        adr_l           x4, .Lghash_rconst
        ld1r            {RRCONST.2d}, [x4]

        eor             RZERO.16b, RZERO.16b, RZERO.16b

.Lghash_loop_4x:
        cmp             w3, #4
        blt             .Lghash_loop_1x

        sub             w3, w3, #4

        ld1             {v0.16b-v3.16b}, [x2], #64

        rbit            v0.16b, v0.16b
        rbit            v1.16b, v1.16b
        rbit            v2.16b, v2.16b
        rbit            v3.16b, v3.16b

        /*
         * (in0 ^ HASH) * H^4 => rr0:rr1
         * (in1)        * H^3 => rr2:rr3
         * (in2)        * H^2 => rr4:rr5
         * (in3)        * H^1 => rr6:rr7
         */
        eor             RHASH.16b, RHASH.16b, v0.16b

        PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
                        RR2, RR3, v1, RH3, RTMP2, RTMP3,
                        RR4, RR5, v2, RH2, RTMP4, RTMP5,
                        RR6, RR7, v3, RH1, RTMP6, RTMP7)

        eor             RR0.16b, RR0.16b, RR2.16b
        eor             RR1.16b, RR1.16b, RR3.16b
        eor             RR0.16b, RR0.16b, RR4.16b
        eor             RR1.16b, RR1.16b, RR5.16b
        eor             RR0.16b, RR0.16b, RR6.16b
        eor             RR1.16b, RR1.16b, RR7.16b

        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)

        cbz             w3, .Lghash_end
        b               .Lghash_loop_4x

.Lghash_loop_1x:
        sub             w3, w3, #1

        ld1             {v0.16b}, [x2], #16
        rbit            v0.16b, v0.16b
        eor             RHASH.16b, RHASH.16b, v0.16b

        PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

        cbnz            w3, .Lghash_loop_1x

.Lghash_end:
        rbit            RHASH.16b, RHASH.16b
        st1             {RHASH.2d}, [x1]

        ret
SYM_FUNC_END(pmull_ghash_update)

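/*
 * The 4x loop aggregates four blocks into a single REDUCTION using
 * the precomputed powers of H:
 *
 *	X = (X ^ in0)*H^4 ^ in1*H^3 ^ in2*H^2 ^ in3*H^1
 *
 * A C model built on the pmul_128x128()/reduction() sketches above
 * (hypothetical, documentation only):
 *
 *	// one 4-block GHASH update; H[0..3] hold H^1..H^4, all values
 *	// bit-reflected as in the assembly
 *	static void ghash_update_4x(uint64_t X[2], const uint64_t in[4][2],
 *				    const uint64_t H[4][2])
 *	{
 *		uint64_t acc0[2] = { 0, 0 }, acc1[2] = { 0, 0 };
 *		uint64_t m[2], t0[2], t1[2];
 *		int i;
 *
 *		for (i = 0; i < 4; i++) {
 *			m[0] = in[i][0] ^ (i == 0 ? X[0] : 0);
 *			m[1] = in[i][1] ^ (i == 0 ? X[1] : 0);
 *			pmul_128x128(m, H[3 - i], t0, t1);
 *			acc0[0] ^= t0[0];
 *			acc0[1] ^= t0[1];
 *			acc1[0] ^= t1[0];
 *			acc1[1] ^= t1[1];
 *		}
 *		reduction(X, acc0, acc1);
 *	}
 */
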
.align 3
SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_enc)
        /* input:
         *   x0: round key array, CTX
         *   x1: dst
         *   x2: src
         *   x3: ctr (big endian, 128 bit)
         *   w4: nbytes
         *   x5: ghash result
         *   x6: ghash table
         *   x7: lengths (only for last block)
         */
        SM4_PREPARE(x0)

        ldp             x8, x9, [x3]
        rev             x8, x8
        rev             x9, x9

        ld1             {RH1.16b-RH4.16b}, [x6]

        ld1             {RHASH.16b}, [x5]
        rbit            RHASH.16b, RHASH.16b

        adr_l           x6, .Lghash_rconst
        ld1r            {RRCONST.2d}, [x6]

        eor             RZERO.16b, RZERO.16b, RZERO.16b

        cbz             w4, .Lgcm_enc_hash_len

.Lgcm_enc_loop_4x:
        cmp             w4, #(4 * 16)
        blt             .Lgcm_enc_loop_1x

        sub             w4, w4, #(4 * 16)

        /* construct CTRs */
        inc32_le128(v0)                 /* +0 */
        inc32_le128(v1)                 /* +1 */
        inc32_le128(v2)                 /* +2 */
        inc32_le128(v3)                 /* +3 */

        ld1             {RTMP0.16b-RTMP3.16b}, [x2], #64

        SM4_CRYPT_BLK4(v0, v1, v2, v3)

        eor             v0.16b, v0.16b, RTMP0.16b
        eor             v1.16b, v1.16b, RTMP1.16b
        eor             v2.16b, v2.16b, RTMP2.16b
        eor             v3.16b, v3.16b, RTMP3.16b
        st1             {v0.16b-v3.16b}, [x1], #64

        /* ghash update */

        rbit            v0.16b, v0.16b
        rbit            v1.16b, v1.16b
        rbit            v2.16b, v2.16b
        rbit            v3.16b, v3.16b

        /*
         * (in0 ^ HASH) * H^4 => rr0:rr1
         * (in1)        * H^3 => rr2:rr3
         * (in2)        * H^2 => rr4:rr5
         * (in3)        * H^1 => rr6:rr7
         */
        eor             RHASH.16b, RHASH.16b, v0.16b

        PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
                        RR2, RR3, v1, RH3, RTMP2, RTMP3,
                        RR4, RR5, v2, RH2, RTMP4, RTMP5,
                        RR6, RR7, v3, RH1, RTMP6, RTMP7)

        eor             RR0.16b, RR0.16b, RR2.16b
        eor             RR1.16b, RR1.16b, RR3.16b
        eor             RR0.16b, RR0.16b, RR4.16b
        eor             RR1.16b, RR1.16b, RR5.16b
        eor             RR0.16b, RR0.16b, RR6.16b
        eor             RR1.16b, RR1.16b, RR7.16b

        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)

        cbz             w4, .Lgcm_enc_hash_len
        b               .Lgcm_enc_loop_4x

.Lgcm_enc_loop_1x:
        cmp             w4, #16
        blt             .Lgcm_enc_tail

        sub             w4, w4, #16

        /* construct CTR */
        inc32_le128(v0)

        ld1             {RTMP0.16b}, [x2], #16

        SM4_CRYPT_BLK(v0)

        eor             v0.16b, v0.16b, RTMP0.16b
        st1             {v0.16b}, [x1], #16

        /* ghash update */
        rbit            v0.16b, v0.16b
        eor             RHASH.16b, RHASH.16b, v0.16b
        PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

        cbz             w4, .Lgcm_enc_hash_len
        b               .Lgcm_enc_loop_1x

.Lgcm_enc_tail:
        /* construct CTR */
        inc32_le128(v0)
        SM4_CRYPT_BLK(v0)

        /* load permute table */
        adr_l           x0, .Lcts_permute_table
        add             x0, x0, #32
        sub             x0, x0, w4, uxtw
        ld1             {v3.16b}, [x0]

.Lgcm_enc_tail_loop:
        /* do encrypt */
        ldrb            w0, [x2], #1    /* load 1 plaintext byte */
        umov            w6, v0.b[0]     /* get next keystream byte */
        eor             w6, w6, w0      /* w6 = keystream ^ plaintext */
        strb            w6, [x1], #1    /* store ciphertext byte */

        /* shift the consumed keystream byte out */
        ext             v0.16b, v0.16b, v0.16b, #1
        /* accumulate ciphertext bytes at the top of v0 */
        ins             v0.b[15], w6

        subs            w4, w4, #1
        bne             .Lgcm_enc_tail_loop

        /* pad the final partial block with zeros */
        tbl             v0.16b, {v0.16b}, v3.16b

        /* ghash update */
        rbit            v0.16b, v0.16b
        eor             RHASH.16b, RHASH.16b, v0.16b
        PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

.Lgcm_enc_hash_len:
        cbz             x7, .Lgcm_enc_end

        GTAG_HASH_LENGTHS(v1, v3)

        b               .Lgcm_enc_ret

.Lgcm_enc_end:
        /* store new CTR */
        rev             x8, x8
        rev             x9, x9
        stp             x8, x9, [x3]

        rbit            RHASH.16b, RHASH.16b

.Lgcm_enc_ret:
        /* store new MAC */
        st1             {RHASH.2d}, [x5]

        ret
SYM_FUNC_END(sm4_ce_pmull_gcm_enc)

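/*
 * A hypothetical call from the C glue for one processing step (sketch
 * only; the names are illustrative and real callers bracket the call
 * with kernel_neon_begin()/kernel_neon_end()):
 *
 *	__be64 lengths[2] = {
 *		cpu_to_be64(assoclen * 8),
 *		cpu_to_be64(cryptlen * 8),
 *	};
 *
 *	sm4_ce_pmull_gcm_enc(ctx->rkey_enc, dst, src, iv, nbytes,
 *			     ghash, ghash_table,
 *			     final_step ? (const u8 *)lengths : NULL);
 */
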
#undef  RR1
#undef  RR3
#undef  RR5
#undef  RR7
#undef  RR0
#undef  RR2
#undef  RR4
#undef  RR6
#undef RTMP0
#undef RTMP1
#undef RTMP2
#undef RTMP3
#undef RTMP4
#undef RTMP5
#undef RTMP6
#undef RTMP7
#undef  RH1
#undef  RH2
#undef  RH3
#undef  RH4


/* Register macros for decrypt */

/* v0-v2 for building CTRs, v3-v5 for saving inputs */

#define RR1     v6
#define RR3     v7
#define RR5     v8

#define RR0     v9
#define RR2     v10
#define RR4     v11

#define RTMP0   v12
#define RTMP1   v13
#define RTMP2   v14
#define RTMP3   v15
#define RTMP4   v16
#define RTMP5   v17

#define RH1     v18
#define RH2     v19
#define RH3     v20

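/*
 * Decryption hashes the ciphertext before decrypting it, so the three
 * input blocks (v3-v5) and their bit-reflected copies (v6-v8) must
 * stay live across the combined crypt+ghash macro. That leaves enough
 * registers for only three blocks per iteration, hence the 3-wide
 * main loop below and the use of just H^1..H^3 from the table.
 */
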
.align 3
SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_dec)
        /* input:
         *   x0: round key array, CTX
         *   x1: dst
         *   x2: src
         *   x3: ctr (big endian, 128 bit)
         *   w4: nbytes
         *   x5: ghash result
         *   x6: ghash table
         *   x7: lengths (only for last block)
         */
        SM4_PREPARE(x0)

        ldp             x8, x9, [x3]
        rev             x8, x8
        rev             x9, x9

        ld1             {RH1.16b-RH3.16b}, [x6]

        ld1             {RHASH.16b}, [x5]
        rbit            RHASH.16b, RHASH.16b

        adr_l           x6, .Lghash_rconst
        ld1r            {RRCONST.2d}, [x6]

        eor             RZERO.16b, RZERO.16b, RZERO.16b

        cbz             w4, .Lgcm_dec_hash_len

.Lgcm_dec_loop_3x:
        cmp             w4, #(3 * 16)
        blt             .Lgcm_dec_loop_1x

        sub             w4, w4, #(3 * 16)

        ld1             {v3.16b-v5.16b}, [x2], #(3 * 16)

        /* construct CTRs */
        inc32_le128(v0)                 /* +0 */
        rbit            v6.16b, v3.16b
        inc32_le128(v1)                 /* +1 */
        rbit            v7.16b, v4.16b
        inc32_le128(v2)                 /* +2 */
        rbit            v8.16b, v5.16b

        eor             RHASH.16b, RHASH.16b, v6.16b

        /* decrypt & ghash update */
        SM4_CRYPT_PMUL_128x128_BLK3(v0, v1, v2,
                                    RR0, RR1, RHASH, RH3, RTMP0, RTMP1,
                                    RR2, RR3, v7, RH2, RTMP2, RTMP3,
                                    RR4, RR5, v8, RH1, RTMP4, RTMP5)

        eor             v0.16b, v0.16b, v3.16b
        eor             v1.16b, v1.16b, v4.16b
        eor             v2.16b, v2.16b, v5.16b

        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)

        st1             {v0.16b-v2.16b}, [x1], #(3 * 16)

        cbz             w4, .Lgcm_dec_hash_len
        b               .Lgcm_dec_loop_3x

.Lgcm_dec_loop_1x:
        cmp             w4, #16
        blt             .Lgcm_dec_tail

        sub             w4, w4, #16

        ld1             {v3.16b}, [x2], #16

        /* construct CTR */
        inc32_le128(v0)
        rbit            v6.16b, v3.16b

        eor             RHASH.16b, RHASH.16b, v6.16b

        SM4_CRYPT_PMUL_128x128_BLK(v0, RR0, RR1, RHASH, RH1, RTMP0, RTMP1)

        eor             v0.16b, v0.16b, v3.16b

        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

        st1             {v0.16b}, [x1], #16

        cbz             w4, .Lgcm_dec_hash_len
        b               .Lgcm_dec_loop_1x

.Lgcm_dec_tail:
        /* construct CTR */
        inc32_le128(v0)
        SM4_CRYPT_BLK(v0)

        /* load permute table */
        adr_l           x0, .Lcts_permute_table
        add             x0, x0, #32
        sub             x0, x0, w4, uxtw
        ld1             {v3.16b}, [x0]

.Lgcm_dec_tail_loop:
        /* do decrypt */
        ldrb            w0, [x2], #1    /* load 1 ciphertext byte */
        umov            w6, v0.b[0]     /* get next keystream byte */
        eor             w6, w6, w0      /* w6 = keystream ^ ciphertext */
        strb            w6, [x1], #1    /* store plaintext byte */

        /* shift the consumed keystream byte out */
        ext             v0.16b, v0.16b, v0.16b, #1
        /* accumulate ciphertext (input) bytes at the top of v0 */
        ins             v0.b[15], w0

        subs            w4, w4, #1
        bne             .Lgcm_dec_tail_loop

        /* pad the final partial block with zeros */
        tbl             v0.16b, {v0.16b}, v3.16b

        /* ghash update */
        rbit            v0.16b, v0.16b
        eor             RHASH.16b, RHASH.16b, v0.16b
        PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

.Lgcm_dec_hash_len:
        cbz             x7, .Lgcm_dec_end

        GTAG_HASH_LENGTHS(v1, v3)

        b               .Lgcm_dec_ret

.Lgcm_dec_end:
        /* store new CTR */
        rev             x8, x8
        rev             x9, x9
        stp             x8, x9, [x3]

        rbit            RHASH.16b, RHASH.16b

.Lgcm_dec_ret:
        /* store new MAC */
        st1             {RHASH.2d}, [x5]

        ret
SYM_FUNC_END(sm4_ce_pmull_gcm_dec)

        .section        ".rodata", "a"
        .align 4
.Lcts_permute_table:
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
        .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
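
/*
 * Loading 16 bytes from .Lcts_permute_table + 32 - n (where n is the
 * number of tail bytes, 1..15) yields the indices 16-n..15 followed
 * by 0xff. The tbl in the tail loops therefore moves the n ciphertext
 * bytes accumulated at the top of v0 down to bytes 0..n-1 and
 * zero-pads the rest, forming the final partial block for GHASH.
 */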

.Lghash_rconst:
        .quad           0x87
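
/*
 * 0x87 encodes x^7 + x^2 + x + 1: once the data has been bit-reversed
 * with rbit, reduction modulo the GHASH polynomial
 * x^128 + x^7 + x^2 + x + 1 amounts to folding x^128 down to this
 * constant. ld1r replicates the 64-bit value into both lanes of
 * RRCONST so that both pmull and pmull2 can use it.
 */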
