Linux/arch/arm64/crypto/sm3-neon-core.S

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * sm3-neon-core.S - SM3 secure hash using NEON instructions
 *
 * Linux/arm64 port of the libgcrypt SM3 implementation for AArch64
 *
 * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 * Copyright (c) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>

/* Context structure */

#define state_h0 0
#define state_h1 4
#define state_h2 8
#define state_h3 12
#define state_h4 16
#define state_h5 20
#define state_h6 24
#define state_h7 28
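
/* Byte offsets of the eight 32-bit chaining values A..H; presumably kept
 * in step with the digest array at the start of struct sm3_state (the
 * loads and stores below use the raw offsets directly). */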

/* Stack structure */

#define STACK_W_SIZE        (32 * 2 * 3)

#define STACK_W             (0)
#define STACK_SIZE          (STACK_W + STACK_W_SIZE)
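
/* 192 bytes of scratch: three 64-byte slots. As set up below, the
 * byte-swapped input words for rounds 0-11 live in the upper 32 bytes
 * of each slot (see IW_W_ADDR), while expanded words for rounds 12+
 * ping-pong between the lower halves of slots 0 and 1 (see XW_W_ADDR). */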

/* Register macros */

#define RSTATE x0
#define RDATA  x1
#define RNBLKS x2
#define RKPTR  x28
#define RFRAME x29

#define ra w3
#define rb w4
#define rc w5
#define rd w6
#define re w7
#define rf w8
#define rg w9
#define rh w10

#define t0 w11
#define t1 w12
#define t2 w13
#define t3 w14
#define t4 w15
#define t5 w16
#define t6 w17

#define k_even w19
#define k_odd w20
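
/* k_even/k_odd receive the per-round constants. .LKtable (below) stores
 * rol32(T_j, j) precomputed, so a single ldp (see KL) fetches the
 * constants for an even/odd round pair. */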

#define addr0 x21
#define addr1 x22

#define s0 w23
#define s1 w24
#define s2 w25
#define s3 w26

#define W0 v0
#define W1 v1
#define W2 v2
#define W3 v3
#define W4 v4
#define W5 v5

#define XTMP0 v6
#define XTMP1 v7
#define XTMP2 v16
#define XTMP3 v17
#define XTMP4 v18
#define XTMP5 v19
#define XTMP6 v20

/* Helper macros. */

#define _(...) /*_*/

#define clear_vec(x) \
        movi    x.8h, #0;

#define rolw(o, a, n) \
        ror     o, a, #(32 - n);
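
/* AArch64 has no rotate-left instruction, so rol(a, n) is emitted as
 * ror(a, 32 - n). */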

/* Round function macros. */

#define GG1_1(x, y, z, o, t) \
        eor     o, x, y;
#define GG1_2(x, y, z, o, t) \
        eor     o, o, z;
#define GG1_3(x, y, z, o, t)

#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t)
#define FF1_2(x, y, z, o, t)
#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t)

#define GG2_1(x, y, z, o, t) \
        bic     o, z, x;
#define GG2_2(x, y, z, o, t) \
        and     t, y, x;
#define GG2_3(x, y, z, o, t) \
        eor     o, o, t;

#define FF2_1(x, y, z, o, t) \
        eor     o, x, y;
#define FF2_2(x, y, z, o, t) \
        and     t, x, y; \
        and     o, o, z;
#define FF2_3(x, y, z, o, t) \
        eor     o, o, t;
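
/* Boolean functions from the SM3 spec, split into one-instruction steps
 * for interleaving:
 *   rounds 0-15:  FF(x,y,z) = GG(x,y,z) = x ^ y ^ z
 *   rounds 16-63: FF(x,y,z) = (x & y) | (x & z) | (y & z),
 *                 computed as ((x ^ y) & z) ^ (x & y);
 *                 GG(x,y,z) = (x & y) | (~x & z),
 *                 computed as (z & ~x) ^ (x & y)
 *                 (the halves are disjoint, so OR becomes EOR). */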

#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
        K_LOAD(round);                                                        \
        ldr     t5, [sp, #(wtype##_W1_ADDR(round, widx))];                    \
        rolw(t0, a, 12);                              /* rol(a, 12) => t0 */  \
      IOP(1, iop_param);                                                      \
        FF##i##_1(a, b, c, t1, t2);                                           \
        ldr     t6, [sp, #(wtype##_W1W2_ADDR(round, widx))];                  \
        add     k, k, e;                                                      \
      IOP(2, iop_param);                                                      \
        GG##i##_1(e, f, g, t3, t4);                                           \
        FF##i##_2(a, b, c, t1, t2);                                           \
      IOP(3, iop_param);                                                      \
        add     k, k, t0;                                                     \
        add     h, h, t5;                                                     \
        add     d, d, t6;                     /* w1w2 + d => d */             \
      IOP(4, iop_param);                                                      \
        rolw(k, k, 7);                        /* rol(t0 + e + K, 7) => k */   \
        GG##i##_2(e, f, g, t3, t4);                                           \
        add     h, h, k;                      /* h + w1 + k => h */           \
      IOP(5, iop_param);                                                      \
        FF##i##_3(a, b, c, t1, t2);                                           \
        eor     t0, t0, k;                    /* k ^ t0 => t0 */              \
        GG##i##_3(e, f, g, t3, t4);                                           \
        add     d, d, t1;                     /* FF(a,b,c) + d => d */        \
      IOP(6, iop_param);                                                      \
        add     t3, t3, h;                    /* GG(e,f,g) + h => t3 */       \
        rolw(b, b, 9);                        /* rol(b, 9) => b */            \
        eor     h, t3, t3, ror #(32-9);                                       \
      IOP(7, iop_param);                                                      \
        add     d, d, t0;                     /* t0 + d => d */               \
        rolw(f, f, 19);                       /* rol(f, 19) => f */           \
      IOP(8, iop_param);                                                      \
        eor     h, h, t3, ror #(32-17);       /* P0(t3) => h */
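
/* One SM3 round, mapped onto the code above (a sketch of the data flow,
 * using the spec's names):
 *   SS1 = rol(rol(a, 12) + e + K[round], 7)         (built up in k)
 *   SS2 = SS1 ^ rol(a, 12)                          (t0)
 *   d   = FF(a, b, c) + d + SS2 + (w1 ^ w2)         (next round's a)
 *   h   = P0(GG(e, f, g) + h + SS1 + w1)            (next round's e)
 *   b   = rol(b, 9);  f = rol(f, 19)
 * where P0(x) = x ^ rol(x, 9) ^ rol(x, 17). Instead of shifting the
 * state through registers, successive rounds rename them; see the
 * rotated argument lists at the R1/R2 call sites below. */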

#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
        R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)

#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
        R(2, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)

#define KL(round) \
        ldp     k_even, k_odd, [RKPTR, #(4*(round))];

/* Input expansion macros. */

/* Byte-swapped input address. */
#define IW_W_ADDR(round, widx, offs) \
        (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))

/* Expanded input address. */
#define XW_W_ADDR(round, widx, offs) \
        (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))
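
/* The scheduler below expands three words per group starting at round
 * 12, so ((round) / 3 - 4) counts expansion groups and the "% 2"
 * ping-pongs between the two reuse slots. */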

/* Rounds 1-12, byte-swapped input block addresses. */
#define IW_W1_ADDR(round, widx)   IW_W_ADDR(round, widx, 32)
#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48)

/* Rounds 13-64, expanded input block addresses. */
#define XW_W1_ADDR(round, widx)   XW_W_ADDR(round, widx, 0)
#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16)

/* Input block loading.
 * Interleaving within round function needed for in-order CPUs. */
#define LOAD_W_VEC_1_1() \
        add     addr0, sp, #IW_W1_ADDR(0, 0);
#define LOAD_W_VEC_1_2() \
        add     addr1, sp, #IW_W1_ADDR(4, 0);
#define LOAD_W_VEC_1_3() \
        ld1     {W0.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_4() \
        ld1     {W1.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_5() \
        ld1     {W2.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_6() \
        ld1     {W3.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_7() \
        rev32   XTMP0.16b, W0.16b;
#define LOAD_W_VEC_1_8() \
        rev32   XTMP1.16b, W1.16b;
#define LOAD_W_VEC_2_1() \
        rev32   XTMP2.16b, W2.16b;
#define LOAD_W_VEC_2_2() \
        rev32   XTMP3.16b, W3.16b;
#define LOAD_W_VEC_2_3() \
        eor     XTMP4.16b, XTMP1.16b, XTMP0.16b;
#define LOAD_W_VEC_2_4() \
        eor     XTMP5.16b, XTMP2.16b, XTMP1.16b;
#define LOAD_W_VEC_2_5() \
        st1     {XTMP0.16b}, [addr0], #16;
#define LOAD_W_VEC_2_6() \
        st1     {XTMP4.16b}, [addr0]; \
        add     addr0, sp, #IW_W1_ADDR(8, 0);
#define LOAD_W_VEC_2_7() \
        eor     XTMP6.16b, XTMP3.16b, XTMP2.16b;
#define LOAD_W_VEC_2_8() \
        ext     W0.16b, XTMP0.16b, XTMP0.16b, #8;  /* W0: xx, w0, xx, xx */
#define LOAD_W_VEC_3_1() \
        mov     W2.16b, XTMP1.16b;                 /* W2: xx, w6, w5, w4 */
#define LOAD_W_VEC_3_2() \
        st1     {XTMP1.16b}, [addr1], #16;
#define LOAD_W_VEC_3_3() \
        st1     {XTMP5.16b}, [addr1]; \
        ext     W1.16b, XTMP0.16b, XTMP0.16b, #4;  /* W1: xx, w3, w2, w1 */
#define LOAD_W_VEC_3_4() \
        ext     W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */
#define LOAD_W_VEC_3_5() \
        ext     W4.16b, XTMP2.16b, XTMP3.16b, #8;  /* W4: xx, w12, w11, w10 */
#define LOAD_W_VEC_3_6() \
        st1     {XTMP2.16b}, [addr0], #16;
#define LOAD_W_VEC_3_7() \
        st1     {XTMP6.16b}, [addr0];
#define LOAD_W_VEC_3_8() \
        ext     W5.16b, XTMP3.16b, XTMP3.16b, #4;  /* W5: xx, w15, w14, w13 */
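
/* Net effect of LOAD_W_VEC_*: read the 64-byte block, rev32 each word
 * (SM3 is big-endian), store w[i] alongside w[i] ^ w[i+4] on the stack
 * for rounds 0-11, and leave W0-W5 holding a sliding window over the
 * sixteen message words, three live words per vector register. */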

#define LOAD_W_VEC_1(iop_num, ...) \
        LOAD_W_VEC_1_##iop_num()
#define LOAD_W_VEC_2(iop_num, ...) \
        LOAD_W_VEC_2_##iop_num()
#define LOAD_W_VEC_3(iop_num, ...) \
        LOAD_W_VEC_3_##iop_num()

/* Message scheduling. Note: 3 words per vector register.
 * Interleaving within round function needed for in-order CPUs. */
#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \
        /* Load (w[i - 16]) => XTMP0 */            \
        /* Load (w[i - 13]) => XTMP5 */            \
        ext     XTMP0.16b, w0.16b, w0.16b, #12;    /* XTMP0: w0, xx, xx, xx */
#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \
        ext     XTMP5.16b, w1.16b, w1.16b, #12;
#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \
        ext     XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */
#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \
        ext     XTMP5.16b, XTMP5.16b, w2.16b, #12;
#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \
        /* w[i - 9] == w3 */                       \
        /* w3 ^ XTMP0 => XTMP0 */                  \
        eor     XTMP0.16b, XTMP0.16b, w3.16b;
#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \
        /* w[i - 3] == w5 */                       \
        /* rol(w5, 15) ^ XTMP0 => XTMP0 */         \
        /* rol(XTMP5, 7) => XTMP1 */               \
        add     addr0, sp, #XW_W1_ADDR((round), 0); \
        shl     XTMP2.4s, w5.4s, #15;
#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \
        shl     XTMP1.4s, XTMP5.4s, #7;
#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \
        sri     XTMP2.4s, w5.4s, #(32-15);
#define SCHED_W_2_1(round, w0, w1, w2, w3, w4, w5) \
        sri     XTMP1.4s, XTMP5.4s, #(32-7);
#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \
        eor     XTMP0.16b, XTMP0.16b, XTMP2.16b;
#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \
        /* w[i - 6] == w4 */                       \
        /* w4 ^ XTMP1 => XTMP1 */                  \
        eor     XTMP1.16b, XTMP1.16b, w4.16b;
#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \
        /* P1(XTMP0) ^ XTMP1 => w0 */              \
        shl     XTMP3.4s, XTMP0.4s, #15;
#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \
        shl     XTMP4.4s, XTMP0.4s, #23;
#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \
        eor     w0.16b, XTMP1.16b, XTMP0.16b;
#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \
        sri     XTMP3.4s, XTMP0.4s, #(32-15);
#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \
        sri     XTMP4.4s, XTMP0.4s, #(32-23);
#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \
        eor     w0.16b, w0.16b, XTMP3.16b;
#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \
        /* Load (w[i - 3]) => XTMP2 */             \
        ext     XTMP2.16b, w4.16b, w4.16b, #12;
#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \
        eor     w0.16b, w0.16b, XTMP4.16b;
#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \
        ext     XTMP2.16b, XTMP2.16b, w5.16b, #12;
#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \
        /* W1 ^ W2 => XTMP3 */                     \
        eor     XTMP3.16b, XTMP2.16b, w0.16b;
#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5)
#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \
        st1     {XTMP2.16b-XTMP3.16b}, [addr0];
#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5)
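
/* The schedule above implements the SM3 expansion
 *   w[i] = P1(w[i-16] ^ w[i-9] ^ rol(w[i-3], 15))
 *          ^ rol(w[i-13], 7) ^ w[i-6]
 * with P1(x) = x ^ rol(x, 15) ^ rol(x, 23), three words at a time.
 * Vector rotates are built from shl + sri pairs; the new w1 and
 * w1 ^ w2 values are stored for the round function to pick up. */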

#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \
        SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5)
#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \
        SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5)
#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \
        SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5)

#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \
        SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0)
#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \
        SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0)
#define SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \
        SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0)

#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \
        SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1)
#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \
        SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1)
#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \
        SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1)

#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \
        SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2)
#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \
        SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2)
#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \
        SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2)

#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \
        SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3)
#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \
        SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3)
#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \
        SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3)

#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \
        SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4)
#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \
        SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4)
#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \
        SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4)
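
/* The six SCHED_W_W* permutations rotate the W0-W5 register window by
 * one vector per three-round group instead of moving data. */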


        /*
         * Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'.
         *
         * void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
         *                         int blocks)
         */
        .text
.align 3
SYM_TYPED_FUNC_START(sm3_neon_transform)
        ldp             ra, rb, [RSTATE, #0]
        ldp             rc, rd, [RSTATE, #8]
        ldp             re, rf, [RSTATE, #16]
        ldp             rg, rh, [RSTATE, #24]

        stp             x28, x29, [sp, #-16]!
        stp             x19, x20, [sp, #-16]!
        stp             x21, x22, [sp, #-16]!
        stp             x23, x24, [sp, #-16]!
        stp             x25, x26, [sp, #-16]!
        mov             RFRAME, sp

        sub             addr0, sp, #STACK_SIZE
        adr_l           RKPTR, .LKtable
        and             sp, addr0, #(~63)
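
        /* sp now points at a 64-byte aligned scratch area of at least
         * STACK_SIZE bytes, below the saved frame pointer (RFRAME). */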

        /* Preload first block. */
        LOAD_W_VEC_1(1, 0)
        LOAD_W_VEC_1(2, 0)
        LOAD_W_VEC_1(3, 0)
        LOAD_W_VEC_1(4, 0)
        LOAD_W_VEC_1(5, 0)
        LOAD_W_VEC_1(6, 0)
        LOAD_W_VEC_1(7, 0)
        LOAD_W_VEC_1(8, 0)
        LOAD_W_VEC_2(1, 0)
        LOAD_W_VEC_2(2, 0)
        LOAD_W_VEC_2(3, 0)
        LOAD_W_VEC_2(4, 0)
        LOAD_W_VEC_2(5, 0)
        LOAD_W_VEC_2(6, 0)
        LOAD_W_VEC_2(7, 0)
        LOAD_W_VEC_2(8, 0)
        LOAD_W_VEC_3(1, 0)
        LOAD_W_VEC_3(2, 0)
        LOAD_W_VEC_3(3, 0)
        LOAD_W_VEC_3(4, 0)
        LOAD_W_VEC_3(5, 0)
        LOAD_W_VEC_3(6, 0)
        LOAD_W_VEC_3(7, 0)
        LOAD_W_VEC_3(8, 0)
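
        /* Rounds 0-11 (wtype IW) consume the byte-swapped input stored
         * above; rounds 12+ (wtype XW) consume freshly expanded words.
         * Each R1/R2 also carries one interleaved scheduling or load
         * step (the IOP argument) to keep in-order pipelines busy. */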

.balign 16
.Loop:
        /* Transform 0-3 */
        R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0)
        R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  1, 1, IW, _, 0)
        R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0)
        R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  3, 3, IW, _, 0)

        /* Transform 4-7 + Precalc 12-14 */
        R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0)
        R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  5, 1, IW, _, 0)
        R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12)
        R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12)

        /* Transform 8-11 + Precalc 12-17 */
        R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12)
        R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15)
        R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15)
        R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15)

        /* Transform 12-14 + Precalc 18-20 */
        R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18)
        R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18)
        R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18)

        /* Transform 15-17 + Precalc 21-23 */
        R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21)
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21)
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21)

        /* Transform 18-20 + Precalc 24-26 */
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24)
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24)
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24)

        /* Transform 21-23 + Precalc 27-29 */
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27)
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27)
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27)

        /* Transform 24-26 + Precalc 30-32 */
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30)
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30)
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30)

        /* Transform 27-29 + Precalc 33-35 */
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33)
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33)
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33)

        /* Transform 30-32 + Precalc 36-38 */
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36)
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36)
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36)

        /* Transform 33-35 + Precalc 39-41 */
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39)
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39)
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39)

        /* Transform 36-38 + Precalc 42-44 */
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42)
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42)
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42)

        /* Transform 39-41 + Precalc 45-47 */
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45)
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45)
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45)

        /* Transform 42-44 + Precalc 48-50 */
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48)
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48)
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48)

        /* Transform 45-47 + Precalc 51-53 */
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51)
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51)
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51)

        /* Transform 48-50 + Precalc 54-56 */
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54)
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54)
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54)

        /* Transform 51-53 + Precalc 57-59 */
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57)
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57)
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57)

        /* Transform 54-56 + Precalc 60-62 */
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60)
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60)
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60)

        /* Transform 57-59 + Precalc 63 */
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63)
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63)
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63)

        /* Transform 60 */
        R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _)
        subs            RNBLKS, RNBLKS, #1
        b.eq            .Lend
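
        /* Deciding here, at round 60, lets the final three rounds of
         * this block overlap with loading the next one. */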

        /* Transform 61-63 + Preload next block */
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  61, 1, XW, LOAD_W_VEC_1, _)
        ldp             s0, s1, [RSTATE, #0]
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _)
        ldp             s2, s3, [RSTATE, #8]
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  63, 0, XW, LOAD_W_VEC_3, _)

        /* Update the chaining variables. */
        eor             ra, ra, s0
        eor             rb, rb, s1
        ldp             s0, s1, [RSTATE, #16]
        eor             rc, rc, s2
        ldp             k_even, k_odd, [RSTATE, #24]
        eor             rd, rd, s3
        eor             re, re, s0
        stp             ra, rb, [RSTATE, #0]
        eor             rf, rf, s1
        stp             rc, rd, [RSTATE, #8]
        eor             rg, rg, k_even
        stp             re, rf, [RSTATE, #16]
        eor             rh, rh, k_odd
        stp             rg, rh, [RSTATE, #24]
        b               .Loop

.Lend:
        /* Transform 61-63 */
        R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  61, 1, XW, _, _)
        ldp             s0, s1, [RSTATE, #0]
        R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _)
        ldp             s2, s3, [RSTATE, #8]
        R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  63, 0, XW, _, _)

        /* Update the chaining variables. */
        eor             ra, ra, s0
        clear_vec(W0)
        eor             rb, rb, s1
        clear_vec(W1)
        ldp             s0, s1, [RSTATE, #16]
        clear_vec(W2)
        eor             rc, rc, s2
        clear_vec(W3)
        ldp             k_even, k_odd, [RSTATE, #24]
        clear_vec(W4)
        eor             rd, rd, s3
        clear_vec(W5)
        eor             re, re, s0
        clear_vec(XTMP0)
        stp             ra, rb, [RSTATE, #0]
        clear_vec(XTMP1)
        eor             rf, rf, s1
        clear_vec(XTMP2)
        stp             rc, rd, [RSTATE, #8]
        clear_vec(XTMP3)
        eor             rg, rg, k_even
        clear_vec(XTMP4)
        stp             re, rf, [RSTATE, #16]
        clear_vec(XTMP5)
        eor             rh, rh, k_odd
        clear_vec(XTMP6)
        stp             rg, rh, [RSTATE, #24]

        /* Clear message expansion area */
        add             addr0, sp, #STACK_W
        st1             {W0.16b-W3.16b}, [addr0], #64
        st1             {W0.16b-W3.16b}, [addr0], #64
        st1             {W0.16b-W3.16b}, [addr0]
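
        /* Vector registers and the stack area were zeroed above so that
         * no message-derived data lingers after return. */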

        mov             sp, RFRAME

        ldp             x25, x26, [sp], #16
        ldp             x23, x24, [sp], #16
        ldp             x21, x22, [sp], #16
        ldp             x19, x20, [sp], #16
        ldp             x28, x29, [sp], #16

        ret
SYM_FUNC_END(sm3_neon_transform)


        .section        ".rodata", "a"

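/*
 * Round-constant table: K[j] = rol32(T_j, j), with T_j = 0x79cc4519
 * for rounds 0-15 and 0x7a879d8a for rounds 16-63, precomputed so the
 * per-round rotation costs nothing at run time.
 */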
        .align 4
.LKtable:
        .long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb
        .long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc
        .long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce
        .long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6
        .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
        .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
        .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
        .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
        .long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53
        .long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d
        .long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4
        .long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43
        .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
        .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
        .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
        .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
