~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm/crypto/sha1-armv7-neon.S

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-or-later */
  2 /* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
  3  *
  4  * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  5  */
  6 
  7 #include <linux/linkage.h>
  8 #include <asm/assembler.h>
  9 
 10 .syntax unified
 11 .fpu neon
 12 
 13 .text
 14 
 15 
 16 /* Context structure */
 17 
/*
 * Byte offsets of the five 32-bit SHA-1 chaining words h0..h4 within the
 * state buffer passed as ctx (r0).  Must match the layout the C caller
 * uses for its SHA-1 state -- TODO confirm against the glue code.
 */
 18 #define state_h0 0
 19 #define state_h1 4
 20 #define state_h2 8
 21 #define state_h3 12
 22 #define state_h4 16
 23 
 24 
 25 /* Constants */
 26 
/* The four standard SHA-1 round constants, one per group of 20 rounds. */
 27 #define K1  0x5A827999
 28 #define K2  0x6ED9EBA1
 29 #define K3  0x8F1BBCDC
 30 #define K4  0xCA62C1D6
/*
 * Each round constant replicated into all four 32-bit lanes of a 128-bit
 * vector, so a single vadd.u32 adds the constant to four message-schedule
 * words at once.  Loaded into qK1..qK4 at function entry via two paired
 * vld1.32 {q,q} loads; ".align 4" (2^4 = 16 bytes) keeps the table
 * 16-byte aligned for those loads.
 */
 31 .align 4
 32 .LK_VEC:
 33 .LK1:   .long K1, K1, K1, K1
 34 .LK2:   .long K2, K2, K2, K2
 35 .LK3:   .long K3, K3, K3, K3
 36 .LK4:   .long K4, K4, K4, K4
 37 
 38 
 39 /* Register macros */
 40 
/*
 * Incoming arguments (see the sha1_transform_neon header comment):
 *   RSTATE = r0: pointer to the h0..h4 chaining state
 *   RDATA  = r1: pointer to the input data (64*nblks bytes)
 *   RNBLKS = r2: number of 64-byte blocks to process
 * ROLDSTACK (r3) holds the caller's sp while sp points at the aligned
 * W+K scratch area.  RWK aliases lr (safe: lr is pushed on entry) and
 * walks the on-stack W+K ring buffer.
 */
 41 #define RSTATE r0
 42 #define RDATA r1
 43 #define RNBLKS r2
 44 #define ROLDSTACK r3
 45 #define RWK lr
 46 
/* Working copies of the five SHA-1 chaining variables a..e. */
 47 #define _a r4
 48 #define _b r5
 49 #define _c r6
 50 #define _d r7
 51 #define _e r8
 52 
/* Scalar scratch registers used inside the round macros. */
 53 #define RT0 r9
 54 #define RT1 r10
 55 #define RT2 r11
 56 #define RT3 r12
 57 
/*
 * Logical names for the 8 NEON vectors holding the 32-word message
 * schedule window (4 words each).  The logical->physical q-register
 * mapping is deliberately non-sequential; only the W0..W7 names matter,
 * the round code never relies on adjacency of the q registers.
 */
 58 #define W0 q0
 59 #define W1 q7
 60 #define W2 q2
 61 #define W3 q3
 62 #define W4 q4
 63 #define W5 q6
 64 #define W6 q5
 65 #define W7 q1
 66 
/* NEON temporaries for the schedule-expansion arithmetic. */
 67 #define tmp0 q8
 68 #define tmp1 q9
 69 #define tmp2 q10
 70 #define tmp3 q11
 71 
/* Broadcast round-constant vectors, preloaded from .LK_VEC at entry. */
 72 #define qK1 q12
 73 #define qK2 q13
 74 #define qK3 q14
 75 #define qK4 q15
 76 
/*
 * ARM_LE(x) emits x only on little-endian builds.  It wraps the
 * vrev32.8 byte swaps: SHA-1 message words are big-endian, so LE builds
 * must swap each 32-bit word after loading.  On CONFIG_CPU_BIG_ENDIAN
 * the swaps are compiled out -- presumably vld1.32 already yields the
 * words in the right order there; confirm for BE builds.
 */
 77 #ifdef CONFIG_CPU_BIG_ENDIAN
 78 #define ARM_LE(code...)
 79 #else
 80 #define ARM_LE(code...)         code
 81 #endif
 82 
 83 /* Round function macros. */
 84 
/*
 * Stack byte offset of W[i]+K in the 16-entry (64-byte) ring buffer of
 * precomputed schedule words; indices wrap modulo 16.
 */
 85 #define WK_offs(i) (((i) & 15) * 4)
 86 
/*
 * One scalar round for rounds 0..19: f(b,c,d) = Ch = (b & c) | (~b & d).
 * Computed as (c & b) + (d & ~b): the two terms select disjoint bits, so
 * addition equals OR.  Overall update:
 *     e += rol(a, 5) + f(b,c,d) + (W[i] + K);   b = rol(b, 30);
 * K is already folded into the value loaded from the stack into RT3.
 * "ror #(32 - n)" is how a left-rotate by n is written with ARM's
 * rotate-right.  pre1/pre2/pre3 are hook macros through which NEON
 * schedule-precalc steps for a future round get interleaved between the
 * scalar instructions; i16 and the W* parameters are only consumed by
 * those hooks.
 */
 87 #define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
 88               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 89         ldr RT3, [sp, WK_offs(i)]; \
 90                 pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
 91         bic RT0, d, b; \
 92         add e, e, a, ror #(32 - 5); \
 93         and RT1, c, b; \
 94                 pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
 95         add RT0, RT0, RT3; \
 96         add e, e, RT1; \
 97         ror b, #(32 - 30); \
 98                 pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
 99         add e, e, RT0;
100 
/*
 * One scalar round for rounds 20..39 (and, via _R_F4, 60..79):
 * f(b,c,d) = parity = b ^ c ^ d.  Same overall update as _R_F1.
 */
101 #define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
102               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
103         ldr RT3, [sp, WK_offs(i)]; \
104                 pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
105         eor RT0, d, b; \
106         add e, e, a, ror #(32 - 5); \
107         eor RT0, RT0, c; \
108                 pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
109         add e, e, RT3; \
110         ror b, #(32 - 30); \
111                 pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
112         add e, e, RT0; \
113 
/*
 * One scalar round for rounds 40..59: f(b,c,d) = Maj(b,c,d), computed in
 * the equivalent form (b & c) | (d & (b ^ c)):
 *   RT0 = (b ^ c) & d,  RT1 = (b & c) + (W[i]+K); both added into e.
 * (The OR may be replaced by + because for Maj the two terms never have
 * a common set bit in a position where it would change the result of
 * the standard identity; this is the classic SHA-1 Maj decomposition.)
 */
114 #define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
115               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
116         ldr RT3, [sp, WK_offs(i)]; \
117                 pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
118         eor RT0, b, c; \
119         and RT1, b, c; \
120         add e, e, a, ror #(32 - 5); \
121                 pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
122         and RT0, RT0, d; \
123         add RT1, RT1, RT3; \
124         add e, e, RT0; \
125         ror b, #(32 - 30); \
126                 pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
127         add e, e, RT1;
128 
/*
 * Rounds 60..79 reuse the parity function of rounds 20..39; only the
 * round constant (already folded into the stacked W+K value) differs.
 */
129 #define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
130               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
131         _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
132               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
133 
/*
 * Generic round: dispatch on the round-function tag f (F1..F4) by token
 * pasting, threading the interleaved-precalc hooks and their arguments
 * through to the selected _R_F* macro.
 */
134 #define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
135            W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
136         _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
137                W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
138 
/*
 * Plain round with no interleaved precalc: all three hooks are "dummy",
 * so the trailing i16/W* tokens are swallowed unexpanded and need not
 * name anything defined at the use site.
 */
139 #define R(a,b,c,d,e,f,i) \
140         _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
141                W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
142 
/* No-op hook: swallows its arguments and expands to nothing. */
143 #define dummy(...)
144 
145 
146 /* Input expansion macros. */
147 
148 /********* Precalc macros for rounds 0-15 *************************************/
149 
/*
 * Monolithic form, used once before the main loop: load one 64-byte
 * input block into W0,W7,W6,W5 (16 schedule words), byte-swap each
 * 32-bit word on little-endian builds (SHA-1 words are big-endian),
 * add the current round constant (curK), and store all 16 W+K values
 * into the stack ring buffer starting at WK_offs(0).
 */
150 #define W_PRECALC_00_15() \
151         add       RWK, sp, #(WK_offs(0));                       \
152         \
153         vld1.32   {W0, W7}, [RDATA]!;                           \
154  ARM_LE(vrev32.8  W0, W0;       )       /* big => little */     \
155         vld1.32   {W6, W5}, [RDATA]!;                           \
156         vadd.u32  tmp0, W0, curK;                               \
157  ARM_LE(vrev32.8  W7, W7;       )       /* big => little */     \
158  ARM_LE(vrev32.8  W6, W6;       )       /* big => little */     \
159         vadd.u32  tmp1, W7, curK;                               \
160  ARM_LE(vrev32.8  W5, W5;       )       /* big => little */     \
161         vadd.u32  tmp2, W6, curK;                               \
162         vst1.32   {tmp0, tmp1}, [RWK]!;                         \
163         vadd.u32  tmp3, W5, curK;                               \
164         vst1.32   {tmp2, tmp3}, [RWK];                          \
165 
/*
 * The same work as W_PRECALC_00_15(), split into 13 single steps
 * (WPRECALC_00_15_0 .. _12) so it can be interleaved, one step per
 * scalar round, into rounds 64..79 of the current block while the next
 * block's schedule is being prepared.  The i/W* parameters are accepted
 * for hook-signature compatibility and ignored.
 */
166 #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
167         vld1.32   {W0, W7}, [RDATA]!;                           \
168 
169 #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
170         add       RWK, sp, #(WK_offs(0));                       \
171 
172 #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
173  ARM_LE(vrev32.8  W0, W0;       )       /* big => little */     \
174 
175 #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
176         vld1.32   {W6, W5}, [RDATA]!;                           \
177 
178 #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
179         vadd.u32  tmp0, W0, curK;                               \
180 
181 #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
182  ARM_LE(vrev32.8  W7, W7;       )       /* big => little */     \
183 
184 #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
185  ARM_LE(vrev32.8  W6, W6;       )       /* big => little */     \
186 
187 #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
188         vadd.u32  tmp1, W7, curK;                               \
189 
190 #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
191  ARM_LE(vrev32.8  W5, W5;       )       /* big => little */     \
192 
193 #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
194         vadd.u32  tmp2, W6, curK;                               \
195 
196 #define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
197         vst1.32   {tmp0, tmp1}, [RWK]!;                         \
198 
199 #define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
200         vadd.u32  tmp3, W5, curK;                               \
201 
202 #define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
203         vst1.32   {tmp2, tmp3}, [RWK];                          \
204 
205 
206 /********* Precalc macros for rounds 16-31 ************************************/
207 
/*
 * Vectorized schedule expansion for rounds 16..31, four words per group:
 *     W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1)
 * The W parameter is the destination vector (it arrives holding the
 * 32-rounds-old value being recycled); W_m04..W_m16 hold the previous
 * groups.  Lane 3 of the group depends on lane 0 (W[i+3] needs W[i],
 * which is being computed in the same vector), so lane 3 is built
 * without that term and fixed up afterwards: lane 0's pre-rotate value
 * is isolated in tmp1 and XORed in rotated by 2 (= its rol-1 value
 * rotated once more) -- the standard SSSE3/NEON SHA-1 trick.
 * Finally W+curK is stored to the stack ring at WK_offs(i).
 *
 * Step 0: tmp0 = 0; W = words [i-14 .. i-11] (vext of W_m16:W_m12).
 */
208 #define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
209         veor      tmp0, tmp0;                   \
210         vext.8    W, W_m16, W_m12, #8;          \
211 
/* Step 1: RWK = &WK[i]; tmp0 = [W[i-3], W[i-2], W[i-1], 0]. */
212 #define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
213         add       RWK, sp, #(WK_offs(i));       \
214         vext.8    tmp0, W_m04, tmp0, #4;        \
215 
/* Step 2: fold in the W[i-16] and W[i-8] terms. */
216 #define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
217         veor      tmp0, tmp0, W_m16;            \
218         veor.32   W, W, W_m08;                  \
219 
/* Step 3: tmp1 = 0; W = full XOR (lane 3 still missing its W[i] term). */
220 #define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
221         veor      tmp1, tmp1;                   \
222         veor      W, W, tmp0;                   \
223 
/* Steps 4..6: rol-by-1 of W via shl #1 / shr #31 / vorr into tmp0. */
224 #define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
225         vshl.u32  tmp0, W, #1;                  \
226 
/* Step 5 also stashes lane 0's pre-rotate value: tmp1 = [0,0,0,w0]. */
227 #define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
228         vext.8    tmp1, tmp1, W, #(16-12);      \
229         vshr.u32  W, W, #31;                    \
230 
231 #define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
232         vorr      tmp0, tmp0, W;                \
233         vshr.u32  W, tmp1, #30;                 \
234 
/* Steps 6..9: rol(w0, 2) built from tmp1 and XORed into lane 3 only. */
235 #define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
236         vshl.u32  tmp1, tmp1, #2;               \
237 
238 #define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
239         veor      tmp0, tmp0, W;                \
240 
241 #define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
242         veor      W, tmp0, tmp1;                \
243 
/* Steps 10..11: add the round constant and store W+K to the ring. */
244 #define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
245         vadd.u32  tmp0, W, curK;                \
246 
247 #define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
248         vst1.32   {tmp0}, [RWK];
249 
250 
251 /********* Precalc macros for rounds 32-79 ************************************/
252 
/*
 * Vectorized schedule expansion for rounds 32..79, using the derived
 * recurrence
 *     W[i] = rol(W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32], 2)
 * (unrolling the standard rol-1 recurrence once).  None of the source
 * words lies within the current 4-word group, so all four lanes can be
 * computed in parallel with no intra-vector fixup.  W arrives holding
 * the 32-rounds-old group (the W[i-32] term); the W[i-6..i-3] window is
 * assembled from W_m08/W_m04 with vext.  Result+curK is stored at
 * WK_offs(i & ~3) (group-aligned slot in the 16-entry ring).
 *
 * Step 0: W = W[i-32] ^ W[i-28].
 */
253 #define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
254         veor W, W_m28; \
255 
/* Step 1: tmp0 = words [i-6 .. i-3]. */
256 #define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
257         vext.8 tmp0, W_m08, W_m04, #8; \
258 
/* Steps 2..3: fold in the W[i-16] and W[i-6] terms. */
259 #define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
260         veor W, W_m16; \
261 
262 #define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
263         veor W, tmp0; \
264 
/* Step 4: point RWK at the group-aligned W+K slot. */
265 #define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
266         add RWK, sp, #(WK_offs(i&~3)); \
267 
/* Steps 5..7: rol-by-2 via shl #2 / shr #30 / vorr. */
268 #define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
269         vshl.u32 tmp1, W, #2; \
270 
271 #define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
272         vshr.u32 tmp0, W, #30; \
273 
274 #define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
275         vorr W, tmp0, tmp1; \
276 
/* Steps 8..9: add the round constant and store W+K to the ring. */
277 #define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
278         vadd.u32 tmp0, W, curK; \
279 
280 #define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
281         vst1.32 {tmp0}, [RWK];
282 
283 
284 /*
285  * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
286  *
287  * unsigned int
288  * sha1_transform_neon (void *ctx, const unsigned char *data,
289  *                      unsigned int nblks)
290  */
/*
 * NOTE(review): the documented "unsigned int" return value is never
 * explicitly set -- r0 still holds ctx on return; callers apparently
 * ignore the value.  Confirm against the C glue.
 *
 * NOTE(review): q4-q7 are callee-saved under AAPCS, but the vpush/vpop
 * below are commented out -- presumably safe because the kernel-mode
 * NEON glue (kernel_neon_begin/end) owns the whole NEON register file;
 * confirm against the caller.
 *
 * Structure: per 64-byte block, 80 scalar SHA-1 rounds are executed with
 * NEON message-schedule precalculation for *later* rounds interleaved
 * into the pre1/pre2/pre3 hooks of each round macro.  W[i]+K values are
 * exchanged through a 16-entry ring buffer on the (re-aligned) stack.
 */
291 .align 3
292 ENTRY(sha1_transform_neon)
293   /* input:
294    *    r0: ctx, CTX
295    *    r1: data (64*nblks bytes)
296    *    r2: nblks
297    */
298 
299   cmp RNBLKS, #0;
300   beq .Ldo_nothing;
301 
302   push {r4-r12, lr};
303   /*vpush {q4-q7};*/
304 
305   adr RT3, .LK_VEC;
306 
307   mov ROLDSTACK, sp;
308 
  /*
   * Reserve 16*4 = 64 bytes for the W+K ring buffer and round sp down
   * to a 16-byte boundary (for the vst1.32/vld1.32 accesses); the
   * caller's sp is kept in ROLDSTACK for the epilogue.
   */
309   /* Align stack. */
310   sub RT0, sp, #(16*4);
311   and RT0, #(~(16-1));
312   mov sp, RT0;
313 
314   vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
315 
316   /* Get the values of the chaining variables. */
317   ldm RSTATE, {_a-_e};
318 
319   vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
320 
  /*
   * curK names the round-constant vector consumed by the precalc
   * macros; it is re-#defined (qK1..qK4) as the precalculated rounds
   * advance through the four 20-round groups.
   */
321 #undef curK
322 #define curK qK1
323   /* Precalc 0-15. */
324   W_PRECALC_00_15();
325 
  /* Main per-block loop.  Entered with W+K[0..15] already on the stack. */
326 .Loop:
327   /* Transform 0-15 + Precalc 16-31. */
328   _R( _a, _b, _c, _d, _e, F1,  0,
329       WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
330       W4, W5, W6, W7, W0, _, _, _ );
331   _R( _e, _a, _b, _c, _d, F1,  1,
332       WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
333       W4, W5, W6, W7, W0, _, _, _ );
334   _R( _d, _e, _a, _b, _c, F1,  2,
335       WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
336       W4, W5, W6, W7, W0, _, _, _ );
337   _R( _c, _d, _e, _a, _b, F1,  3,
338       WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
339       W4, W5, W6, W7, W0, _, _, _ );
340 
341 #undef curK
342 #define curK qK2
343   _R( _b, _c, _d, _e, _a, F1,  4,
344       WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
345       W3, W4, W5, W6, W7, _, _, _ );
346   _R( _a, _b, _c, _d, _e, F1,  5,
347       WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
348       W3, W4, W5, W6, W7, _, _, _ );
349   _R( _e, _a, _b, _c, _d, F1,  6,
350       WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
351       W3, W4, W5, W6, W7, _, _, _ );
352   _R( _d, _e, _a, _b, _c, F1,  7,
353       WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
354       W3, W4, W5, W6, W7, _, _, _ );
355 
356   _R( _c, _d, _e, _a, _b, F1,  8,
357       WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
358       W2, W3, W4, W5, W6, _, _, _ );
359   _R( _b, _c, _d, _e, _a, F1,  9,
360       WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
361       W2, W3, W4, W5, W6, _, _, _ );
362   _R( _a, _b, _c, _d, _e, F1, 10,
363       WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
364       W2, W3, W4, W5, W6, _, _, _ );
365   _R( _e, _a, _b, _c, _d, F1, 11,
366       WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
367       W2, W3, W4, W5, W6, _, _, _ );
368 
369   _R( _d, _e, _a, _b, _c, F1, 12,
370       WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
371       W1, W2, W3, W4, W5, _, _, _ );
372   _R( _c, _d, _e, _a, _b, F1, 13,
373       WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
374       W1, W2, W3, W4, W5, _, _, _ );
375   _R( _b, _c, _d, _e, _a, F1, 14,
376       WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
377       W1, W2, W3, W4, W5, _, _, _ );
378   _R( _a, _b, _c, _d, _e, F1, 15,
379       WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
380       W1, W2, W3, W4, W5, _, _, _ );
381 
382   /* Transform 16-63 + Precalc 32-79. */
383   _R( _e, _a, _b, _c, _d, F1, 16,
384       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
385       W0, W1, W2, W3, W4, W5, W6, W7);
386   _R( _d, _e, _a, _b, _c, F1, 17,
387       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
388       W0, W1, W2, W3, W4, W5, W6, W7);
389   _R( _c, _d, _e, _a, _b, F1, 18,
390       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 32,
391       W0, W1, W2, W3, W4, W5, W6, W7);
392   _R( _b, _c, _d, _e, _a, F1, 19,
393       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 32,
394       W0, W1, W2, W3, W4, W5, W6, W7);
395 
396   _R( _a, _b, _c, _d, _e, F2, 20,
397       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
398       W7, W0, W1, W2, W3, W4, W5, W6);
399   _R( _e, _a, _b, _c, _d, F2, 21,
400       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
401       W7, W0, W1, W2, W3, W4, W5, W6);
402   _R( _d, _e, _a, _b, _c, F2, 22,
403       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 36,
404       W7, W0, W1, W2, W3, W4, W5, W6);
405   _R( _c, _d, _e, _a, _b, F2, 23,
406       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 36,
407       W7, W0, W1, W2, W3, W4, W5, W6);
408 
409 #undef curK
410 #define curK qK3
411   _R( _b, _c, _d, _e, _a, F2, 24,
412       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
413       W6, W7, W0, W1, W2, W3, W4, W5);
414   _R( _a, _b, _c, _d, _e, F2, 25,
415       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
416       W6, W7, W0, W1, W2, W3, W4, W5);
417   _R( _e, _a, _b, _c, _d, F2, 26,
418       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 40,
419       W6, W7, W0, W1, W2, W3, W4, W5);
420   _R( _d, _e, _a, _b, _c, F2, 27,
421       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 40,
422       W6, W7, W0, W1, W2, W3, W4, W5);
423 
424   _R( _c, _d, _e, _a, _b, F2, 28,
425       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
426       W5, W6, W7, W0, W1, W2, W3, W4);
427   _R( _b, _c, _d, _e, _a, F2, 29,
428       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
429       W5, W6, W7, W0, W1, W2, W3, W4);
430   _R( _a, _b, _c, _d, _e, F2, 30,
431       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 44,
432       W5, W6, W7, W0, W1, W2, W3, W4);
433   _R( _e, _a, _b, _c, _d, F2, 31,
434       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 44,
435       W5, W6, W7, W0, W1, W2, W3, W4);
436 
437   _R( _d, _e, _a, _b, _c, F2, 32,
438       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
439       W4, W5, W6, W7, W0, W1, W2, W3);
440   _R( _c, _d, _e, _a, _b, F2, 33,
441       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
442       W4, W5, W6, W7, W0, W1, W2, W3);
443   _R( _b, _c, _d, _e, _a, F2, 34,
444       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 48,
445       W4, W5, W6, W7, W0, W1, W2, W3);
446   _R( _a, _b, _c, _d, _e, F2, 35,
447       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 48,
448       W4, W5, W6, W7, W0, W1, W2, W3);
449 
450   _R( _e, _a, _b, _c, _d, F2, 36,
451       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
452       W3, W4, W5, W6, W7, W0, W1, W2);
453   _R( _d, _e, _a, _b, _c, F2, 37,
454       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
455       W3, W4, W5, W6, W7, W0, W1, W2);
456   _R( _c, _d, _e, _a, _b, F2, 38,
457       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 52,
458       W3, W4, W5, W6, W7, W0, W1, W2);
459   _R( _b, _c, _d, _e, _a, F2, 39,
460       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 52,
461       W3, W4, W5, W6, W7, W0, W1, W2);
462 
463   _R( _a, _b, _c, _d, _e, F3, 40,
464       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
465       W2, W3, W4, W5, W6, W7, W0, W1);
466   _R( _e, _a, _b, _c, _d, F3, 41,
467       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
468       W2, W3, W4, W5, W6, W7, W0, W1);
469   _R( _d, _e, _a, _b, _c, F3, 42,
470       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 56,
471       W2, W3, W4, W5, W6, W7, W0, W1);
472   _R( _c, _d, _e, _a, _b, F3, 43,
473       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 56,
474       W2, W3, W4, W5, W6, W7, W0, W1);
475 
476 #undef curK
477 #define curK qK4
478   _R( _b, _c, _d, _e, _a, F3, 44,
479       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
480       W1, W2, W3, W4, W5, W6, W7, W0);
481   _R( _a, _b, _c, _d, _e, F3, 45,
482       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
483       W1, W2, W3, W4, W5, W6, W7, W0);
484   _R( _e, _a, _b, _c, _d, F3, 46,
485       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 60,
486       W1, W2, W3, W4, W5, W6, W7, W0);
487   _R( _d, _e, _a, _b, _c, F3, 47,
488       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 60,
489       W1, W2, W3, W4, W5, W6, W7, W0);
490 
491   _R( _c, _d, _e, _a, _b, F3, 48,
492       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
493       W0, W1, W2, W3, W4, W5, W6, W7);
494   _R( _b, _c, _d, _e, _a, F3, 49,
495       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
496       W0, W1, W2, W3, W4, W5, W6, W7);
497   _R( _a, _b, _c, _d, _e, F3, 50,
498       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 64,
499       W0, W1, W2, W3, W4, W5, W6, W7);
500   _R( _e, _a, _b, _c, _d, F3, 51,
501       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 64,
502       W0, W1, W2, W3, W4, W5, W6, W7);
503 
504   _R( _d, _e, _a, _b, _c, F3, 52,
505       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
506       W7, W0, W1, W2, W3, W4, W5, W6);
507   _R( _c, _d, _e, _a, _b, F3, 53,
508       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
509       W7, W0, W1, W2, W3, W4, W5, W6);
510   _R( _b, _c, _d, _e, _a, F3, 54,
511       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 68,
512       W7, W0, W1, W2, W3, W4, W5, W6);
513   _R( _a, _b, _c, _d, _e, F3, 55,
514       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 68,
515       W7, W0, W1, W2, W3, W4, W5, W6);
516 
517   _R( _e, _a, _b, _c, _d, F3, 56,
518       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
519       W6, W7, W0, W1, W2, W3, W4, W5);
520   _R( _d, _e, _a, _b, _c, F3, 57,
521       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
522       W6, W7, W0, W1, W2, W3, W4, W5);
523   _R( _c, _d, _e, _a, _b, F3, 58,
524       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 72,
525       W6, W7, W0, W1, W2, W3, W4, W5);
526   _R( _b, _c, _d, _e, _a, F3, 59,
527       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 72,
528       W6, W7, W0, W1, W2, W3, W4, W5);
529 
  /*
   * Decrement the block counter here; the resulting flags survive
   * rounds 60-63 (no 's'-suffixed instructions in the round macros)
   * and are consumed by "beq .Lend" below.
   */
530   subs RNBLKS, #1;
531 
532   _R( _a, _b, _c, _d, _e, F4, 60,
533       WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
534       W5, W6, W7, W0, W1, W2, W3, W4);
535   _R( _e, _a, _b, _c, _d, F4, 61,
536       WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
537       W5, W6, W7, W0, W1, W2, W3, W4);
538   _R( _d, _e, _a, _b, _c, F4, 62,
539       WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 76,
540       W5, W6, W7, W0, W1, W2, W3, W4);
541   _R( _c, _d, _e, _a, _b, F4, 63,
542       WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 76,
543       W5, W6, W7, W0, W1, W2, W3, W4);
544 
545   beq .Lend;
546 
  /*
   * Not the last block: rounds 64-79 are interleaved with the split
   * WPRECALC_00_15_* steps that load, byte-swap and store W+K[0..15]
   * of the *next* block.
   */
547   /* Transform 64-79 + Precalc 0-15 of next block. */
548 #undef curK
549 #define curK qK1
550   _R( _b, _c, _d, _e, _a, F4, 64,
551       WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
552   _R( _a, _b, _c, _d, _e, F4, 65,
553       WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
554   _R( _e, _a, _b, _c, _d, F4, 66,
555       WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
556   _R( _d, _e, _a, _b, _c, F4, 67,
557       WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );
558 
559   _R( _c, _d, _e, _a, _b, F4, 68,
560       dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
561   _R( _b, _c, _d, _e, _a, F4, 69,
562       dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
563   _R( _a, _b, _c, _d, _e, F4, 70,
564       WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
565   _R( _e, _a, _b, _c, _d, F4, 71,
566       WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );
567 
568   _R( _d, _e, _a, _b, _c, F4, 72,
569       dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
570   _R( _c, _d, _e, _a, _b, F4, 73,
571       dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
572   _R( _b, _c, _d, _e, _a, F4, 74,
573       WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
574   _R( _a, _b, _c, _d, _e, F4, 75,
575       WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );
576 
577   _R( _e, _a, _b, _c, _d, F4, 76,
578       WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
579   _R( _d, _e, _a, _b, _c, F4, 77,
580       WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
581   _R( _c, _d, _e, _a, _b, F4, 78,
582       WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
583   _R( _b, _c, _d, _e, _a, F4, 79,
584       WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );
585 
  /*
   * h0..h4 += a..e.  The ldm fills RT0-RT3 with h0..h3 only (four
   * scratch regs), so h4 is reloaded separately by offset.
   */
586   /* Update the chaining variables. */
587   ldm RSTATE, {RT0-RT3};
588   add _a, RT0;
589   ldr RT0, [RSTATE, #state_h4];
590   add _b, RT1;
591   add _c, RT2;
592   add _d, RT3;
593   add _e, RT0;
594   stm RSTATE, {_a-_e};
595 
596   b .Loop;
597 
  /* Last block: plain rounds 64-79, no next-block precalc. */
598 .Lend:
599   /* Transform 64-79 */
600   R( _b, _c, _d, _e, _a, F4, 64 );
601   R( _a, _b, _c, _d, _e, F4, 65 );
602   R( _e, _a, _b, _c, _d, F4, 66 );
603   R( _d, _e, _a, _b, _c, F4, 67 );
604   R( _c, _d, _e, _a, _b, F4, 68 );
605   R( _b, _c, _d, _e, _a, F4, 69 );
606   R( _a, _b, _c, _d, _e, F4, 70 );
607   R( _e, _a, _b, _c, _d, F4, 71 );
608   R( _d, _e, _a, _b, _c, F4, 72 );
609   R( _c, _d, _e, _a, _b, F4, 73 );
610   R( _b, _c, _d, _e, _a, F4, 74 );
611   R( _a, _b, _c, _d, _e, F4, 75 );
612   R( _e, _a, _b, _c, _d, F4, 76 );
613   R( _d, _e, _a, _b, _c, F4, 77 );
614   R( _c, _d, _e, _a, _b, F4, 78 );
615   R( _b, _c, _d, _e, _a, F4, 79 );
616 
  /* Restore the caller's sp (also discards the W+K scratch area). */
617   mov sp, ROLDSTACK;
618 
619   /* Update the chaining variables. */
620   ldm RSTATE, {RT0-RT3};
621   add _a, RT0;
622   ldr RT0, [RSTATE, #state_h4];
623   add _b, RT1;
624   add _c, RT2;
625   add _d, RT3;
626   /*vpop {q4-q7};*/
627   add _e, RT0;
628   stm RSTATE, {_a-_e};
629 
630   pop {r4-r12, pc};
631 
  /* nblks == 0: nothing was pushed, return directly. */
632 .Ldo_nothing:
633   bx lr
634 ENDPROC(sha1_transform_neon)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php