/* SPDX-License-Identifier: GPL-2.0-or-later */
/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
 *
 * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

.syntax unified
.fpu neon

.text


/* Context structure */

#define state_h0 0
#define state_h1 4
#define state_h2 8
#define state_h3 12
#define state_h4 16


/* Constants */

#define K1  0x5A827999
#define K2  0x6ED9EBA1
#define K3  0x8F1BBCDC
#define K4  0xCA62C1D6
.align 4
.LK_VEC:
.LK1:   .long K1, K1, K1, K1
.LK2:   .long K2, K2, K2, K2
.LK3:   .long K3, K3, K3, K3
.LK4:   .long K4, K4, K4, K4


/* Register macros */

#define RSTATE r0
#define RDATA r1
#define RNBLKS r2
#define ROLDSTACK r3
#define RWK lr

#define _a r4
#define _b r5
#define _c r6
#define _d r7
#define _e r8

#define RT0 r9
#define RT1 r10
#define RT2 r11
#define RT3 r12

#define W0 q0
#define W1 q7
#define W2 q2
#define W3 q3
#define W4 q4
#define W5 q6
#define W6 q5
#define W7 q1

#define tmp0 q8
#define tmp1 q9
#define tmp2 q10
#define tmp3 q11

#define qK1 q12
#define qK2 q13
#define qK3 q14
#define qK4 q15

#ifdef CONFIG_CPU_BIG_ENDIAN
#define ARM_LE(code...)
#else
#define ARM_LE(code...)         code
#endif

/* Round function macros. */

#define WK_offs(i) (((i) & 15) * 4)

#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        ldr RT3, [sp, WK_offs(i)]; \
                pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        bic RT0, d, b; \
        add e, e, a, ror #(32 - 5); \
        and RT1, c, b; \
                pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add RT0, RT0, RT3; \
        add e, e, RT1; \
        ror b, #(32 - 30); \
                pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add e, e, RT0;

#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        ldr RT3, [sp, WK_offs(i)]; \
                pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        eor RT0, d, b; \
        add e, e, a, ror #(32 - 5); \
        eor RT0, RT0, c; \
                pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add e, e, RT3; \
        ror b, #(32 - 30); \
                pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add e, e, RT0; \

#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        ldr RT3, [sp, WK_offs(i)]; \
                pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        eor RT0, b, c; \
        and RT1, b, c; \
        add e, e, a, ror #(32 - 5); \
                pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        and RT0, RT0, d; \
        add RT1, RT1, RT3; \
        add e, e, RT0; \
        ror b, #(32 - 30); \
                pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
        add e, e, RT1;

#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
              W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
           W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

#define R(a,b,c,d,e,f,i) \
        _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
               W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

#define dummy(...)
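/*
 * Commentary only (not part of the build): an illustrative C sketch of
 * the four round bodies above, with rol() as a hypothetical 32-bit
 * rotate-left helper.  Each round computes
 *
 *      e += rol(a, 5) + f(b, c, d) + W[i] + K;  b = rol(b, 30);
 *
 * where f is
 *      F1 (rounds  0-19): (b & c) | (~b & d)
 *      F2 (rounds 20-39): b ^ c ^ d
 *      F3 (rounds 40-59): (b & c) | (b & d) | (c & d)
 *      F4 (rounds 60-79): b ^ c ^ d again, hence _R_F4 expands to _R_F2.
 *
 * W[i] + K is loaded pre-added from the stack buffer (WK_offs).  _R_F1
 * evaluates F1 as (~b & d) + (b & c) and _R_F3 evaluates F3 as
 * (b & c) + ((b ^ c) & d); in both cases the two terms never have set
 * bits in common, so plain additions into e stand in for the ORs and
 * the halves can be scheduled independently.
 */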


/* Input expansion macros. */

/********* Precalc macros for rounds 0-15 *************************************/

#define W_PRECALC_00_15() \
        add       RWK, sp, #(WK_offs(0)); \
        \
        vld1.32   {W0, W7}, [RDATA]!; \
 ARM_LE(vrev32.8  W0, W0;       )       /* big => little */ \
        vld1.32   {W6, W5}, [RDATA]!; \
        vadd.u32  tmp0, W0, curK; \
 ARM_LE(vrev32.8  W7, W7;       )       /* big => little */ \
 ARM_LE(vrev32.8  W6, W6;       )       /* big => little */ \
        vadd.u32  tmp1, W7, curK; \
 ARM_LE(vrev32.8  W5, W5;       )       /* big => little */ \
        vadd.u32  tmp2, W6, curK; \
        vst1.32   {tmp0, tmp1}, [RWK]!; \
        vadd.u32  tmp3, W5, curK; \
        vst1.32   {tmp2, tmp3}, [RWK]; \

#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vld1.32   {W0, W7}, [RDATA]!; \

#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        add       RWK, sp, #(WK_offs(0)); \

#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W0, W0;       )       /* big => little */ \

#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vld1.32   {W6, W5}, [RDATA]!; \

#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp0, W0, curK; \

#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W7, W7;       )       /* big => little */ \

#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W6, W6;       )       /* big => little */ \

#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp1, W7, curK; \

#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W5, W5;       )       /* big => little */ \

#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp2, W6, curK; \

#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vst1.32   {tmp0, tmp1}, [RWK]!; \

#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp3, W5, curK; \

#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vst1.32   {tmp2, tmp3}, [RWK]; \
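
/*
 * Commentary only: a rough C model of the 0-15 precalc, with
 * get_be32() as a hypothetical big-endian load:
 *
 *      for (i = 0; i < 16; i++)
 *              WK[i] = get_be32(data + 4 * i) + curK;
 *
 * The message block is loaded 32 bytes at a time; on little-endian
 * kernels vrev32.8 performs the big-to-little byte swap (elided via
 * ARM_LE() when CONFIG_CPU_BIG_ENDIAN is set).  The round constant is
 * folded in here so each scalar round needs only a single ldr from the
 * 64-byte stack buffer.
 */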

/********* Precalc macros for rounds 16-31 ************************************/

#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      tmp0, tmp0; \
        vext.8    W, W_m16, W_m12, #8; \

#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        add       RWK, sp, #(WK_offs(i)); \
        vext.8    tmp0, W_m04, tmp0, #4; \

#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      tmp0, tmp0, W_m16; \
        veor.32   W, W, W_m08; \

#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      tmp1, tmp1; \
        veor      W, W, tmp0; \

#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vshl.u32  tmp0, W, #1; \

#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vext.8    tmp1, tmp1, W, #(16-12); \
        vshr.u32  W, W, #31; \

#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vorr      tmp0, tmp0, W; \
        vshr.u32  W, tmp1, #30; \

#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vshl.u32  tmp1, tmp1, #2; \

#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      tmp0, tmp0, W; \

#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      W, tmp0, tmp1; \

#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp0, W, curK; \

#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vst1.32   {tmp0}, [RWK];
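
/*
 * Commentary only: the steps above evaluate, four lanes at a time,
 *
 *      W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1)
 *
 * W[i-3] lies inside the very vector being built, so the last lane is
 * first computed with that input taken as zero (the veor tmp0, tmp0 /
 * vext.8 pair), and the missing rol(W[i-3], 1) contribution is XORed
 * in afterwards through tmp1 as a rotate-by-2 of the first lane's
 * pre-rotate value.
 */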

/********* Precalc macros for rounds 32-79 ************************************/

#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      W, W_m28; \

#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vext.8    tmp0, W_m08, W_m04, #8; \

#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      W, W_m16; \

#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        veor      W, tmp0; \

#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        add       RWK, sp, #(WK_offs(i&~3)); \

#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vshl.u32  tmp1, W, #2; \

#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vshr.u32  tmp0, W, #30; \

#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vorr      W, tmp0, tmp1; \

#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vadd.u32  tmp0, W, curK; \

#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
        vst1.32   {tmp0}, [RWK];
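
/*
 * Commentary only: rounds 32-79 use the unrolled form of the same
 * recurrence,
 *
 *      W[i] = rol(W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32], 2)
 *
 * obtained by applying the W[] recurrence to itself.  Every input now
 * lies at least four elements back, so a whole vector is computed
 * without the lane fix-up needed for rounds 16-31.  Because only eight
 * W registers rotate, the destination register W still holds the
 * W[i-32] values, which is why veor W, W_m28 above works in place.
 */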

/*
 * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
 *
 * unsigned int
 * sha1_transform_neon (void *ctx, const unsigned char *data,
 *                      unsigned int nblks)
 */
.align 3
ENTRY(sha1_transform_neon)
  /* input:
   *    r0: ctx, CTX
   *    r1: data (64*nblks bytes)
   *    r2: nblks
   */

        cmp RNBLKS, #0;
        beq .Ldo_nothing;

        push {r4-r12, lr};
        /*vpush {q4-q7};*/

        adr RT3, .LK_VEC;

        mov ROLDSTACK, sp;

        /* Align stack. */
        sub RT0, sp, #(16*4);
        and RT0, #(~(16-1));
        mov sp, RT0;

        vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */

        /* Get the values of the chaining variables. */
        ldm RSTATE, {_a-_e};

        vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */

#undef curK
#define curK qK1
        /* Precalc 0-15. */
        W_PRECALC_00_15();
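
/*
 * Commentary only: the loop below is software-pipelined.  Each _R()
 * macro retires one scalar SHA-1 round on r4-r8 while its pre1/pre2/
 * pre3 hooks issue a slice of the NEON message-schedule work for W
 * values sixteen rounds ahead; curK is re-#defined whenever the group
 * being precalculated crosses a round-constant boundary.
 */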

.Loop:
        /* Transform 0-15 + Precalc 16-31. */
        _R( _a, _b, _c, _d, _e, F1,  0,
            WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
            W4, W5, W6, W7, W0, _, _, _ );
        _R( _e, _a, _b, _c, _d, F1,  1,
            WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
            W4, W5, W6, W7, W0, _, _, _ );
        _R( _d, _e, _a, _b, _c, F1,  2,
            WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
            W4, W5, W6, W7, W0, _, _, _ );
        _R( _c, _d, _e, _a, _b, F1,  3,
            WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
            W4, W5, W6, W7, W0, _, _, _ );

#undef curK
#define curK qK2
        _R( _b, _c, _d, _e, _a, F1,  4,
            WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
            W3, W4, W5, W6, W7, _, _, _ );
        _R( _a, _b, _c, _d, _e, F1,  5,
            WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
            W3, W4, W5, W6, W7, _, _, _ );
        _R( _e, _a, _b, _c, _d, F1,  6,
            WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
            W3, W4, W5, W6, W7, _, _, _ );
        _R( _d, _e, _a, _b, _c, F1,  7,
            WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
            W3, W4, W5, W6, W7, _, _, _ );

        _R( _c, _d, _e, _a, _b, F1,  8,
            WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
            W2, W3, W4, W5, W6, _, _, _ );
        _R( _b, _c, _d, _e, _a, F1,  9,
            WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
            W2, W3, W4, W5, W6, _, _, _ );
        _R( _a, _b, _c, _d, _e, F1, 10,
            WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
            W2, W3, W4, W5, W6, _, _, _ );
        _R( _e, _a, _b, _c, _d, F1, 11,
            WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
            W2, W3, W4, W5, W6, _, _, _ );

        _R( _d, _e, _a, _b, _c, F1, 12,
            WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
            W1, W2, W3, W4, W5, _, _, _ );
        _R( _c, _d, _e, _a, _b, F1, 13,
            WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
            W1, W2, W3, W4, W5, _, _, _ );
        _R( _b, _c, _d, _e, _a, F1, 14,
            WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
            W1, W2, W3, W4, W5, _, _, _ );
        _R( _a, _b, _c, _d, _e, F1, 15,
            WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
            W1, W2, W3, W4, W5, _, _, _ );

        /* Transform 16-63 + Precalc 32-79. */
        _R( _e, _a, _b, _c, _d, F1, 16,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
            W0, W1, W2, W3, W4, W5, W6, W7);
        _R( _d, _e, _a, _b, _c, F1, 17,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
            W0, W1, W2, W3, W4, W5, W6, W7);
        _R( _c, _d, _e, _a, _b, F1, 18,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 32,
            W0, W1, W2, W3, W4, W5, W6, W7);
        _R( _b, _c, _d, _e, _a, F1, 19,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 32,
            W0, W1, W2, W3, W4, W5, W6, W7);

        _R( _a, _b, _c, _d, _e, F2, 20,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
            W7, W0, W1, W2, W3, W4, W5, W6);
        _R( _e, _a, _b, _c, _d, F2, 21,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
            W7, W0, W1, W2, W3, W4, W5, W6);
        _R( _d, _e, _a, _b, _c, F2, 22,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 36,
            W7, W0, W1, W2, W3, W4, W5, W6);
        _R( _c, _d, _e, _a, _b, F2, 23,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 36,
            W7, W0, W1, W2, W3, W4, W5, W6);

#undef curK
#define curK qK3
        _R( _b, _c, _d, _e, _a, F2, 24,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
            W6, W7, W0, W1, W2, W3, W4, W5);
        _R( _a, _b, _c, _d, _e, F2, 25,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
            W6, W7, W0, W1, W2, W3, W4, W5);
        _R( _e, _a, _b, _c, _d, F2, 26,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 40,
            W6, W7, W0, W1, W2, W3, W4, W5);
        _R( _d, _e, _a, _b, _c, F2, 27,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 40,
            W6, W7, W0, W1, W2, W3, W4, W5);

        _R( _c, _d, _e, _a, _b, F2, 28,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
            W5, W6, W7, W0, W1, W2, W3, W4);
        _R( _b, _c, _d, _e, _a, F2, 29,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
            W5, W6, W7, W0, W1, W2, W3, W4);
        _R( _a, _b, _c, _d, _e, F2, 30,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 44,
            W5, W6, W7, W0, W1, W2, W3, W4);
        _R( _e, _a, _b, _c, _d, F2, 31,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 44,
            W5, W6, W7, W0, W1, W2, W3, W4);

        _R( _d, _e, _a, _b, _c, F2, 32,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
            W4, W5, W6, W7, W0, W1, W2, W3);
        _R( _c, _d, _e, _a, _b, F2, 33,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
            W4, W5, W6, W7, W0, W1, W2, W3);
        _R( _b, _c, _d, _e, _a, F2, 34,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 48,
            W4, W5, W6, W7, W0, W1, W2, W3);
        _R( _a, _b, _c, _d, _e, F2, 35,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 48,
            W4, W5, W6, W7, W0, W1, W2, W3);

        _R( _e, _a, _b, _c, _d, F2, 36,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
            W3, W4, W5, W6, W7, W0, W1, W2);
        _R( _d, _e, _a, _b, _c, F2, 37,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
            W3, W4, W5, W6, W7, W0, W1, W2);
        _R( _c, _d, _e, _a, _b, F2, 38,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 52,
            W3, W4, W5, W6, W7, W0, W1, W2);
        _R( _b, _c, _d, _e, _a, F2, 39,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 52,
            W3, W4, W5, W6, W7, W0, W1, W2);

        _R( _a, _b, _c, _d, _e, F3, 40,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
            W2, W3, W4, W5, W6, W7, W0, W1);
        _R( _e, _a, _b, _c, _d, F3, 41,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
            W2, W3, W4, W5, W6, W7, W0, W1);
        _R( _d, _e, _a, _b, _c, F3, 42,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 56,
            W2, W3, W4, W5, W6, W7, W0, W1);
        _R( _c, _d, _e, _a, _b, F3, 43,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 56,
            W2, W3, W4, W5, W6, W7, W0, W1);

#undef curK
#define curK qK4
        _R( _b, _c, _d, _e, _a, F3, 44,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
            W1, W2, W3, W4, W5, W6, W7, W0);
        _R( _a, _b, _c, _d, _e, F3, 45,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
            W1, W2, W3, W4, W5, W6, W7, W0);
        _R( _e, _a, _b, _c, _d, F3, 46,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 60,
            W1, W2, W3, W4, W5, W6, W7, W0);
        _R( _d, _e, _a, _b, _c, F3, 47,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 60,
            W1, W2, W3, W4, W5, W6, W7, W0);

        _R( _c, _d, _e, _a, _b, F3, 48,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
            W0, W1, W2, W3, W4, W5, W6, W7);
        _R( _b, _c, _d, _e, _a, F3, 49,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
            W0, W1, W2, W3, W4, W5, W6, W7);
        _R( _a, _b, _c, _d, _e, F3, 50,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 64,
            W0, W1, W2, W3, W4, W5, W6, W7);
        _R( _e, _a, _b, _c, _d, F3, 51,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 64,
            W0, W1, W2, W3, W4, W5, W6, W7);

        _R( _d, _e, _a, _b, _c, F3, 52,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
            W7, W0, W1, W2, W3, W4, W5, W6);
        _R( _c, _d, _e, _a, _b, F3, 53,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
            W7, W0, W1, W2, W3, W4, W5, W6);
        _R( _b, _c, _d, _e, _a, F3, 54,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 68,
            W7, W0, W1, W2, W3, W4, W5, W6);
        _R( _a, _b, _c, _d, _e, F3, 55,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 68,
            W7, W0, W1, W2, W3, W4, W5, W6);

        _R( _e, _a, _b, _c, _d, F3, 56,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
            W6, W7, W0, W1, W2, W3, W4, W5);
        _R( _d, _e, _a, _b, _c, F3, 57,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
            W6, W7, W0, W1, W2, W3, W4, W5);
        _R( _c, _d, _e, _a, _b, F3, 58,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 72,
            W6, W7, W0, W1, W2, W3, W4, W5);
        _R( _b, _c, _d, _e, _a, F3, 59,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 72,
            W6, W7, W0, W1, W2, W3, W4, W5);

        subs RNBLKS, #1;

        _R( _a, _b, _c, _d, _e, F4, 60,
            WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
            W5, W6, W7, W0, W1, W2, W3, W4);
        _R( _e, _a, _b, _c, _d, F4, 61,
            WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
            W5, W6, W7, W0, W1, W2, W3, W4);
        _R( _d, _e, _a, _b, _c, F4, 62,
            WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 76,
            W5, W6, W7, W0, W1, W2, W3, W4);
        _R( _c, _d, _e, _a, _b, F4, 63,
            WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 76,
            W5, W6, W7, W0, W1, W2, W3, W4);

        beq .Lend;

        /* Transform 64-79 + Precalc 0-15 of next block. */
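/*
 * Commentary only: when another block follows, rounds 64-79 below
 * double as the 0-15 precalc for that next block; the WPRECALC_00_15_*
 * steps are threaded through the pre1/pre3 hooks so the NEON unit
 * stays busy across the block boundary.
 */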
#undef curK
#define curK qK1
        _R( _b, _c, _d, _e, _a, F4, 64,
            WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _a, _b, _c, _d, _e, F4, 65,
            WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _e, _a, _b, _c, _d, F4, 66,
            WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _d, _e, _a, _b, _c, F4, 67,
            WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );

        _R( _c, _d, _e, _a, _b, F4, 68,
            dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _b, _c, _d, _e, _a, F4, 69,
            dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _a, _b, _c, _d, _e, F4, 70,
            WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _e, _a, _b, _c, _d, F4, 71,
            WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );

        _R( _d, _e, _a, _b, _c, F4, 72,
            dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _c, _d, _e, _a, _b, F4, 73,
            dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _b, _c, _d, _e, _a, F4, 74,
            WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _a, _b, _c, _d, _e, F4, 75,
            WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );

        _R( _e, _a, _b, _c, _d, F4, 76,
            WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _d, _e, _a, _b, _c, F4, 77,
            WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _c, _d, _e, _a, _b, F4, 78,
            WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
        _R( _b, _c, _d, _e, _a, F4, 79,
            WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );

        /* Update the chaining variables. */
        ldm RSTATE, {RT0-RT3};
        add _a, RT0;
        ldr RT0, [RSTATE, #state_h4];
        add _b, RT1;
        add _c, RT2;
        add _d, RT3;
        add _e, RT0;
        stm RSTATE, {_a-_e};

        b .Loop;

.Lend:
        /* Transform 64-79 */
        R( _b, _c, _d, _e, _a, F4, 64 );
        R( _a, _b, _c, _d, _e, F4, 65 );
        R( _e, _a, _b, _c, _d, F4, 66 );
        R( _d, _e, _a, _b, _c, F4, 67 );
        R( _c, _d, _e, _a, _b, F4, 68 );
        R( _b, _c, _d, _e, _a, F4, 69 );
        R( _a, _b, _c, _d, _e, F4, 70 );
        R( _e, _a, _b, _c, _d, F4, 71 );
        R( _d, _e, _a, _b, _c, F4, 72 );
        R( _c, _d, _e, _a, _b, F4, 73 );
        R( _b, _c, _d, _e, _a, F4, 74 );
        R( _a, _b, _c, _d, _e, F4, 75 );
        R( _e, _a, _b, _c, _d, F4, 76 );
        R( _d, _e, _a, _b, _c, F4, 77 );
        R( _c, _d, _e, _a, _b, F4, 78 );
        R( _b, _c, _d, _e, _a, F4, 79 );

        mov sp, ROLDSTACK;

        /* Update the chaining variables. */
        ldm RSTATE, {RT0-RT3};
        add _a, RT0;
        ldr RT0, [RSTATE, #state_h4];
        add _b, RT1;
        add _c, RT2;
        add _d, RT3;
        /*vpop {q4-q7};*/
        add _e, RT0;
        stm RSTATE, {_a-_e};

        pop {r4-r12, pc};

.Ldo_nothing:
        bx lr
ENDPROC(sha1_transform_neon)
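
/*
 * Commentary only, a minimal caller sketch (assumed C glue, not part of
 * this file); in-kernel users must claim the NEON unit around the call:
 *
 *      kernel_neon_begin();
 *      sha1_transform_neon(state, data, nblks);
 *      kernel_neon_end();
 *
 * where state points at the five u32 chaining words laid out at
 * state_h0..state_h4 above.
 */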