
TOMOYO Linux Cross Reference
Linux/arch/mips/crypto/chacha-core.S


/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

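/* MASK_U32 masks a byte count down to whole 32-bit words; the 32-byte
 * stack frame is just large enough to save $s0-$s7.
 */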
#define MASK_U32                0x3c
#define CHACHA20_BLOCK_SIZE     64
#define STACK_SIZE              32

#define X0      $t0
#define X1      $t1
#define X2      $t2
#define X3      $t3
#define X4      $t4
#define X5      $t5
#define X6      $t6
#define X7      $t7
#define X8      $t8
#define X9      $t9
#define X10     $v1
#define X11     $s6
#define X12     $s5
#define X13     $s4
#define X14     $s3
#define X15     $s2
/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
#define T0      $s1
#define T1      $s0
#define T(n)    T ## n
#define X(n)    X ## n

/* Input arguments */
#define STATE           $a0
#define OUT             $a1
#define IN              $a2
#define BYTES           $a3
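/* The fifth argument, the number of rounds, is passed on the stack
 * (o32 calling convention) and is loaded into $at in the prologue.
 */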

/* Output argument */
/* NONCE[0] is kept in a register and not in memory.
 * We don't want to touch the original value in memory.
 * It must be incremented every loop iteration.
 */
#define NONCE_0         $v0

/* SAVED_X and SAVED_CA are set in the jump table.
 * Use regs which are overwritten on exit so we don't leak clear data.
 * They are used to handle the last bytes, which are not a multiple of 4.
 */
#define SAVED_X         X15
#define SAVED_CA        $s7

#define IS_UNALIGNED    $s7

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#define ROTx rotl
#define ROTR(n) rotr n, 24
#define CPU_TO_LE32(n) \
        wsbh    n; \
        rotr    n, 16;
#else
#define MSB 3
#define LSB 0
#define ROTx rotr
#define CPU_TO_LE32(n)
#define ROTR(n)
#endif
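/* ChaCha emits its keystream as little-endian 32-bit words.  On big-endian
 * CPUs, CPU_TO_LE32 (wsbh + rotr 16) reverses the four bytes of a word before
 * it is XORed with the input, and ROTR/ROTx keep the next keystream byte in
 * the low 8 bits while the trailing 1-3 bytes are handled one at a time.
 */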

#define FOR_EACH_WORD(x) \
        x( 0); \
        x( 1); \
        x( 2); \
        x( 3); \
        x( 4); \
        x( 5); \
        x( 6); \
        x( 7); \
        x( 8); \
        x( 9); \
        x(10); \
        x(11); \
        x(12); \
        x(13); \
        x(14); \
        x(15);

#define FOR_EACH_WORD_REV(x) \
        x(15); \
        x(14); \
        x(13); \
        x(12); \
        x(11); \
        x(10); \
        x( 9); \
        x( 8); \
        x( 7); \
        x( 6); \
        x( 5); \
        x( 4); \
        x( 3); \
        x( 2); \
        x( 1); \
        x( 0);

#define PLUS_ONE_0       1
#define PLUS_ONE_1       2
#define PLUS_ONE_2       3
#define PLUS_ONE_3       4
#define PLUS_ONE_4       5
#define PLUS_ONE_5       6
#define PLUS_ONE_6       7
#define PLUS_ONE_7       8
#define PLUS_ONE_8       9
#define PLUS_ONE_9      10
#define PLUS_ONE_10     11
#define PLUS_ONE_11     12
#define PLUS_ONE_12     13
#define PLUS_ONE_13     14
#define PLUS_ONE_14     15
#define PLUS_ONE_15     16
#define PLUS_ONE(x)     PLUS_ONE_ ## x
#define _CONCAT3(a,b,c) a ## b ## c
#define CONCAT3(a,b,c)  _CONCAT3(a,b,c)

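/* The xor labels emitted by the STORE macros are numbered one past the word
 * they precede: label N is followed by the stores for words N-1 down to 0, so
 * jump table entry N writes exactly the N remaining full words.  PLUS_ONE()
 * supplies that +1, since token pasting cannot evaluate arithmetic.
 * Each STORE step adds the saved input word from STATE (or the register-held
 * NONCE_0 for word 12), converts the sum to little endian, XORs it with four
 * bytes of input and writes the result; the unaligned variant uses lwl/lwr
 * and swl/swr pairs instead of lw/sw.
 */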
#define STORE_UNALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
        .if (x != 12); \
                lw      T0, (x*4)(STATE); \
        .endif; \
        lwl     T1, (x*4)+MSB ## (IN); \
        lwr     T1, (x*4)+LSB ## (IN); \
        .if (x == 12); \
                addu    X ## x, NONCE_0; \
        .else; \
                addu    X ## x, T0; \
        .endif; \
        CPU_TO_LE32(X ## x); \
        xor     X ## x, T1; \
        swl     X ## x, (x*4)+MSB ## (OUT); \
        swr     X ## x, (x*4)+LSB ## (OUT);

#define STORE_ALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
        .if (x != 12); \
                lw      T0, (x*4)(STATE); \
        .endif; \
        lw      T1, (x*4) ## (IN); \
        .if (x == 12); \
                addu    X ## x, NONCE_0; \
        .else; \
                addu    X ## x, T0; \
        .endif; \
        CPU_TO_LE32(X ## x); \
        xor     X ## x, T1; \
        sw      X ## x, (x*4) ## (OUT);

/* Jump table macros.
 * Used for setup and for handling the last bytes, which are not a multiple
 * of 4.  X15 is free at this point and is reused (as SAVED_X) to hold Xn.
 * Every jump table entry must be equal in size.
 */
#define JMPTBL_ALIGNED(x) \
.Lchacha_mips_jmptbl_aligned_ ## x: ; \
        .set    noreorder; \
        b       .Lchacha_mips_xor_aligned_ ## x ## _b; \
        .if (x == 12); \
                addu    SAVED_X, X ## x, NONCE_0; \
        .else; \
                addu    SAVED_X, X ## x, SAVED_CA; \
        .endif; \
        .set    reorder

#define JMPTBL_UNALIGNED(x) \
.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
        .set    noreorder; \
        b       .Lchacha_mips_xor_unaligned_ ## x ## _b; \
        .if (x == 12); \
                addu    SAVED_X, X ## x, NONCE_0; \
        .else; \
                addu    SAVED_X, X ## x, SAVED_CA; \
        .endif; \
        .set    reorder
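/* Each jump table entry is exactly two instructions: the branch plus the
 * addu in its delay slot, i.e. 8 bytes, which the dispatch code relies on
 * when it scales the remaining word count.  The addu also finalizes SAVED_X,
 * the keystream word that covers the trailing 1-3 bytes.
 */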

#define AXR(A, B, C, D,  K, L, M, N,  V, W, Y, Z,  S) \
        addu    X(A), X(K); \
        addu    X(B), X(L); \
        addu    X(C), X(M); \
        addu    X(D), X(N); \
        xor     X(V), X(A); \
        xor     X(W), X(B); \
        xor     X(Y), X(C); \
        xor     X(Z), X(D); \
        rotl    X(V), S;    \
        rotl    X(W), S;    \
        rotl    X(Y), S;    \
        rotl    X(Z), S;
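/* AXR performs one add/xor/rotate step of four ChaCha quarter-rounds in
 * parallel.  Four AXR lines with rotate amounts 16, 12, 8 and 7 complete the
 * quarter-rounds; the two groups of four in the round loop below make up one
 * double round (columns, then diagonals), so the round counter is decremented
 * by 2 per iteration.
 */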

.text
.set    reorder
.set    noat
.globl  chacha_crypt_arch
.ent    chacha_crypt_arch
chacha_crypt_arch:
        .frame  $sp, STACK_SIZE, $ra

        /* Load the number of rounds */
        lw      $at, 16($sp)

        addiu   $sp, -STACK_SIZE

        /* Return if bytes == 0. */
        beqz    BYTES, .Lchacha_mips_end

        lw      NONCE_0, 48(STATE)

        /* Save s0-s7 */
        sw      $s0,  0($sp)
        sw      $s1,  4($sp)
        sw      $s2,  8($sp)
        sw      $s3, 12($sp)
        sw      $s4, 16($sp)
        sw      $s5, 20($sp)
        sw      $s6, 24($sp)
        sw      $s7, 28($sp)

        /* Test whether IN or OUT is unaligned.
         * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
         */
        or      IS_UNALIGNED, IN, OUT
        andi    IS_UNALIGNED, 0x3

        b       .Lchacha_rounds_start

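/* Per-block loop: advance the source and destination pointers by one
 * 64-byte block and bump the block counter held in NONCE_0.
 */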
.align 4
.Loop_chacha_rounds:
        addiu   IN,  CHACHA20_BLOCK_SIZE
        addiu   OUT, CHACHA20_BLOCK_SIZE
        addiu   NONCE_0, 1

.Lchacha_rounds_start:
        lw      X0,  0(STATE)
        lw      X1,  4(STATE)
        lw      X2,  8(STATE)
        lw      X3,  12(STATE)

        lw      X4,  16(STATE)
        lw      X5,  20(STATE)
        lw      X6,  24(STATE)
        lw      X7,  28(STATE)
        lw      X8,  32(STATE)
        lw      X9,  36(STATE)
        lw      X10, 40(STATE)
        lw      X11, 44(STATE)

        move    X12, NONCE_0
        lw      X13, 52(STATE)
        lw      X14, 56(STATE)
        lw      X15, 60(STATE)

.Loop_chacha_xor_rounds:
        addiu   $at, -2
        AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
        AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
        AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
        AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
        AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
        AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
        AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
        AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
        bnez    $at, .Loop_chacha_xor_rounds

        addiu   BYTES, -(CHACHA20_BLOCK_SIZE)

        /* Is data src/dst unaligned? Jump */
        bnez    IS_UNALIGNED, .Loop_chacha_unaligned

        /* Set the number of rounds here to fill the delay slot. */
        lw      $at, (STACK_SIZE+16)($sp)

        /* BYTES < 0: no full block remains. */
        bltz    BYTES, .Lchacha_mips_no_full_block_aligned

        FOR_EACH_WORD_REV(STORE_ALIGNED)

        /* BYTES > 0? Loop again. */
        bgtz    BYTES, .Loop_chacha_rounds

        /* Place this here to fill the delay slot */
        addiu   NONCE_0, 1

        /* BYTES < 0? Handle the last bytes */
        bltz    BYTES, .Lchacha_mips_xor_bytes

.Lchacha_mips_xor_done:
        /* Restore used registers */
        lw      $s0,  0($sp)
        lw      $s1,  4($sp)
        lw      $s2,  8($sp)
        lw      $s3, 12($sp)
        lw      $s4, 16($sp)
        lw      $s5, 20($sp)
        lw      $s6, 24($sp)
        lw      $s7, 28($sp)

        /* Write NONCE_0 back to the right location in the state */
        sw      NONCE_0, 48(STATE)

.Lchacha_mips_end:
        addiu   $sp, STACK_SIZE
        jr      $ra

.Lchacha_mips_no_full_block_aligned:
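        /* Fewer than 64 bytes remain.  $at becomes the byte offset of the
         * first partial word (number of full words * 4), SAVED_CA gets the
         * original state word at that offset, BYTES becomes the negated
         * count of trailing bytes (0 to -3), and control jumps to the table
         * entry that stores exactly the remaining full words.
         */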
        /* Restore the 64-byte offset on BYTES */
        addiu   BYTES, CHACHA20_BLOCK_SIZE

        /* Get the number of full words, as a byte offset */
        andi    $at, BYTES, MASK_U32

        /* Load the upper half of the jump table address */
        lui     T0, %hi(.Lchacha_mips_jmptbl_aligned_0)

        /* Insert the jump table offset: entries are 8 bytes, so offset = $at * 2 */
        ins     T0, $at, 1, 6

        /* Add the offset to STATE */
        addu    T1, STATE, $at

        /* Add the lower half of the jump table address */
        addiu   T0, %lo(.Lchacha_mips_jmptbl_aligned_0)

        /* Read the state word for the partial tail */
        lw      SAVED_CA, 0(T1)

        /* Store the remaining byte counter as a negative value */
        subu    BYTES, $at, BYTES

        jr      T0

        /* Jump table */
        FOR_EACH_WORD(JMPTBL_ALIGNED)


.Loop_chacha_unaligned:
        /* Set the number of rounds here to fill the delay slot. */
        lw      $at, (STACK_SIZE+16)($sp)

        /* BYTES < 0: no full block remains. */
        bltz    BYTES, .Lchacha_mips_no_full_block_unaligned

        FOR_EACH_WORD_REV(STORE_UNALIGNED)

        /* BYTES > 0? Loop again. */
        bgtz    BYTES, .Loop_chacha_rounds

        /* Write NONCE_0 back to the right location in the state */
        sw      NONCE_0, 48(STATE)

        .set noreorder
        /* Fall through to byte handling */
        bgez    BYTES, .Lchacha_mips_xor_done
.Lchacha_mips_xor_unaligned_0_b:
.Lchacha_mips_xor_aligned_0_b:
        /* Place this here to fill the delay slot */
        addiu   NONCE_0, 1
        .set reorder

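/* Handle the trailing 1-3 bytes.  IN and OUT are advanced past the full
 * words already written ($at still holds that byte offset), SAVED_X holds
 * the finished keystream word for the tail, and BYTES is the negated tail
 * length.
 */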
.Lchacha_mips_xor_bytes:
        addu    IN, $at
        addu    OUT, $at
        /* First byte */
        lbu     T1, 0(IN)
        addiu   $at, BYTES, 1
        CPU_TO_LE32(SAVED_X)
        ROTR(SAVED_X)
        xor     T1, SAVED_X
        sb      T1, 0(OUT)
        beqz    $at, .Lchacha_mips_xor_done
        /* Second byte */
        lbu     T1, 1(IN)
        addiu   $at, BYTES, 2
        ROTx    SAVED_X, 8
        xor     T1, SAVED_X
        sb      T1, 1(OUT)
        beqz    $at, .Lchacha_mips_xor_done
        /* Third byte */
        lbu     T1, 2(IN)
        ROTx    SAVED_X, 8
        xor     T1, SAVED_X
        sb      T1, 2(OUT)
        b       .Lchacha_mips_xor_done

.Lchacha_mips_no_full_block_unaligned:
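        /* Same dispatch as the aligned case above, but through the
         * unaligned jump table.
         */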
        /* Restore the 64-byte offset on BYTES */
        addiu   BYTES, CHACHA20_BLOCK_SIZE

        /* Get the number of full words, as a byte offset */
        andi    $at, BYTES, MASK_U32

        /* Load the upper half of the jump table address */
        lui     T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)

        /* Insert the jump table offset: entries are 8 bytes, so offset = $at * 2 */
        ins     T0, $at, 1, 6

        /* Add the offset to STATE */
        addu    T1, STATE, $at

        /* Add the lower half of the jump table address */
        addiu   T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)

        /* Read the state word for the partial tail */
        lw      SAVED_CA, 0(T1)

        /* Store the remaining byte counter as a negative value */
        subu    BYTES, $at, BYTES

        jr      T0

        /* Jump table */
        FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha_crypt_arch
.set at

/* Input arguments
 * STATE        $a0
 * OUT          $a1
 * NROUND       $a2
 */

#undef X12
#undef X13
#undef X14
#undef X15

#define X12     $a3
#define X13     $at
#define X14     $v0
#define X15     STATE

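/* HChaCha: run the rounds over the raw input block and write out words
 * 0-3 and 12-15, without the final addition of the input state.
 */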
.set noat
.globl  hchacha_block_arch
.ent    hchacha_block_arch
hchacha_block_arch:
        .frame  $sp, STACK_SIZE, $ra

        addiu   $sp, -STACK_SIZE

        /* Save X11(s6) */
        sw      X11, 0($sp)

        lw      X0,  0(STATE)
        lw      X1,  4(STATE)
        lw      X2,  8(STATE)
        lw      X3,  12(STATE)
        lw      X4,  16(STATE)
        lw      X5,  20(STATE)
        lw      X6,  24(STATE)
        lw      X7,  28(STATE)
        lw      X8,  32(STATE)
        lw      X9,  36(STATE)
        lw      X10, 40(STATE)
        lw      X11, 44(STATE)
        lw      X12, 48(STATE)
        lw      X13, 52(STATE)
        lw      X14, 56(STATE)
        lw      X15, 60(STATE)

.Loop_hchacha_xor_rounds:
        addiu   $a2, -2
        AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
        AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
        AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
        AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
        AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
        AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
        AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
        AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
        bnez    $a2, .Loop_hchacha_xor_rounds

        /* Restore used register */
        lw      X11, 0($sp)

        sw      X0,  0(OUT)
        sw      X1,  4(OUT)
        sw      X2,  8(OUT)
        sw      X3,  12(OUT)
        sw      X12, 16(OUT)
        sw      X13, 20(OUT)
        sw      X14, 24(OUT)
        sw      X15, 28(OUT)

        addiu   $sp, STACK_SIZE
        jr      $ra
.end hchacha_block_arch
.set at
