~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/loongarch/vdso/vgetrandom-chacha.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * Copyright (C) 2024 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
  4  */
  5 
  6 #include <asm/asm.h>
  7 #include <asm/regdef.h>
  8 #include <linux/linkage.h>
  9 
 10 .text
 11 
 12 .macro  OP_4REG op dst0 dst1 dst2 dst3 src0 src1 src2 src3
 13         \op     \dst0, \dst0, \src0     /* lane 0: dst0 = dst0 <op> src0 */
 14         \op     \dst1, \dst1, \src1     /* lane 1: dst1 = dst1 <op> src1 */
 15         \op     \dst2, \dst2, \src2     /* lane 2: dst2 = dst2 <op> src2 */
 16         \op     \dst3, \dst3, \src3     /* lane 3: dst3 = dst3 <op> src3 */
 17 .endm
 18 
 19 /*
 20  * Very basic LoongArch implementation of ChaCha20. Produces a given positive
 21  * number of blocks of output with a nonce of 0, taking an input key and
 22  * 8-byte counter. Importantly does not spill sensitive data to the stack.
 23  * Its arguments are:
 24  *
 25  *      a0: output bytes (64 bytes are written per block)
 26  *      a1: 32-byte key input
 27  *      a2: 8-byte counter input/output (incremented once per block)
 28  *      a3: number of 64-byte blocks to write to output (must be nonzero)
 29  */
 30 SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 31 
 32 /* We don't need a frame pointer, so fp is used as a tenth saved register */
 33 #define s9              fp
 34 
 35 #define output          a0
 36 #define key             a1
 37 #define counter         a2
 38 #define nblocks         a3
 39 #define i               a4      /* double-round loop counter */
 40 #define state0          s0
 41 #define state1          s1
 42 #define state2          s2
 43 #define state3          s3
 44 #define state4          s4
 45 #define state5          s5
 46 #define state6          s6
 47 #define state7          s7
 48 #define state8          s8
 49 #define state9          s9
 50 #define state10         a5
 51 #define state11         a6
 52 #define state12         a7
 53 #define state13         t0
 54 #define state14         t1
 55 #define state15         t2
 56 #define cnt_lo          t3      /* low 32 bits of the block counter */
 57 #define cnt_hi          t4      /* high 32 bits of the block counter */
 58 #define copy0           t5
 59 #define copy1           t6
 60 #define copy2           t7
 61 
 62 /* Reuse i as copy3 */
 63 #define copy3           i
 64 
 65 /* Packs to be used with OP_4REG */
 66 #define line0           state0, state1, state2, state3
 67 #define line1           state4, state5, state6, state7
 68 #define line2           state8, state9, state10, state11
 69 #define line3           state12, state13, state14, state15
 70 
 71 #define line1_perm      state5, state6, state7, state4          /* line1 rotated by 1 word */
 72 #define line2_perm      state10, state11, state8, state9        /* line2 rotated by 2 words */
 73 #define line3_perm      state15, state12, state13, state14      /* line3 rotated by 3 words */
 74 
 75 #define copy            copy0, copy1, copy2, copy3
 76 
 77 #define _16             16, 16, 16, 16  /* per-lane rotate amounts for OP_4REG rotri.w */
 78 #define _20             20, 20, 20, 20
 79 #define _24             24, 24, 24, 24
 80 #define _25             25, 25, 25, 25
 81 
 82         /*
 83          * The ABI requires s0-s9 saved, and sp aligned to 16-byte.
 84          * This does not violate the stack-less requirement: no sensitive data
 85          * is spilled onto the stack.
 86          */
 87         PTR_ADDI        sp, sp, (-SZREG * 10) & STACK_ALIGN
 88         REG_S           s0, sp, 0
 89         REG_S           s1, sp, SZREG
 90         REG_S           s2, sp, SZREG * 2
 91         REG_S           s3, sp, SZREG * 3
 92         REG_S           s4, sp, SZREG * 4
 93         REG_S           s5, sp, SZREG * 5
 94         REG_S           s6, sp, SZREG * 6
 95         REG_S           s7, sp, SZREG * 7
 96         REG_S           s8, sp, SZREG * 8
 97         REG_S           s9, sp, SZREG * 9
 98 
 99         li.w            copy0, 0x61707865       /* "expa" */
100         li.w            copy1, 0x3320646e       /* "nd 3" */
101         li.w            copy2, 0x79622d32       /* "2-by" */
102 
103         ld.w            cnt_lo, counter, 0
104         ld.w            cnt_hi, counter, 4
105 
106 .Lblock:
107         /* state[0,1,2,3] = "expand 32-byte k" */
108         move            state0, copy0
109         move            state1, copy1
110         move            state2, copy2
111         li.w            state3, 0x6b206574      /* "te k" */
112 
113         /* state[4,5,..,11] = key */
114         ld.w            state4, key, 0
115         ld.w            state5, key, 4
116         ld.w            state6, key, 8
117         ld.w            state7, key, 12
118         ld.w            state8, key, 16
119         ld.w            state9, key, 20
120         ld.w            state10, key, 24
121         ld.w            state11, key, 28
122 
123         /* state[12,13] = counter */
124         move            state12, cnt_lo
125         move            state13, cnt_hi
126 
127         /* state[14,15] = 0 */
128         move            state14, zero
129         move            state15, zero
130 
131         li.w            i, 10                   /* 10 iterations of (odd + even) = 20 rounds */
132 .Lpermute:
133         /* odd round: four quarter-rounds on the columns of the state */
134         OP_4REG add.w   line0, line1
135         OP_4REG xor     line3, line0
136         OP_4REG rotri.w line3, _16              /* rotate right 16 == rotate left 16 */
137 
138         OP_4REG add.w   line2, line3
139         OP_4REG xor     line1, line2
140         OP_4REG rotri.w line1, _20              /* rotate right 20 == rotate left 12 */
141 
142         OP_4REG add.w   line0, line1
143         OP_4REG xor     line3, line0
144         OP_4REG rotri.w line3, _24              /* rotate right 24 == rotate left 8 */
145 
146         OP_4REG add.w   line2, line3
147         OP_4REG xor     line1, line2
148         OP_4REG rotri.w line1, _25              /* rotate right 25 == rotate left 7 */
149 
150         /* even round: same steps on the diagonals, selected by the *_perm packs */
151         OP_4REG add.w   line0, line1_perm
152         OP_4REG xor     line3_perm, line0
153         OP_4REG rotri.w line3_perm, _16
154 
155         OP_4REG add.w   line2_perm, line3_perm
156         OP_4REG xor     line1_perm, line2_perm
157         OP_4REG rotri.w line1_perm, _20
158 
159         OP_4REG add.w   line0, line1_perm
160         OP_4REG xor     line3_perm, line0
161         OP_4REG rotri.w line3_perm, _24
162 
163         OP_4REG add.w   line2_perm, line3_perm
164         OP_4REG xor     line1_perm, line2_perm
165         OP_4REG rotri.w line1_perm, _25
166 
167         addi.w          i, i, -1
168         bnez            i, .Lpermute
169 
170         /*
171          * copy[3] = "te k", materialize it here because copy[3] shares the
172          * same register with i which just became dead.
173          */
174         li.w            copy3, 0x6b206574
175 
176         /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
177         OP_4REG add.w   line0, copy
178         st.w            state0, output, 0
179         st.w            state1, output, 4
180         st.w            state2, output, 8
181         st.w            state3, output, 12
182 
183         /* from now on state[0,1,2,3] are scratch registers  */
184 
185         /* state[0,1,2,3] = key words 0-3 (key bytes 0..15) */
186         ld.w            state0, key, 0
187         ld.w            state1, key, 4
188         ld.w            state2, key, 8
189         ld.w            state3, key, 12
190 
191         /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
192         OP_4REG add.w   line1, line0
193         st.w            state4, output, 16
194         st.w            state5, output, 20
195         st.w            state6, output, 24
196         st.w            state7, output, 28
197 
198         /* state[0,1,2,3] = key words 4-7 (key bytes 16..31) */
199         ld.w            state0, key, 16
200         ld.w            state1, key, 20
201         ld.w            state2, key, 24
202         ld.w            state3, key, 28
203 
204         /* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
205         OP_4REG add.w   line2, line0
206         st.w            state8, output, 32
207         st.w            state9, output, 36
208         st.w            state10, output, 40
209         st.w            state11, output, 44
210 
211         /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
212         add.w           state12, state12, cnt_lo
213         add.w           state13, state13, cnt_hi
214         st.w            state12, output, 48
215         st.w            state13, output, 52
216         st.w            state14, output, 56
217         st.w            state15, output, 60
218 
219         /* ++counter, as a 64-bit value held in two 32-bit halves */
220         addi.w          cnt_lo, cnt_lo, 1
221         sltui           state0, cnt_lo, 1       /* state0 = 1 iff cnt_lo wrapped to 0 */
222         add.w           cnt_hi, cnt_hi, state0  /* carry into the high half */
223 
224         /* output += 64 */
225         PTR_ADDI        output, output, 64
226         /* --nblocks */
227         PTR_ADDI        nblocks, nblocks, -1
228         bnez            nblocks, .Lblock
229 
230         /* counter = [cnt_lo, cnt_hi] */
231         st.w            cnt_lo, counter, 0
232         st.w            cnt_hi, counter, 4
233 
234         /*
235          * Zero out the potentially sensitive regs, in case nothing uses these
236          * again. At this point copy[0,1,2,3] just contain "expand 32-byte k" and
237          * state[0,...,9] are s0-s9, which we restore in the epilogue, so we
238          * only need to zero state[10,...,15].
239          */
240         move            state10, zero
241         move            state11, zero
242         move            state12, zero
243         move            state13, zero
244         move            state14, zero
245         move            state15, zero
246 
247         REG_L           s0, sp, 0
248         REG_L           s1, sp, SZREG
249         REG_L           s2, sp, SZREG * 2
250         REG_L           s3, sp, SZREG * 3
251         REG_L           s4, sp, SZREG * 4
252         REG_L           s5, sp, SZREG * 5
253         REG_L           s6, sp, SZREG * 6
254         REG_L           s7, sp, SZREG * 7
255         REG_L           s8, sp, SZREG * 8
256         REG_L           s9, sp, SZREG * 9
257         PTR_ADDI        sp, sp, -((-SZREG * 10) & STACK_ALIGN)  /* undo the prologue adjustment */
258 
259         jr              ra
260 SYM_FUNC_END(__arch_chacha20_blocks_nostack)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php