
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')

#include <linux/linkage.h>

.text
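// Let the assembler accept Zvkb instructions (notably vror.vi) in this file,
// even when Zvkb is not part of the build's baseline -march string.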
.option arch, +zvkb

#define KEYP            a0
#define INP             a1
#define OUTP            a2
#define LEN             a3
#define IVP             a4

#define CONSTS0         a5
#define CONSTS1         a6
#define CONSTS2         a7
#define CONSTS3         t0
#define TMP             t1
#define VL              t2
#define STRIDE          t3
#define NROUNDS         t4
#define KEY0            s0
#define KEY1            s1
#define KEY2            s2
#define KEY3            s3
#define KEY4            s4
#define KEY5            s5
#define KEY6            s6
#define KEY7            s7
#define COUNTER         s8
#define NONCE0          s9
#define NONCE1          s10
#define NONCE2          s11

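// One ChaCha round: four independent quarter-rounds, interleaved four-wide to
// hide instruction latency. Each vector element holds one block's copy of a
// state word, so every instruction advances all VL blocks at once. ChaCha's
// rotate-left by N is implemented with Zvkb's vror.vi as rotate-right by
// 32 - N.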
.macro  chacha_round    a0, b0, c0, d0,  a1, b1, c1, d1, \
                        a2, b2, c2, d2,  a3, b3, c3, d3
        // a += b; d ^= a; d = rol(d, 16);
        vadd.vv         \a0, \a0, \b0
        vadd.vv         \a1, \a1, \b1
        vadd.vv         \a2, \a2, \b2
        vadd.vv         \a3, \a3, \b3
        vxor.vv         \d0, \d0, \a0
        vxor.vv         \d1, \d1, \a1
        vxor.vv         \d2, \d2, \a2
        vxor.vv         \d3, \d3, \a3
        vror.vi         \d0, \d0, 32 - 16
        vror.vi         \d1, \d1, 32 - 16
        vror.vi         \d2, \d2, 32 - 16
        vror.vi         \d3, \d3, 32 - 16

        // c += d; b ^= c; b = rol(b, 12);
        vadd.vv         \c0, \c0, \d0
        vadd.vv         \c1, \c1, \d1
        vadd.vv         \c2, \c2, \d2
        vadd.vv         \c3, \c3, \d3
        vxor.vv         \b0, \b0, \c0
        vxor.vv         \b1, \b1, \c1
        vxor.vv         \b2, \b2, \c2
        vxor.vv         \b3, \b3, \c3
        vror.vi         \b0, \b0, 32 - 12
        vror.vi         \b1, \b1, 32 - 12
        vror.vi         \b2, \b2, 32 - 12
        vror.vi         \b3, \b3, 32 - 12

        // a += b; d ^= a; d = rol(d, 8);
        vadd.vv         \a0, \a0, \b0
        vadd.vv         \a1, \a1, \b1
        vadd.vv         \a2, \a2, \b2
        vadd.vv         \a3, \a3, \b3
        vxor.vv         \d0, \d0, \a0
        vxor.vv         \d1, \d1, \a1
        vxor.vv         \d2, \d2, \a2
        vxor.vv         \d3, \d3, \a3
        vror.vi         \d0, \d0, 32 - 8
        vror.vi         \d1, \d1, 32 - 8
        vror.vi         \d2, \d2, 32 - 8
        vror.vi         \d3, \d3, 32 - 8

        // c += d; b ^= c; b = rol(b, 7);
        vadd.vv         \c0, \c0, \d0
        vadd.vv         \c1, \c1, \d1
        vadd.vv         \c2, \c2, \d2
        vadd.vv         \c3, \c3, \d3
        vxor.vv         \b0, \b0, \c0
        vxor.vv         \b1, \b1, \c1
        vxor.vv         \b2, \b2, \c2
        vxor.vv         \b3, \b3, \c3
        vror.vi         \b0, \b0, 32 - 7
        vror.vi         \b1, \b1, 32 - 7
        vror.vi         \b2, \b2, 32 - 7
        vror.vi         \b3, \b3, 32 - 7
.endm

// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
//                    const u32 iv[4]);
//
// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE).
// The counter is treated as 32-bit, following the RFC 7539 convention.
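//
// For reference, block n (0-based) is computed as in this C-like sketch
// (illustrative pseudocode only, not the kernel's glue code):
//
//	u32 state[16] = { "expand 32-byte k" constants, key[0..7],
//			  iv[0] + n, iv[1], iv[2], iv[3] };
//	u32 x[16]; memcpy(x, state, 64);
//	for (int r = 0; r < 10; r++) {		// 10 double-rounds = 20 rounds
//		column_round(x);
//		diagonal_round(x);
//	}
//	for (int i = 0; i < 16; i++)		// feed-forward, then XOR
//		out32[16 * n + i] = in32[16 * n + i] ^ (x[i] + state[i]);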
SYM_FUNC_START(chacha20_zvkb)
        srli            LEN, LEN, 6     // Bytes to blocks

        addi            sp, sp, -96
        sd              s0, 0(sp)
        sd              s1, 8(sp)
        sd              s2, 16(sp)
        sd              s3, 24(sp)
        sd              s4, 32(sp)
        sd              s5, 40(sp)
        sd              s6, 48(sp)
        sd              s7, 56(sp)
        sd              s8, 64(sp)
        sd              s9, 72(sp)
        sd              s10, 80(sp)
        sd              s11, 88(sp)

        li              STRIDE, 64

        // Set up the initial state matrix in scalar registers.
        li              CONSTS0, 0x61707865     // "expa" little endian
        li              CONSTS1, 0x3320646e     // "nd 3" little endian
        li              CONSTS2, 0x79622d32     // "2-by" little endian
        li              CONSTS3, 0x6b206574     // "te k" little endian
        lw              KEY0, 0(KEYP)
        lw              KEY1, 4(KEYP)
        lw              KEY2, 8(KEYP)
        lw              KEY3, 12(KEYP)
        lw              KEY4, 16(KEYP)
        lw              KEY5, 20(KEYP)
        lw              KEY6, 24(KEYP)
        lw              KEY7, 28(KEYP)
        lw              COUNTER, 0(IVP)
        lw              NONCE0, 4(IVP)
        lw              NONCE1, 8(IVP)
        lw              NONCE2, 12(IVP)

.Lblock_loop:
        // Set vl to the number of blocks to process in this iteration.
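        // With e32/m1, VL is capped at VLEN/32 elements, e.g. at most 4
        // blocks per iteration when VLEN = 128.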
        vsetvli         VL, LEN, e32, m1, ta, ma

        // Set up the initial state matrix for the next VL blocks in v0-v15.
        // v{i} holds the i'th 32-bit word of the state matrix for all blocks.
        // Note that only the counter word, at index 12, differs across blocks.
        vmv.v.x         v0, CONSTS0
        vmv.v.x         v1, CONSTS1
        vmv.v.x         v2, CONSTS2
        vmv.v.x         v3, CONSTS3
        vmv.v.x         v4, KEY0
        vmv.v.x         v5, KEY1
        vmv.v.x         v6, KEY2
        vmv.v.x         v7, KEY3
        vmv.v.x         v8, KEY4
        vmv.v.x         v9, KEY5
        vmv.v.x         v10, KEY6
        vmv.v.x         v11, KEY7
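        // vid.v fills v12 with the element indices 0, 1, ..., VL-1, so after
        // adding COUNTER, block i uses counter value COUNTER + i.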
        vid.v           v12
        vadd.vx         v12, v12, COUNTER
        vmv.v.x         v13, NONCE0
        vmv.v.x         v14, NONCE1
        vmv.v.x         v15, NONCE2

        // Load the first half of the input data for each block into v16-v23.
        // v{16+i} holds the i'th 32-bit word for all blocks.
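        // vlsseg8e32.v is a strided segment load: it reads eight consecutive
        // 32-bit fields at each 64-byte stride, depositing field i of every
        // block into v{16+i}, i.e. it transposes block-major data in memory
        // into word-major vector registers.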
        vlsseg8e32.v    v16, (INP), STRIDE

        li              NROUNDS, 20
.Lnext_doubleround:
        addi            NROUNDS, NROUNDS, -2
        // column round
        chacha_round    v0, v4, v8, v12, v1, v5, v9, v13, \
                        v2, v6, v10, v14, v3, v7, v11, v15
        // diagonal round
        chacha_round    v0, v5, v10, v15, v1, v6, v11, v12, \
                        v2, v7, v8, v13, v3, v4, v9, v14
        bnez            NROUNDS, .Lnext_doubleround

        // Load the second half of the input data for each block into v24-v31.
        // v{24+i} holds the {8+i}'th 32-bit word for all blocks.
        addi            TMP, INP, 32
        vlsseg8e32.v    v24, (TMP), STRIDE

        // Finalize the first half of the keystream for each block.
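        // This is ChaCha's feed-forward: add the corresponding initial state
        // word back into each permuted word.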
        vadd.vx         v0, v0, CONSTS0
        vadd.vx         v1, v1, CONSTS1
        vadd.vx         v2, v2, CONSTS2
        vadd.vx         v3, v3, CONSTS3
        vadd.vx         v4, v4, KEY0
        vadd.vx         v5, v5, KEY1
        vadd.vx         v6, v6, KEY2
        vadd.vx         v7, v7, KEY3

        // Encrypt/decrypt the first half of the data for each block.
        vxor.vv         v16, v16, v0
        vxor.vv         v17, v17, v1
        vxor.vv         v18, v18, v2
        vxor.vv         v19, v19, v3
        vxor.vv         v20, v20, v4
        vxor.vv         v21, v21, v5
        vxor.vv         v22, v22, v6
        vxor.vv         v23, v23, v7

        // Store the first half of the output data for each block.
        vssseg8e32.v    v16, (OUTP), STRIDE

        // Finalize the second half of the keystream for each block.
        vadd.vx         v8, v8, KEY4
        vadd.vx         v9, v9, KEY5
        vadd.vx         v10, v10, KEY6
        vadd.vx         v11, v11, KEY7
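        // The counter word's initial value was COUNTER + i in lane i, so
        // rebuild it here: vid.v regenerates the indices, and the two adds
        // fold both COUNTER and the per-lane index into v12.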
        vid.v           v0
        vadd.vx         v12, v12, COUNTER
        vadd.vx         v13, v13, NONCE0
        vadd.vx         v14, v14, NONCE1
        vadd.vx         v15, v15, NONCE2
        vadd.vv         v12, v12, v0

        // Encrypt/decrypt the second half of the data for each block.
        vxor.vv         v24, v24, v8
        vxor.vv         v25, v25, v9
        vxor.vv         v26, v26, v10
        vxor.vv         v27, v27, v11
        vxor.vv         v28, v28, v12
        vxor.vv         v29, v29, v13
        vxor.vv         v30, v30, v14
        vxor.vv         v31, v31, v15

        // Store the second half of the output data for each block.
        addi            TMP, OUTP, 32
        vssseg8e32.v    v24, (TMP), STRIDE

        // Update the counter, the remaining number of blocks, and the input and
        // output pointers according to the number of blocks processed (VL).
        add             COUNTER, COUNTER, VL
        sub             LEN, LEN, VL
        slli            TMP, VL, 6
        add             OUTP, OUTP, TMP
        add             INP, INP, TMP
        bnez            LEN, .Lblock_loop

        ld              s0, 0(sp)
        ld              s1, 8(sp)
        ld              s2, 16(sp)
        ld              s3, 24(sp)
        ld              s4, 32(sp)
        ld              s5, 40(sp)
        ld              s6, 48(sp)
        ld              s7, 56(sp)
        ld              s8, 64(sp)
        ld              s9, 72(sp)
        ld              s10, 80(sp)
        ld              s11, 88(sp)
        addi            sp, sp, 96
        ret
SYM_FUNC_END(chacha20_zvkb)
