/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')

#include <linux/linkage.h>

.text
.option arch, +zvkb

#define KEYP		a0
#define INP		a1
#define OUTP		a2
#define LEN		a3
#define IVP		a4

#define CONSTS0		a5
#define CONSTS1		a6
#define CONSTS2		a7
#define CONSTS3		t0
#define TMP		t1
#define VL		t2
#define STRIDE		t3
#define NROUNDS		t4
#define KEY0		s0
#define KEY1		s1
#define KEY2		s2
#define KEY3		s3
#define KEY4		s4
#define KEY5		s5
#define KEY6		s6
#define KEY7		s7
#define COUNTER		s8
#define NONCE0		s9
#define NONCE1		s10
#define NONCE2		s11

.macro	chacha_round	a0, b0, c0, d0, a1, b1, c1, d1, \
			a2, b2, c2, d2, a3, b3, c3, d3
	// a += b; d ^= a; d = rol(d, 16);
	vadd.vv		\a0, \a0, \b0
	vadd.vv		\a1, \a1, \b1
	vadd.vv		\a2, \a2, \b2
	vadd.vv		\a3, \a3, \b3
	vxor.vv		\d0, \d0, \a0
	vxor.vv		\d1, \d1, \a1
	vxor.vv		\d2, \d2, \a2
	vxor.vv		\d3, \d3, \a3
	vror.vi		\d0, \d0, 32 - 16
	vror.vi		\d1, \d1, 32 - 16
	vror.vi		\d2, \d2, 32 - 16
	vror.vi		\d3, \d3, 32 - 16

	// c += d; b ^= c; b = rol(b, 12);
	vadd.vv		\c0, \c0, \d0
	vadd.vv		\c1, \c1, \d1
	vadd.vv		\c2, \c2, \d2
	vadd.vv		\c3, \c3, \d3
	vxor.vv		\b0, \b0, \c0
	vxor.vv		\b1, \b1, \c1
	vxor.vv		\b2, \b2, \c2
	vxor.vv		\b3, \b3, \c3
	vror.vi		\b0, \b0, 32 - 12
	vror.vi		\b1, \b1, 32 - 12
	vror.vi		\b2, \b2, 32 - 12
	vror.vi		\b3, \b3, 32 - 12

	// a += b; d ^= a; d = rol(d, 8);
	vadd.vv		\a0, \a0, \b0
	vadd.vv		\a1, \a1, \b1
	vadd.vv		\a2, \a2, \b2
	vadd.vv		\a3, \a3, \b3
	vxor.vv		\d0, \d0, \a0
	vxor.vv		\d1, \d1, \a1
	vxor.vv		\d2, \d2, \a2
	vxor.vv		\d3, \d3, \a3
	vror.vi		\d0, \d0, 32 - 8
	vror.vi		\d1, \d1, 32 - 8
	vror.vi		\d2, \d2, 32 - 8
	vror.vi		\d3, \d3, 32 - 8

	// c += d; b ^= c; b = rol(b, 7);
	vadd.vv		\c0, \c0, \d0
	vadd.vv		\c1, \c1, \d1
	vadd.vv		\c2, \c2, \d2
	vadd.vv		\c3, \c3, \d3
	vxor.vv		\b0, \b0, \c0
	vxor.vv		\b1, \b1, \c1
	vxor.vv		\b2, \b2, \c2
	vxor.vv		\b3, \b3, \c3
	vror.vi		\b0, \b0, 32 - 7
	vror.vi		\b1, \b1, 32 - 7
	vror.vi		\b2, \b2, 32 - 7
	vror.vi		\b3, \b3, 32 - 7
.endm
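
// For reference, one ChaCha quarter-round in plain C (an explanatory
// sketch added here, not part of the original code; rol32() is assumed to
// be a 32-bit left-rotate as in <linux/bitops.h>):
//
//	static void chacha_qr(u32 *a, u32 *b, u32 *c, u32 *d)
//	{
//		*a += *b; *d ^= *a; *d = rol32(*d, 16);
//		*c += *d; *b ^= *c; *b = rol32(*b, 12);
//		*a += *b; *d ^= *a; *d = rol32(*d, 8);
//		*c += *d; *b ^= *c; *b = rol32(*b, 7);
//	}
//
// The chacha_round macro above interleaves four such quarter-rounds, one
// per (a, b, c, d) argument group, and each vector instruction applies to
// the corresponding state word of all VL blocks at once. Zvkb has no
// immediate form of vrol, so rol(x, n) is computed as vror.vi by 32 - n.
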
// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
//		      const u32 iv[4]);
//
// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE).
// The counter is treated as 32-bit, following the RFC 7539 convention.
SYM_FUNC_START(chacha20_zvkb)
	srli	LEN, LEN, 6	// Bytes to blocks

	addi	sp, sp, -96
	sd	s0, 0(sp)
	sd	s1, 8(sp)
	sd	s2, 16(sp)
	sd	s3, 24(sp)
	sd	s4, 32(sp)
	sd	s5, 40(sp)
	sd	s6, 48(sp)
	sd	s7, 56(sp)
	sd	s8, 64(sp)
	sd	s9, 72(sp)
	sd	s10, 80(sp)
	sd	s11, 88(sp)

	li	STRIDE, 64

	// Set up the initial state matrix in scalar registers.
	li	CONSTS0, 0x61707865	// "expa" little endian
	li	CONSTS1, 0x3320646e	// "nd 3" little endian
	li	CONSTS2, 0x79622d32	// "2-by" little endian
	li	CONSTS3, 0x6b206574	// "te k" little endian
	lw	KEY0, 0(KEYP)
	lw	KEY1, 4(KEYP)
	lw	KEY2, 8(KEYP)
	lw	KEY3, 12(KEYP)
	lw	KEY4, 16(KEYP)
	lw	KEY5, 20(KEYP)
	lw	KEY6, 24(KEYP)
	lw	KEY7, 28(KEYP)
	lw	COUNTER, 0(IVP)
	lw	NONCE0, 4(IVP)
	lw	NONCE1, 8(IVP)
	lw	NONCE2, 12(IVP)

.Lblock_loop:
	// Set vl to the number of blocks to process in this iteration.
	vsetvli	VL, LEN, e32, m1, ta, ma

	// Set up the initial state matrix for the next VL blocks in v0-v15.
	// v{i} holds the i'th 32-bit word of the state matrix for all blocks.
	// Note that only the counter word, at index 12, differs across blocks.
	vmv.v.x	v0, CONSTS0
	vmv.v.x	v1, CONSTS1
	vmv.v.x	v2, CONSTS2
	vmv.v.x	v3, CONSTS3
	vmv.v.x	v4, KEY0
	vmv.v.x	v5, KEY1
	vmv.v.x	v6, KEY2
	vmv.v.x	v7, KEY3
	vmv.v.x	v8, KEY4
	vmv.v.x	v9, KEY5
	vmv.v.x	v10, KEY6
	vmv.v.x	v11, KEY7
	vid.v	v12
	vadd.vx	v12, v12, COUNTER
	vmv.v.x	v13, NONCE0
	vmv.v.x	v14, NONCE1
	vmv.v.x	v15, NONCE2

	// Load the first half of the input data for each block into v16-v23.
	// v{16+i} holds the i'th 32-bit word for each block.
	vlsseg8e32.v	v16, (INP), STRIDE
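
// The strided segment load above deinterleaves the blocks. Roughly, in C
// (an explanatory sketch added here, not part of the original code):
//
//	for (i = 0; i < vl; i++)		/* block index */
//		for (j = 0; j < 8; j++)		/* word index in block */
//			v[16 + j][i] = ((const u32 *)in)[16 * i + j];
//
// That is, element i of register v{16+j} receives word j of block i, with
// consecutive blocks STRIDE = 64 bytes apart in memory. This transposes
// the data into the same word-per-register layout as the state in v0-v15,
// so encryption below is a plain vxor.vv per state word.
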
	li	NROUNDS, 20
.Lnext_doubleround:
	addi	NROUNDS, NROUNDS, -2
	// column round
	chacha_round	v0, v4, v8, v12, v1, v5, v9, v13, \
			v2, v6, v10, v14, v3, v7, v11, v15
	// diagonal round
	chacha_round	v0, v5, v10, v15, v1, v6, v11, v12, \
			v2, v7, v8, v13, v3, v4, v9, v14
	bnez	NROUNDS, .Lnext_doubleround

	// Load the second half of the input data for each block into v24-v31.
	// v{24+i} holds the {8+i}'th 32-bit word for each block.
	addi	TMP, INP, 32
	vlsseg8e32.v	v24, (TMP), STRIDE

	// Finalize the first half of the keystream for each block.
	vadd.vx	v0, v0, CONSTS0
	vadd.vx	v1, v1, CONSTS1
	vadd.vx	v2, v2, CONSTS2
	vadd.vx	v3, v3, CONSTS3
	vadd.vx	v4, v4, KEY0
	vadd.vx	v5, v5, KEY1
	vadd.vx	v6, v6, KEY2
	vadd.vx	v7, v7, KEY3

	// Encrypt/decrypt the first half of the data for each block.
	vxor.vv	v16, v16, v0
	vxor.vv	v17, v17, v1
	vxor.vv	v18, v18, v2
	vxor.vv	v19, v19, v3
	vxor.vv	v20, v20, v4
	vxor.vv	v21, v21, v5
	vxor.vv	v22, v22, v6
	vxor.vv	v23, v23, v7

	// Store the first half of the output data for each block.
	vssseg8e32.v	v16, (OUTP), STRIDE

	// Finalize the second half of the keystream for each block.
	vadd.vx	v8, v8, KEY4
	vadd.vx	v9, v9, KEY5
	vadd.vx	v10, v10, KEY6
	vadd.vx	v11, v11, KEY7
	vid.v	v0
	vadd.vx	v12, v12, COUNTER
	vadd.vx	v13, v13, NONCE0
	vadd.vx	v14, v14, NONCE1
	vadd.vx	v15, v15, NONCE2
	vadd.vv	v12, v12, v0

	// Encrypt/decrypt the second half of the data for each block.
	vxor.vv	v24, v24, v8
	vxor.vv	v25, v25, v9
	vxor.vv	v26, v26, v10
	vxor.vv	v27, v27, v11
	vxor.vv	v29, v29, v13
	vxor.vv	v28, v28, v12
	vxor.vv	v30, v30, v14
	vxor.vv	v31, v31, v15

	// Store the second half of the output data for each block.
	addi	TMP, OUTP, 32
	vssseg8e32.v	v24, (TMP), STRIDE

	// Update the counter, the remaining number of blocks, and the input
	// and output pointers according to the number of blocks processed (VL).
	add	COUNTER, COUNTER, VL
	sub	LEN, LEN, VL
	slli	TMP, VL, 6
	add	OUTP, OUTP, TMP
	add	INP, INP, TMP
	bnez	LEN, .Lblock_loop

	ld	s0, 0(sp)
	ld	s1, 8(sp)
	ld	s2, 16(sp)
	ld	s3, 24(sp)
	ld	s4, 32(sp)
	ld	s5, 40(sp)
	ld	s6, 48(sp)
	ld	s7, 56(sp)
	ld	s8, 64(sp)
	ld	s9, 72(sp)
	ld	s10, 80(sp)
	ld	s11, 88(sp)
	addi	sp, sp, 96
	ret
SYM_FUNC_END(chacha20_zvkb)
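
// A hedged sketch of how caller glue might invoke this routine; the names
// kernel_vector_begin()/kernel_vector_end() come from the RISC-V kernel's
// <asm/vector.h> and are not part of this file:
//
//	u32 iv[4];
//	iv[0] = counter;		/* 32-bit block counter */
//	memcpy(&iv[1], nonce, 12);	/* 96-bit nonce */
//	kernel_vector_begin();
//	chacha20_zvkb(key, src, dst, len, iv);	/* len % 64 == 0, len != 0 */
//	kernel_vector_end();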