/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128 && VLEN < 2048
// - RISC-V Vector AES block cipher extension ('Zvkned')
// - RISC-V Vector Bit-manipulation extension ('Zvbb')
// - RISC-V Vector GCM/GMAC extension ('Zvkg')

#include <linux/linkage.h>

.text
.option arch, +zvkned, +zvbb, +zvkg

#include "aes-macros.S"

#define KEYP		a0
#define INP		a1
#define OUTP		a2
#define LEN		a3
#define TWEAKP		a4

#define LEN32		a5
#define TAIL_LEN	a6
#define VL		a7
#define VLMAX		t4

// v1-v15 contain the AES round keys, but they are used for temporaries before
// the AES round keys have been loaded.
#define TWEAKS		v16	// LMUL=4 (most of the time)
#define TWEAKS_BREV	v20	// LMUL=4 (most of the time)
#define MULTS_BREV	v24	// LMUL=4 (most of the time)
#define TMP0		v28
#define TMP1		v29
#define TMP2		v30
#define TMP3		v31

// xts_init initializes the following values:
//
//	TWEAKS: N 128-bit tweaks T*(x^i) for i in 0..(N - 1)
//	TWEAKS_BREV: same as TWEAKS, but bit-reversed
//	MULTS_BREV: N 128-bit values x^N, bit-reversed
//
// N is the maximum number of blocks that will be processed per loop iteration,
// computed using vsetvli.
//
// The field convention used by XTS is the same as that of GHASH, but with the
// bits reversed within each byte.  The zvkg extension provides the vgmul
// instruction which does multiplication in this field.  Therefore, for tweak
// computation we use vgmul to do multiplications in parallel, instead of
// serially multiplying by x using shifting+xoring.  Note that for this to work,
// the inputs and outputs to vgmul must be bit-reversed (we do it with vbrev8).
.macro	xts_init

	// Load the first tweak T.
	vsetivli	zero, 4, e32, m1, ta, ma
	vle32.v		TWEAKS, (TWEAKP)

	// If there's only one block (or no blocks at all), then skip the tweak
	// sequence computation because (at most) T itself is needed.
	li		t0, 16
	ble		LEN, t0, .Linit_single_block\@

	// Save a copy of T bit-reversed in v12.
	vbrev8.v	v12, TWEAKS

	//
	// Generate x^i for i in 0..(N - 1), i.e. 128-bit values 1 << i assuming
	// that N <= 128.  Though, this code actually requires N < 64 (or
	// equivalently VLEN < 2048) due to the use of 64-bit intermediate
	// values here and in the x^N computation later.
	//
	vsetvli		VL, LEN32, e32, m4, ta, ma
	srli		t0, VL, 2	// t0 = N (num blocks)
	// Generate two sequences, each with N 32-bit values:
	// v0=[1, 1, 1, ...] and v1=[0, 1, 2, ...].
	vsetvli		zero, t0, e32, m1, ta, ma
	vmv.v.i		v0, 1
	vid.v		v1
	// Use vzext to zero-extend the sequences to 64 bits.  Reinterpret them
	// as two sequences, each with 2*N 32-bit values:
	// v2=[1, 0, 1, 0, 1, 0, ...] and v4=[0, 0, 1, 0, 2, 0, ...].
	vsetvli		zero, t0, e64, m2, ta, ma
	vzext.vf2	v2, v0
	vzext.vf2	v4, v1
	slli		t1, t0, 1	// t1 = 2*N
	vsetvli		zero, t1, e32, m2, ta, ma
	// Use vwsll to compute [1<<0, 0<<0, 1<<1, 0<<0, 1<<2, 0<<0, ...],
	// widening to 64 bits per element.  When reinterpreted as N 128-bit
	// values, this is the needed sequence of 128-bit values 1 << i (x^i).
	vwsll.vv	v8, v2, v4

	// Copy the bit-reversed T to all N elements of TWEAKS_BREV, then
	// multiply by x^i.  This gives the sequence T*(x^i), bit-reversed.
	vsetvli		zero, LEN32, e32, m4, ta, ma
	vmv.v.i		TWEAKS_BREV, 0
	vaesz.vs	TWEAKS_BREV, v12
	vbrev8.v	v8, v8
	vgmul.vv	TWEAKS_BREV, v8

	// Save a copy of the sequence T*(x^i) with the bit reversal undone.
	vbrev8.v	TWEAKS, TWEAKS_BREV

	// Generate N copies of x^N, i.e. 128-bit values 1 << N, bit-reversed.
	li		t1, 1
	sll		t1, t1, t0	// t1 = 1 << N
	vsetivli	zero, 2, e64, m1, ta, ma
	vmv.v.i		v0, 0
	vsetivli	zero, 1, e64, m1, tu, ma
	vmv.v.x		v0, t1
	vbrev8.v	v0, v0
	vsetvli		zero, LEN32, e32, m4, ta, ma
	vmv.v.i		MULTS_BREV, 0
	vaesz.vs	MULTS_BREV, v0

	j		.Linit_done\@

.Linit_single_block\@:
	vbrev8.v	TWEAKS_BREV, TWEAKS
.Linit_done\@:
.endm

// Set the first 128 bits of MULTS_BREV to 0x40, i.e. 'x' bit-reversed.  This is
// the multiplier required to advance the tweak by one.
.macro	load_x
	li		t0, 0x40
	vsetivli	zero, 4, e32, m1, ta, ma
	vmv.v.i		MULTS_BREV, 0
	vsetivli	zero, 1, e8, m1, tu, ma
	vmv.v.x		MULTS_BREV, t0
.endm

.macro	__aes_xts_crypt	enc, keylen
	// With 16 < len <= 31, there's no main loop, just ciphertext stealing.
	beqz		LEN32, .Lcts_without_main_loop\@

	vsetvli		VLMAX, zero, e32, m4, ta, ma
1:
	vsetvli		VL, LEN32, e32, m4, ta, ma
2:
	// Encrypt or decrypt VL/4 blocks.
	vle32.v		TMP0, (INP)
	vxor.vv		TMP0, TMP0, TWEAKS
	aes_crypt	TMP0, \enc, \keylen
	vxor.vv		TMP0, TMP0, TWEAKS
	vse32.v		TMP0, (OUTP)

	// Update the pointers and the remaining length.
	slli		t0, VL, 2
	add		INP, INP, t0
	add		OUTP, OUTP, t0
	sub		LEN32, LEN32, VL

	// Check whether more blocks remain.
	beqz		LEN32, .Lmain_loop_done\@

	// Compute the next sequence of tweaks by multiplying the previous
	// sequence by x^N.  Store the result in both bit-reversed order and
	// regular order (i.e. with the bit reversal undone).
	vgmul.vv	TWEAKS_BREV, MULTS_BREV
	vbrev8.v	TWEAKS, TWEAKS_BREV

	// Since we compute the tweak multipliers x^N in advance, we require
	// that each iteration process the same length except possibly the last.
	// This conflicts slightly with the behavior allowed by RISC-V Vector
	// Extension, where CPUs can select a lower length for both of the last
	// two iterations.  E.g., vl might take the sequence of values
	// [16, 16, 16, 12, 12], whereas we need [16, 16, 16, 16, 8] so that we
	// can use x^4 again instead of computing x^3.  Therefore, we explicitly
	// keep the vl at VLMAX if there is at least VLMAX length remaining.
	bge		LEN32, VLMAX, 2b
	j		1b

.Lmain_loop_done\@:
	load_x

	// Compute the next tweak.
	addi		t0, VL, -4
	vsetivli	zero, 4, e32, m4, ta, ma
	vslidedown.vx	TWEAKS_BREV, TWEAKS_BREV, t0	// Extract last tweak
	vsetivli	zero, 4, e32, m1, ta, ma
	vgmul.vv	TWEAKS_BREV, MULTS_BREV		// Advance to next tweak

	bnez		TAIL_LEN, .Lcts\@

	// Update *TWEAKP to contain the next tweak.
	vbrev8.v	TWEAKS, TWEAKS_BREV
	vse32.v		TWEAKS, (TWEAKP)
	ret

.Lcts_without_main_loop\@:
	load_x
.Lcts\@:
	// TWEAKS_BREV now contains the next tweak.  Compute the one after that.
	vsetivli	zero, 4, e32, m1, ta, ma
	vmv.v.v		TMP0, TWEAKS_BREV
	vgmul.vv	TMP0, MULTS_BREV
	// Undo the bit reversal of the next two tweaks and store them in TMP1
	// and TMP2, such that TMP1 is the first needed and TMP2 the second.
	.if \enc
	vbrev8.v	TMP1, TWEAKS_BREV
	vbrev8.v	TMP2, TMP0
	.else
	vbrev8.v	TMP1, TMP0
	vbrev8.v	TMP2, TWEAKS_BREV
	.endif

	// Encrypt/decrypt the last full block.
	vle32.v		TMP0, (INP)
	vxor.vv		TMP0, TMP0, TMP1
	aes_crypt	TMP0, \enc, \keylen
	vxor.vv		TMP0, TMP0, TMP1

	// Swap the first TAIL_LEN bytes of the above result with the tail.
	// Note that to support in-place encryption/decryption, the load from
	// the input tail must happen before the store to the output tail.
	addi		t0, INP, 16
	addi		t1, OUTP, 16
	vmv.v.v		TMP3, TMP0
	vsetvli		zero, TAIL_LEN, e8, m1, tu, ma
	vle8.v		TMP0, (t0)
	vse8.v		TMP3, (t1)

	// Encrypt/decrypt again and store the last full block.
	vsetivli	zero, 4, e32, m1, ta, ma
	vxor.vv		TMP0, TMP0, TMP2
	aes_crypt	TMP0, \enc, \keylen
	vxor.vv		TMP0, TMP0, TMP2
	vse32.v		TMP0, (OUTP)

	ret
.endm

.macro	aes_xts_crypt	enc

	// Check whether the length is a multiple of the AES block size.
	andi		TAIL_LEN, LEN, 15
	beqz		TAIL_LEN, 1f

	// The length isn't a multiple of the AES block size, so ciphertext
	// stealing will be required.  Ciphertext stealing involves special
	// handling of the partial block and the last full block, so subtract
	// the length of both from the length to be processed in the main loop.
	sub		LEN, LEN, TAIL_LEN
	addi		LEN, LEN, -16
1:
	srli		LEN32, LEN, 2
	// LEN and LEN32 now contain the total length of the blocks that will be
	// processed in the main loop, in bytes and 32-bit words respectively.

	xts_init
	aes_begin	KEYP, 128f, 192f
	__aes_xts_crypt	\enc, 256
128:
	__aes_xts_crypt	\enc, 128
192:
	__aes_xts_crypt	\enc, 192
.endm

// void aes_xts_encrypt_zvkned_zvbb_zvkg(const struct crypto_aes_ctx *key,
//					 const u8 *in, u8 *out, size_t len,
//					 u8 tweak[16]);
//
// |key| is the data key.  |tweak| contains the next tweak; the encryption of
// the original IV with the tweak key was already done.  This function supports
// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and
// |len| must be a multiple of 16 except on the last call.  If |len| is a
// multiple of 16, then this function updates |tweak| to contain the next tweak.
SYM_FUNC_START(aes_xts_encrypt_zvkned_zvbb_zvkg)
	aes_xts_crypt	1
SYM_FUNC_END(aes_xts_encrypt_zvkned_zvbb_zvkg)

// Same prototype and calling convention as the encryption function.
SYM_FUNC_START(aes_xts_decrypt_zvkned_zvbb_zvkg)
	aes_xts_crypt	0
SYM_FUNC_END(aes_xts_decrypt_zvkned_zvbb_zvkg)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.