~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
  2 //
  3 // This file is dual-licensed, meaning that you can use it under your
  4 // choice of either of the following two licenses:
  5 //
  6 // Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
  7 //
  8 // Licensed under the Apache License 2.0 (the "License"). You can obtain
  9 // a copy in the file LICENSE in the source distribution or at
 10 // https://www.openssl.org/source/license.html
 11 //
 12 // or
 13 //
 14 // Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
 15 // Copyright 2024 Google LLC
 16 // All rights reserved.
 17 //
 18 // Redistribution and use in source and binary forms, with or without
 19 // modification, are permitted provided that the following conditions
 20 // are met:
 21 // 1. Redistributions of source code must retain the above copyright
 22 //    notice, this list of conditions and the following disclaimer.
 23 // 2. Redistributions in binary form must reproduce the above copyright
 24 //    notice, this list of conditions and the following disclaimer in the
 25 //    documentation and/or other materials provided with the distribution.
 26 //
 27 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 28 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 29 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 30 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 31 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 32 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 33 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 34 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 35 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 36 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 37 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 38 
 39 // The generated code of this file depends on the following RISC-V extensions:
 40 // - RV64I
 41 // - RISC-V Vector ('V') with VLEN >= 128 && VLEN < 2048
 42 // - RISC-V Vector AES block cipher extension ('Zvkned')
 43 // - RISC-V Vector Bit-manipulation extension ('Zvbb')
 44 // - RISC-V Vector GCM/GMAC extension ('Zvkg')
 45 
 46 #include <linux/linkage.h>
 47 
 48 .text
 49 .option arch, +zvkned, +zvbb, +zvkg
 50 
 51 #include "aes-macros.S"
 52 
 53 #define KEYP            a0      // 1st argument: |key|
 54 #define INP             a1      // 2nd argument: |in|, advanced by the main loop
 55 #define OUTP            a2      // 3rd argument: |out|, advanced by the main loop
 56 #define LEN             a3      // 4th argument: |len| in bytes
 57 #define TWEAKP          a4      // 5th argument: |tweak|
 58 
 59 #define LEN32           a5      // Remaining main-loop length in 32-bit words
 60 #define TAIL_LEN        a6      // len % 16, nonzero means ciphertext stealing
 61 #define VL              a7      // vl returned by the main-loop vsetvli
 62 #define VLMAX           t4      // Maximum vl for e32/m4
 63 
 64 // v1-v15 contain the AES round keys, but they are used for temporaries before
 65 // the AES round keys have been loaded.
 66 #define TWEAKS          v16     // LMUL=4 (most of the time)
 67 #define TWEAKS_BREV     v20     // LMUL=4 (most of the time)
 68 #define MULTS_BREV      v24     // LMUL=4 (most of the time)
 69 #define TMP0            v28     // LMUL=4 data group (v28-v31) in the main loop
 70 #define TMP1            v29     // Ciphertext stealing: tweak for the first pass
 71 #define TMP2            v30     // Ciphertext stealing: tweak for the second pass
 72 #define TMP3            v31     // Ciphertext stealing: copy of first-pass result
 73 
 74 // xts_init initializes the following values:
 75 //
 76 //      TWEAKS: N 128-bit tweaks T*(x^i) for i in 0..(N - 1)
 77 //      TWEAKS_BREV: same as TWEAKS, but bit-reversed
 78 //      MULTS_BREV: N 128-bit values x^N, bit-reversed.  Only if N > 1.
 79 //
 80 // N is the maximum number of blocks that will be processed per loop iteration,
 81 // computed using vsetvli.
 82 //
 83 // The field convention used by XTS is the same as that of GHASH, but with the
 84 // bits reversed within each byte.  The zvkg extension provides the vgmul
 85 // instruction which does multiplication in this field.  Therefore, for tweak
 86 // computation we use vgmul to do multiplications in parallel, instead of
 87 // serially multiplying by x using shifting+xoring.  Note that for this to work,
 88 // the inputs and outputs to vgmul must be bit-reversed (we do it with vbrev8).
 89 .macro  xts_init
 90         // Reads TWEAKP, LEN and LEN32.  Uses t0, t1, VL and v0-v12 as scratch.
 91         // Load the first tweak T.
 92         vsetivli        zero, 4, e32, m1, ta, ma
 93         vle32.v         TWEAKS, (TWEAKP)
 94 
 95         // If there's only one block (or no blocks at all), then skip the tweak
 96         // sequence computation because (at most) T itself is needed.
 97         li              t0, 16
 98         ble             LEN, t0, .Linit_single_block\@
 99 
100         // Save a copy of T bit-reversed in v12.
101         vbrev8.v        v12, TWEAKS
102 
103         //
104         // Generate x^i for i in 0..(N - 1), i.e. 128-bit values 1 << i assuming
105         // that N <= 128.  Though, this code actually requires N < 64 (or
106         // equivalently VLEN < 2048) due to the use of 64-bit intermediate
107         // values here and in the x^N computation later.
108         //
109         vsetvli         VL, LEN32, e32, m4, ta, ma      // Same request as the main loop
110         srli            t0, VL, 2       // t0 = N (num blocks), 4 words per block
111         // Generate two sequences, each with N 32-bit values:
112         // v0=[1, 1, 1, ...] and v1=[0, 1, 2, ...].
113         vsetvli         zero, t0, e32, m1, ta, ma
114         vmv.v.i         v0, 1
115         vid.v           v1
116         // Use vzext to zero-extend the sequences to 64 bits.  Reinterpret them
117         // as two sequences, each with 2*N 32-bit values:
118         // v2=[1, 0, 1, 0, 1, 0, ...] and v4=[0, 0, 1, 0, 2, 0, ...].
119         vsetvli         zero, t0, e64, m2, ta, ma
120         vzext.vf2       v2, v0
121         vzext.vf2       v4, v1
122         slli            t1, t0, 1       // t1 = 2*N
123         vsetvli         zero, t1, e32, m2, ta, ma
124         // Use vwsll to compute [1<<0, 0<<0, 1<<1, 0<<0, 1<<2, 0<<0, ...],
125         // widening to 64 bits per element.  When reinterpreted as N 128-bit
126         // values, this is the needed sequence of 128-bit values 1 << i (x^i).
127         vwsll.vv        v8, v2, v4      // Widened result occupies v8-v11
128 
129         // Copy the bit-reversed T to all N elements of TWEAKS_BREV, then
130         // multiply by x^i.  This gives the sequence T*(x^i), bit-reversed.
131         vsetvli         zero, LEN32, e32, m4, ta, ma
132         vmv.v.i         TWEAKS_BREV, 0
133         vaesz.vs        TWEAKS_BREV, v12        // 0 ^ v12 in every 128-bit group
134         vbrev8.v        v8, v8                  // Bit-reverse x^i for vgmul
135         vgmul.vv        TWEAKS_BREV, v8
136 
137         // Save a copy of the sequence T*(x^i) with the bit reversal undone.
138         vbrev8.v        TWEAKS, TWEAKS_BREV
139 
140         // Generate N copies of x^N, i.e. 128-bit values 1 << N, bit-reversed.
141         li              t1, 1
142         sll             t1, t1, t0      // t1 = 1 << N (needs N < 64)
143         vsetivli        zero, 2, e64, m1, ta, ma
144         vmv.v.i         v0, 0           // v0 = 128-bit zero
145         vsetivli        zero, 1, e64, m1, tu, ma        // tu: upper 64 bits stay zero
146         vmv.v.x         v0, t1          // Low 64 bits of v0 = 1 << N
147         vbrev8.v        v0, v0
148         vsetvli         zero, LEN32, e32, m4, ta, ma
149         vmv.v.i         MULTS_BREV, 0
150         vaesz.vs        MULTS_BREV, v0  // 0 ^ v0 in every 128-bit group
151 
152         j               .Linit_done\@
153 
154 .Linit_single_block\@:
155         vbrev8.v        TWEAKS_BREV, TWEAKS     // vl=4, e32, m1 from the initial vsetivli
156 .Linit_done\@:
157 .endm
158 
159 // Set the first 128 bits of MULTS_BREV to 0x40, i.e. 'x' bit-reversed.  This is
160 // the multiplier required to advance the tweak by one.
161 .macro  load_x
162         li              t0, 0x40        // Clobbers t0
163         vsetivli        zero, 4, e32, m1, ta, ma
164         vmv.v.i         MULTS_BREV, 0   // Clear the first 128 bits
165         vsetivli        zero, 1, e8, m1, tu, ma // tu: bytes 1..15 stay zero
166         vmv.v.x         MULTS_BREV, t0  // Byte 0 = 0x40; leaves vl=1, e8, m1 set
167 .endm
168 
169 .macro  __aes_xts_crypt enc, keylen
170         // With 16 < len <= 31, there's no main loop, just ciphertext stealing.
171         beqz            LEN32, .Lcts_without_main_loop\@
172 
173         vsetvli         VLMAX, zero, e32, m4, ta, ma    // VLMAX = largest vl for e32/m4
174 1:      // Entry that asks vsetvli for a vl matching the remaining length.
175         vsetvli         VL, LEN32, e32, m4, ta, ma
176 2:      // Entry that keeps the current vl.
177         // Encrypt or decrypt VL/4 blocks.
178         vle32.v         TMP0, (INP)
179         vxor.vv         TMP0, TMP0, TWEAKS
180         aes_crypt       TMP0, \enc, \keylen
181         vxor.vv         TMP0, TMP0, TWEAKS
182         vse32.v         TMP0, (OUTP)
183 
184         // Update the pointers and the remaining length.
185         slli            t0, VL, 2       // t0 = bytes just processed (VL words * 4)
186         add             INP, INP, t0
187         add             OUTP, OUTP, t0
188         sub             LEN32, LEN32, VL
189 
190         // Check whether more blocks remain.
191         beqz            LEN32, .Lmain_loop_done\@
192 
193         // Compute the next sequence of tweaks by multiplying the previous
194         // sequence by x^N.  Store the result in both bit-reversed order and
195         // regular order (i.e. with the bit reversal undone).
196         vgmul.vv        TWEAKS_BREV, MULTS_BREV
197         vbrev8.v        TWEAKS, TWEAKS_BREV
198 
199         // Since we compute the tweak multipliers x^N in advance, we require
200         // that each iteration process the same length except possibly the last.
201         // This conflicts slightly with the behavior allowed by RISC-V Vector
202         // Extension, where CPUs can select a lower length for both of the last
203         // two iterations.  E.g., vl might take the sequence of values
204         // [16, 16, 16, 12, 12], whereas we need [16, 16, 16, 16, 8] so that we
205         // can use x^4 again instead of computing x^3.  Therefore, we explicitly
206         // keep the vl at VLMAX if there is at least VLMAX remaining.
207         bge             LEN32, VLMAX, 2b        // Full-size step: skip the vsetvli
208         j               1b                      // Short step: request a new vl
209 
210 .Lmain_loop_done\@:
211         load_x
212 
213         // Compute the next tweak.
214         addi            t0, VL, -4      // t0 = word index of the last block's tweak
215         vsetivli        zero, 4, e32, m4, ta, ma        // m4: source is the whole group
216         vslidedown.vx   TWEAKS_BREV, TWEAKS_BREV, t0    // Extract last tweak
217         vsetivli        zero, 4, e32, m1, ta, ma
218         vgmul.vv        TWEAKS_BREV, MULTS_BREV         // Advance to next tweak
219 
220         bnez            TAIL_LEN, .Lcts\@
221 
222         // Update *TWEAKP to contain the next tweak.
223         vbrev8.v        TWEAKS, TWEAKS_BREV
224         vse32.v         TWEAKS, (TWEAKP)
225         ret
226 
227 .Lcts_without_main_loop\@:
228         load_x
229 .Lcts\@:
230         // TWEAKS_BREV now contains the next tweak.  Compute the one after that.
231         vsetivli        zero, 4, e32, m1, ta, ma
232         vmv.v.v         TMP0, TWEAKS_BREV
233         vgmul.vv        TMP0, MULTS_BREV        // TMP0 = next tweak times x, bit-reversed
234         // Undo the bit reversal of the next two tweaks and store them in TMP1
235         // and TMP2, such that TMP1 is the first needed and TMP2 the second.
236 .if \enc
237         vbrev8.v        TMP1, TWEAKS_BREV
238         vbrev8.v        TMP2, TMP0
239 .else   // Decryption uses the same two tweaks in the opposite order.
240         vbrev8.v        TMP1, TMP0
241         vbrev8.v        TMP2, TWEAKS_BREV
242 .endif
243 
244         // Encrypt/decrypt the last full block.
245         vle32.v         TMP0, (INP)
246         vxor.vv         TMP0, TMP0, TMP1
247         aes_crypt       TMP0, \enc, \keylen
248         vxor.vv         TMP0, TMP0, TMP1
249 
250         // Swap the first TAIL_LEN bytes of the above result with the tail.
251         // Note that to support in-place encryption/decryption, the load from
252         // the input tail must happen before the store to the output tail.
253         addi            t0, INP, 16     // t0 = address of the input tail
254         addi            t1, OUTP, 16    // t1 = address of the output tail
255         vmv.v.v         TMP3, TMP0      // Keep the first-pass result for the tail store
256         vsetvli         zero, TAIL_LEN, e8, m1, tu, ma  // tu: bytes TAIL_LEN..15 are kept
257         vle8.v          TMP0, (t0)      // Low TAIL_LEN bytes of TMP0 = input tail
258         vse8.v          TMP3, (t1)      // Output tail = low TAIL_LEN bytes of first pass
259 
260         // Encrypt/decrypt again and store the last full block.
261         vsetivli        zero, 4, e32, m1, ta, ma
262         vxor.vv         TMP0, TMP0, TMP2
263         aes_crypt       TMP0, \enc, \keylen
264         vxor.vv         TMP0, TMP0, TMP2
265         vse32.v         TMP0, (OUTP)
266 
267         ret                             // This path does not store to TWEAKP
268 .endm
269 
270 .macro  aes_xts_crypt   enc
271         // enc is 1 to encrypt, 0 to decrypt.  Writes LEN, LEN32 and TAIL_LEN.
272         // Check whether the length is a multiple of the AES block size.
273         andi            TAIL_LEN, LEN, 15       // TAIL_LEN = len % 16
274         beqz            TAIL_LEN, 1f
275 
276         // The length isn't a multiple of the AES block size, so ciphertext
277         // stealing will be required.  Ciphertext stealing involves special
278         // handling of the partial block and the last full block, so subtract
279         // the length of both from the length to be processed in the main loop.
280         sub             LEN, LEN, TAIL_LEN
281         addi            LEN, LEN, -16
282 1:
283         srli            LEN32, LEN, 2
284         // LEN and LEN32 now contain the total length of the blocks that will be
285         // processed in the main loop, in bytes and 32-bit words respectively.
286 
287         xts_init
288         aes_begin       KEYP, 128f, 192f        // From aes-macros.S; presumably falls through for 256-bit keys (confirm)
289         __aes_xts_crypt \enc, 256               // Every path in each expansion ends in ret
290 128:
291         __aes_xts_crypt \enc, 128
292 192:
293         __aes_xts_crypt \enc, 192
294 .endm
295 
296 // void aes_xts_encrypt_zvkned_zvbb_zvkg(const struct crypto_aes_ctx *key,
297 //                                       const u8 *in, u8 *out, size_t len,
298 //                                       u8 tweak[16]);
299 //
300 // |key| is the data key.  |tweak| contains the next tweak; the encryption of
301 // the original IV with the tweak key was already done.  This function supports
302 // incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and
303 // |len| must be a multiple of 16 except on the last call.  If |len| is a
304 // multiple of 16, then this function updates |tweak| to contain the next tweak.
305 SYM_FUNC_START(aes_xts_encrypt_zvkned_zvbb_zvkg)
306         aes_xts_crypt   1       // enc=1; the expansion returns via its own ret
307 SYM_FUNC_END(aes_xts_encrypt_zvkned_zvbb_zvkg)
308 
309 // Same prototype and calling convention as the encryption function
310 SYM_FUNC_START(aes_xts_decrypt_zvkned_zvbb_zvkg)
311         aes_xts_crypt   0       // enc=0; the expansion returns via its own ret
312 SYM_FUNC_END(aes_xts_decrypt_zvkned_zvbb_zvkg)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php