TOMOYO Linux Cross Reference
Linux/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S

Version: linux-6.12-rc7

/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128 && VLEN < 2048
// - RISC-V Vector AES block cipher extension ('Zvkned')
// - RISC-V Vector Bit-manipulation extension ('Zvbb')
// - RISC-V Vector GCM/GMAC extension ('Zvkg')

#include <linux/linkage.h>

.text
.option arch, +zvkned, +zvbb, +zvkg

#include "aes-macros.S"

#define KEYP            a0
#define INP             a1
#define OUTP            a2
#define LEN             a3
#define TWEAKP          a4

#define LEN32           a5
#define TAIL_LEN        a6
#define VL              a7
#define VLMAX           t4

// v1-v15 contain the AES round keys, but they are used as temporaries before
// the AES round keys have been loaded.
#define TWEAKS          v16     // LMUL=4 (most of the time)
#define TWEAKS_BREV     v20     // LMUL=4 (most of the time)
#define MULTS_BREV      v24     // LMUL=4 (most of the time)
#define TMP0            v28
#define TMP1            v29
#define TMP2            v30
#define TMP3            v31

// xts_init initializes the following values:
//
//      TWEAKS: N 128-bit tweaks T*(x^i) for i in 0..(N - 1)
//      TWEAKS_BREV: same as TWEAKS, but bit-reversed
//      MULTS_BREV: N 128-bit values x^N, bit-reversed
//
// N is the maximum number of blocks that will be processed per loop iteration,
// computed using vsetvli.
//
// The field convention used by XTS is the same as that of GHASH, but with the
// bits reversed within each byte.  The zvkg extension provides the vgmul
// instruction which does multiplication in this field.  Therefore, for tweak
// computation we use vgmul to do multiplications of tweaks in parallel, instead
// of serially multiplying by x using shifting+xoring.  Note that for this to
// work, the inputs and outputs to vgmul must be bit-reversed (done with vbrev8).
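//
// For illustration, the serial approach that vgmul replaces would advance one
// tweak at a time with a shift+xor multiply by x.  A rough C sketch in the XTS
// little-endian block convention follows (xts_mul_x is an illustrative name,
// not a function used by this file; reduction is by x^128 + x^7 + x^2 + x + 1):
//
//      void xts_mul_x(unsigned char t[16])
//      {
//              int carry = t[15] >> 7;  /* bit shifted out of the top */
//              int i;
//
//              for (i = 15; i > 0; i--)
//                      t[i] = (t[i] << 1) | (t[i - 1] >> 7);
//              t[0] <<= 1;
//              if (carry)
//                      t[0] ^= 0x87;    /* reduce by the polynomial */
//      }
//
// Computing N tweaks this way takes N dependent steps; the code below instead
// computes all N values T*(x^i) with a single parallel vgmul.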
.macro  xts_init

        // Load the first tweak T.
        vsetivli        zero, 4, e32, m1, ta, ma
        vle32.v         TWEAKS, (TWEAKP)

        // If there's only one block (or no blocks at all), then skip the tweak
        // sequence computation because (at most) T itself is needed.
        li              t0, 16
        ble             LEN, t0, .Linit_single_block\@

        // Save a copy of T bit-reversed in v12.
        vbrev8.v        v12, TWEAKS

        //
        // Generate x^i for i in 0..(N - 1), i.e. 128-bit values 1 << i assuming
        // that N <= 128.  Though, this code actually requires N < 64 (or
        // equivalently VLEN < 2048) due to the use of 64-bit intermediate
        // values here and in the x^N computation later.
        //
        vsetvli         VL, LEN32, e32, m4, ta, ma
        srli            t0, VL, 2       // t0 = N
        // Generate two sequences, each with N 32-bit values:
        // v0=[1, 1, 1, ...] and v1=[0, 1, 2, ...].
        vsetvli         zero, t0, e32, m1, ta, ma
        vmv.v.i         v0, 1
        vid.v           v1
        // Use vzext to zero-extend the sequences to 64 bits.  Reinterpret them
        // as two sequences, each with 2*N 32-bit values:
        // v2=[1, 0, 1, 0, 1, 0, ...] and v4=[0, 0, 1, 0, 2, 0, ...].
        vsetvli         zero, t0, e64, m2, ta, ma
        vzext.vf2       v2, v0
        vzext.vf2       v4, v1
        slli            t1, t0, 1       // t1 = 2*N
        vsetvli         zero, t1, e32, m2, ta, ma
        // Use vwsll to compute [1<<0, 0<<0, 1<<1, 0<<0, 1<<2, 0<<0, ...],
        // widening to 64 bits per element.  When reinterpreted as N 128-bit
        // values, this is the needed sequence of 128-bit values 1 << i (x^i).
        vwsll.vv        v8, v2, v4
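
        // As a worked example (illustration only), take N=4: v0=[1,1,1,1] and
        // v1=[0,1,2,3].  After vzext, v2=[1,0,1,0,1,0,1,0] and
        // v4=[0,0,1,0,2,0,3,0] when viewed as 32-bit elements.  vwsll then
        // produces the 64-bit elements [1,0,2,0,4,0,8,0], which viewed as
        // little-endian 128-bit values are [1, 2, 4, 8] = [x^0, x^1, x^2, x^3].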

        // Copy the bit-reversed T to all N elements of TWEAKS_BREV, then
        // multiply by x^i.  This gives the sequence T*(x^i), bit-reversed.
        vsetvli         zero, LEN32, e32, m4, ta, ma
        vmv.v.i         TWEAKS_BREV, 0
        vaesz.vs        TWEAKS_BREV, v12
        vbrev8.v        v8, v8
        vgmul.vv        TWEAKS_BREV, v8

        // Save a copy of the sequence T*(x^i) with the bit reversal undone.
        vbrev8.v        TWEAKS, TWEAKS_BREV

        // Generate N copies of x^N, i.e. 128-bit values 1 << N, bit-reversed.
        li              t1, 1
        sll             t1, t1, t0      // t1 = 1 << N
        vsetivli        zero, 2, e64, m1, ta, ma
        vmv.v.i         v0, 0
        vsetivli        zero, 1, e64, m1, tu, ma
        vmv.v.x         v0, t1
        vbrev8.v        v0, v0
        vsetvli         zero, LEN32, e32, m4, ta, ma
        vmv.v.i         MULTS_BREV, 0
        vaesz.vs        MULTS_BREV, v0

        j               .Linit_done\@

.Linit_single_block\@:
        vbrev8.v        TWEAKS_BREV, TWEAKS
.Linit_done\@:
.endm

// Set the first 128 bits of MULTS_BREV to 0x40, i.e. 'x' bit-reversed.  This is
// the multiplier required to advance the tweak by one.
.macro  load_x
        li              t0, 0x40
        vsetivli        zero, 4, e32, m1, ta, ma
        vmv.v.i         MULTS_BREV, 0
        vsetivli        zero, 1, e8, m1, tu, ma
        vmv.v.x         MULTS_BREV, t0
.endm
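
// As a quick check of the constant above: x is encoded as the byte 0x02, and
// reversing the bits within that byte (0b00000010 -> 0b01000000) gives 0x40.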

.macro  __aes_xts_crypt enc, keylen
        // With 16 < len <= 31, there's no main loop, just ciphertext stealing.
        beqz            LEN32, .Lcts_without_main_loop\@

        vsetvli         VLMAX, zero, e32, m4, ta, ma
1:
        vsetvli         VL, LEN32, e32, m4, ta, ma
2:
        // Encrypt or decrypt VL/4 blocks.
        vle32.v         TMP0, (INP)
        vxor.vv         TMP0, TMP0, TWEAKS
        aes_crypt       TMP0, \enc, \keylen
        vxor.vv         TMP0, TMP0, TWEAKS
        vse32.v         TMP0, (OUTP)

        // Update the pointers and the remaining length.
        slli            t0, VL, 2
        add             INP, INP, t0
        add             OUTP, OUTP, t0
        sub             LEN32, LEN32, VL

        // Check whether more blocks remain.
        beqz            LEN32, .Lmain_loop_done\@

        // Compute the next sequence of tweaks by multiplying the previous
        // sequence by x^N.  Store the result in both bit-reversed order and
        // regular order (i.e. with the bit reversal undone).
        vgmul.vv        TWEAKS_BREV, MULTS_BREV
        vbrev8.v        TWEAKS, TWEAKS_BREV

        // Since we compute the tweak multipliers x^N in advance, we require
        // that each iteration process the same length except possibly the last.
        // This conflicts slightly with the behavior allowed by the RISC-V Vector
        // Extension, where CPUs can select a lower vl for both of the last
        // two iterations.  E.g., vl might take the sequence of values
        // [16, 16, 16, 12, 12], whereas we need [16, 16, 16, 16, 8] so that we
        // can use x^4 again instead of computing x^3.  Therefore, we explicitly
        // keep the vl at VLMAX if at least VLMAX worth of length remains.
        bge             LEN32, VLMAX, 2b
        j               1b
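
        // For reference, each iteration above applies the standard XTS
        // per-block formula C[i] = E_K(P[i] ^ T[i]) ^ T[i] to VL/4 blocks at
        // once.  A scalar C sketch with illustrative helper names (xor_block,
        // aes_encrypt_block, and xts_mul_x from the comment above are not
        // functions in this file):
        //
        //      xor_block(buf, pt, tweak);         /* P ^ T         */
        //      aes_encrypt_block(key, buf);       /* E_K(P ^ T)    */
        //      xor_block(ct, buf, tweak);         /* ... ^ T       */
        //      xts_mul_x(tweak);                  /* advance tweak */
        //
        // The vector code keeps N tweaks live and advances all of them by x^N
        // per iteration instead of advancing one tweak at a time.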

.Lmain_loop_done\@:
        load_x

        // Compute the next tweak.
        addi            t0, VL, -4
        vsetivli        zero, 4, e32, m4, ta, ma
        vslidedown.vx   TWEAKS_BREV, TWEAKS_BREV, t0    // Extract the last tweak
        vsetivli        zero, 4, e32, m1, ta, ma
        vgmul.vv        TWEAKS_BREV, MULTS_BREV         // Multiply it by x

        bnez            TAIL_LEN, .Lcts\@

        // Update *TWEAKP to contain the next tweak.
        vbrev8.v        TWEAKS, TWEAKS_BREV
        vse32.v         TWEAKS, (TWEAKP)
        ret

.Lcts_without_main_loop\@:
        load_x
.Lcts\@:
        // TWEAKS_BREV now contains the next tweak.  Compute the one after that.
        vsetivli        zero, 4, e32, m1, ta, ma
        vmv.v.v         TMP0, TWEAKS_BREV
        vgmul.vv        TMP0, MULTS_BREV
        // Undo the bit reversal of the next two tweaks and store them in TMP1
        // and TMP2, such that TMP1 is the tweak used first below and TMP2 the
        // tweak used second.
.if \enc
        vbrev8.v        TMP1, TWEAKS_BREV
        vbrev8.v        TMP2, TMP0
.else
        vbrev8.v        TMP1, TMP0
        vbrev8.v        TMP2, TWEAKS_BREV
.endif

        // Encrypt/decrypt the last full block.
        vle32.v         TMP0, (INP)
        vxor.vv         TMP0, TMP0, TMP1
        aes_crypt       TMP0, \enc, \keylen
        vxor.vv         TMP0, TMP0, TMP1

        // Swap the first TAIL_LEN bytes of the above result with the tail.
        // Note that to support in-place encryption/decryption, the load from
        // the input tail must happen before the store to the output tail.
        addi            t0, INP, 16
        addi            t1, OUTP, 16
        vmv.v.v         TMP3, TMP0
        vsetvli         zero, TAIL_LEN, e8, m1, tu, ma
        vle8.v          TMP0, (t0)
        vse8.v          TMP3, (t1)
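
        // As an illustration (example values only): if TAIL_LEN = 4, the first
        // 4 bytes of the block just processed are written out as the 4-byte
        // tail, and the 4 tail bytes of the input take their place in TMP0
        // before the second encryption/decryption pass below.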

        // Encrypt/decrypt again and store the last full block.
        vsetivli        zero, 4, e32, m1, ta, ma
        vxor.vv         TMP0, TMP0, TMP2
        aes_crypt       TMP0, \enc, \keylen
        vxor.vv         TMP0, TMP0, TMP2
        vse32.v         TMP0, (OUTP)

        ret
.endm

.macro  aes_xts_crypt   enc

        // Check whether the length is a multiple of the AES block size.
        andi            TAIL_LEN, LEN, 15
        beqz            TAIL_LEN, 1f

        // The length isn't a multiple of the AES block size, so ciphertext
        // stealing will be required.  Ciphertext stealing involves special
        // handling of the partial block and the last full block, so subtract
        // the length of both from the length to be processed by the main loop.
        sub             LEN, LEN, TAIL_LEN
        addi            LEN, LEN, -16
1:
        srli            LEN32, LEN, 2
        // LEN and LEN32 now contain the total length of the blocks that will be
        // processed in the main loop, in bytes and 32-bit words respectively.
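
        // Worked example (illustration only): len = 40 gives TAIL_LEN = 8, and
        // LEN becomes 40 - 8 - 16 = 16, so the main loop handles one block
        // (LEN32 = 16/4 = 4 words), while ciphertext stealing handles the last
        // full block plus the 8-byte tail.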

        xts_init
        aes_begin       KEYP, 128f, 192f
        __aes_xts_crypt \enc, 256
128:
        __aes_xts_crypt \enc, 128
192:
        __aes_xts_crypt \enc, 192
.endm

// void aes_xts_encrypt_zvkned_zvbb_zvkg(const struct crypto_aes_ctx *key,
//                                       const u8 *in, u8 *out, size_t len,
//                                       u8 tweak[16]);
//
// |key| is the data key.  |tweak| contains the next tweak; the encryption of
// the original IV with the tweak key was already done.  This function supports
// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and
// |len| must be a multiple of 16 except on the last call.  If |len| is a
// multiple of 16, then this function updates |tweak| to contain the next tweak.
SYM_FUNC_START(aes_xts_encrypt_zvkned_zvbb_zvkg)
        aes_xts_crypt   1
SYM_FUNC_END(aes_xts_encrypt_zvkned_zvbb_zvkg)

// Same prototype and calling convention as the encryption function.
SYM_FUNC_START(aes_xts_decrypt_zvkned_zvbb_zvkg)
        aes_xts_crypt   0
SYM_FUNC_END(aes_xts_decrypt_zvkned_zvbb_zvkg)
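
// For context, a rough sketch of how a caller might invoke these functions.
// This is an illustration, not the actual glue code (the real caller lives in
// arch/riscv/crypto/aes-riscv64-glue.c); the variable names are made up, and
// aes_encrypt here stands for the kernel's generic AES library routine:
//
//      u8 tweak[16];
//
//      /* The caller first encrypts the IV with the tweak key... */
//      aes_encrypt(&tweak_key, tweak, iv);
//      /* ...then processes the data with the data key. */
//      aes_xts_encrypt_zvkned_zvbb_zvkg(&data_key, src, dst, len, tweak);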
                                                      
