
TOMOYO Linux Cross Reference
Linux/arch/riscv/crypto/chacha-riscv64-zvkb.S


Source

/arch/riscv/crypto/chacha-riscv64-zvkb.S (Version linux-6.12-rc7)


/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')

#include <linux/linkage.h>

.text
.option arch, +zvkb

#define KEYP            a0
#define INP             a1
#define OUTP            a2
#define LEN             a3
#define IVP             a4

#define CONSTS0         a5
#define CONSTS1         a6
#define CONSTS2         a7
#define CONSTS3         t0
#define TMP             t1
#define VL              t2
#define STRIDE          t3
#define NROUNDS         t4
#define KEY0            s0
#define KEY1            s1
#define KEY2            s2
#define KEY3            s3
#define KEY4            s4
#define KEY5            s5
#define KEY6            s6
#define KEY7            s7
#define COUNTER         s8
#define NONCE0          s9
#define NONCE1          s10
#define NONCE2          s11

.macro  chacha_round    a0, b0, c0, d0,  a1, b1, c1, d1, \
                        a2, b2, c2, d2,  a3, b3, c3, d3
        // a += b; d ^= a; d = rol(d, 16);
        vadd.vv         \a0, \a0, \b0
        vadd.vv         \a1, \a1, \b1
        vadd.vv         \a2, \a2, \b2
        vadd.vv         \a3, \a3, \b3
        vxor.vv         \d0, \d0, \a0
        vxor.vv         \d1, \d1, \a1
        vxor.vv         \d2, \d2, \a2
        vxor.vv         \d3, \d3, \a3
        vror.vi         \d0, \d0, 32 - 16
        vror.vi         \d1, \d1, 32 - 16
        vror.vi         \d2, \d2, 32 - 16
        vror.vi         \d3, \d3, 32 - 16

        // c += d; b ^= c; b = rol(b, 12);
        vadd.vv         \c0, \c0, \d0
        vadd.vv         \c1, \c1, \d1
        vadd.vv         \c2, \c2, \d2
        vadd.vv         \c3, \c3, \d3
        vxor.vv         \b0, \b0, \c0
        vxor.vv         \b1, \b1, \c1
        vxor.vv         \b2, \b2, \c2
        vxor.vv         \b3, \b3, \c3
        vror.vi         \b0, \b0, 32 - 12
        vror.vi         \b1, \b1, 32 - 12
        vror.vi         \b2, \b2, 32 - 12
        vror.vi         \b3, \b3, 32 - 12

        // a += b; d ^= a; d = rol(d, 8);
        vadd.vv         \a0, \a0, \b0
        vadd.vv         \a1, \a1, \b1
        vadd.vv         \a2, \a2, \b2
        vadd.vv         \a3, \a3, \b3
        vxor.vv         \d0, \d0, \a0
        vxor.vv         \d1, \d1, \a1
        vxor.vv         \d2, \d2, \a2
        vxor.vv         \d3, \d3, \a3
        vror.vi         \d0, \d0, 32 - 8
        vror.vi         \d1, \d1, 32 - 8
        vror.vi         \d2, \d2, 32 - 8
        vror.vi         \d3, \d3, 32 - 8

        // c += d; b ^= c; b = rol(b, 7);
        vadd.vv         \c0, \c0, \d0
        vadd.vv         \c1, \c1, \d1
        vadd.vv         \c2, \c2, \d2
        vadd.vv         \c3, \c3, \d3
        vxor.vv         \b0, \b0, \c0
        vxor.vv         \b1, \b1, \c1
        vxor.vv         \b2, \b2, \c2
        vxor.vv         \b3, \b3, \c3
        vror.vi         \b0, \b0, 32 - 7
        vror.vi         \b1, \b1, 32 - 7
        vror.vi         \b2, \b2, 32 - 7
        vror.vi         \b3, \b3, 32 - 7
.endm

// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
//                    const u32 iv[4]);
//
// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE).
// The counter is treated as 32-bit, following the RFC 7539 convention.
SYM_FUNC_START(chacha20_zvkb)
        srli            LEN, LEN, 6     // Bytes to blocks

        addi            sp, sp, -96
        sd              s0, 0(sp)
        sd              s1, 8(sp)
        sd              s2, 16(sp)
        sd              s3, 24(sp)
        sd              s4, 32(sp)
        sd              s5, 40(sp)
        sd              s6, 48(sp)
        sd              s7, 56(sp)
        sd              s8, 64(sp)
        sd              s9, 72(sp)
        sd              s10, 80(sp)
        sd              s11, 88(sp)

        li              STRIDE, 64

        // Set up the initial state matrix in scalar registers.
        li              CONSTS0, 0x61707865     // "expa" little endian
        li              CONSTS1, 0x3320646e     // "nd 3" little endian
        li              CONSTS2, 0x79622d32     // "2-by" little endian
        li              CONSTS3, 0x6b206574     // "te k" little endian
        lw              KEY0, 0(KEYP)
        lw              KEY1, 4(KEYP)
        lw              KEY2, 8(KEYP)
        lw              KEY3, 12(KEYP)
        lw              KEY4, 16(KEYP)
        lw              KEY5, 20(KEYP)
        lw              KEY6, 24(KEYP)
        lw              KEY7, 28(KEYP)
        lw              COUNTER, 0(IVP)
        lw              NONCE0, 4(IVP)
        lw              NONCE1, 8(IVP)
        lw              NONCE2, 12(IVP)

.Lblock_loop:
        // Set vl to the number of blocks to process in this iteration.
        vsetvli         VL, LEN, e32, m1, ta, ma

        // Set up the initial state matrix for the next VL blocks in v0-v15.
        // v{i} holds the i'th 32-bit word of the state for all the blocks.
        // Note that only the counter word, at index 12, differs across blocks.
        vmv.v.x         v0, CONSTS0
        vmv.v.x         v1, CONSTS1
        vmv.v.x         v2, CONSTS2
        vmv.v.x         v3, CONSTS3
        vmv.v.x         v4, KEY0
        vmv.v.x         v5, KEY1
        vmv.v.x         v6, KEY2
        vmv.v.x         v7, KEY3
        vmv.v.x         v8, KEY4
        vmv.v.x         v9, KEY5
        vmv.v.x         v10, KEY6
        vmv.v.x         v11, KEY7
        vid.v           v12
        vadd.vx         v12, v12, COUNTER
        vmv.v.x         v13, NONCE0
        vmv.v.x         v14, NONCE1
        vmv.v.x         v15, NONCE2

        // Load the first half of the input data for each block into v16-v23.
        // v{16+i} holds the i'th 32-bit word of the input for all the blocks.
        vlsseg8e32.v    v16, (INP), STRIDE

        li              NROUNDS, 20
.Lnext_doubleround:
        addi            NROUNDS, NROUNDS, -2
        // column round
        chacha_round    v0, v4, v8, v12, v1, v5, v9, v13, \
                        v2, v6, v10, v14, v3, v7, v11, v15
        // diagonal round
        chacha_round    v0, v5, v10, v15, v1, v6, v11, v12, \
                        v2, v7, v8, v13, v3, v4, v9, v14
        bnez            NROUNDS, .Lnext_doubleround

        // Load the second half of the input data for each block into v24-v31.
        // v{24+i} holds the {8+i}'th 32-bit word of the input for all the blocks.
        addi            TMP, INP, 32
        vlsseg8e32.v    v24, (TMP), STRIDE

        // Finalize the first half of the keystream for each block.
        vadd.vx         v0, v0, CONSTS0
        vadd.vx         v1, v1, CONSTS1
        vadd.vx         v2, v2, CONSTS2
        vadd.vx         v3, v3, CONSTS3
        vadd.vx         v4, v4, KEY0
        vadd.vx         v5, v5, KEY1
        vadd.vx         v6, v6, KEY2
        vadd.vx         v7, v7, KEY3

        // Encrypt/decrypt the first half of the data for each block.
        vxor.vv         v16, v16, v0
        vxor.vv         v17, v17, v1
        vxor.vv         v18, v18, v2
        vxor.vv         v19, v19, v3
        vxor.vv         v20, v20, v4
        vxor.vv         v21, v21, v5
        vxor.vv         v22, v22, v6
        vxor.vv         v23, v23, v7

        // Store the first half of the output data for each block.
        vssseg8e32.v    v16, (OUTP), STRIDE

        // Finalize the second half of the keystream for each block.
        vadd.vx         v8, v8, KEY4
        vadd.vx         v9, v9, KEY5
        vadd.vx         v10, v10, KEY6
        vadd.vx         v11, v11, KEY7
        vid.v           v0
        vadd.vx         v12, v12, COUNTER
        vadd.vx         v13, v13, NONCE0
        vadd.vx         v14, v14, NONCE1
        vadd.vx         v15, v15, NONCE2
        vadd.vv         v12, v12, v0

        // Encrypt/decrypt the second half of the data for each block.
        vxor.vv         v24, v24, v8
        vxor.vv         v25, v25, v9
        vxor.vv         v26, v26, v10
        vxor.vv         v27, v27, v11
        vxor.vv         v28, v28, v12
        vxor.vv         v29, v29, v13
        vxor.vv         v30, v30, v14
        vxor.vv         v31, v31, v15

        // Store the second half of the output data for each block.
        addi            TMP, OUTP, 32
        vssseg8e32.v    v24, (TMP), STRIDE

        // Update the counter, the remaining number of blocks, and the input and
        // output pointers according to the number of blocks processed (VL).
        add             COUNTER, COUNTER, VL
        sub             LEN, LEN, VL
        slli            TMP, VL, 6
        add             OUTP, OUTP, TMP
        add             INP, INP, TMP
        bnez            LEN, .Lblock_loop

        ld              s0, 0(sp)
        ld              s1, 8(sp)
        ld              s2, 16(sp)
        ld              s3, 24(sp)
        ld              s4, 32(sp)
        ld              s5, 40(sp)
        ld              s6, 48(sp)
        ld              s7, 56(sp)
        ld              s8, 64(sp)
        ld              s9, 72(sp)
        ld              s10, 80(sp)
        ld              s11, 88(sp)
        addi            sp, sp, 96
        ret
SYM_FUNC_END(chacha20_zvkb)
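
Notes

The chacha_round macro above interleaves four independent ChaCha quarter rounds, with each vector lane carrying the state of a different 64-byte block. Zvkb's vror.vi rotates right, so each left rotation rol(x, n) is expressed as a right rotation by 32 - n. As a reference point, here is a minimal scalar C sketch of the same quarter round and of one double round (a column round then a diagonal round, matching the two chacha_round invocations per .Lnext_doubleround iteration); this is an illustrative model, not kernel code:

    #include <stdint.h>

    /* Rotate a 32-bit word left by n bits (1 <= n <= 31). */
    static inline uint32_t rol32(uint32_t x, unsigned int n)
    {
            return (x << n) | (x >> (32 - n));
    }

    /* One ChaCha quarter round; the assembly macro runs four of these in
     * parallel, across all VL blocks at once. */
    static void quarter_round(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
    {
            *a += *b; *d ^= *a; *d = rol32(*d, 16);
            *c += *d; *b ^= *c; *b = rol32(*b, 12);
            *a += *b; *d ^= *a; *d = rol32(*d, 8);
            *c += *d; *b ^= *c; *b = rol32(*b, 7);
    }

    /* One double round over the 4x4 state x[16]: columns, then diagonals. */
    static void double_round(uint32_t x[16])
    {
            quarter_round(&x[0], &x[4], &x[8],  &x[12]);  /* column 0 */
            quarter_round(&x[1], &x[5], &x[9],  &x[13]);  /* column 1 */
            quarter_round(&x[2], &x[6], &x[10], &x[14]);  /* column 2 */
            quarter_round(&x[3], &x[7], &x[11], &x[15]);  /* column 3 */
            quarter_round(&x[0], &x[5], &x[10], &x[15]);  /* diagonal 0 */
            quarter_round(&x[1], &x[6], &x[11], &x[12]);  /* diagonal 1 */
            quarter_round(&x[2], &x[7], &x[8],  &x[13]);  /* diagonal 2 */
            quarter_round(&x[3], &x[4], &x[9],  &x[14]);  /* diagonal 3 */
    }

With NROUNDS initialized to 20 and decremented by 2 per iteration, the loop performs ChaCha20's ten double rounds.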
                                                      
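The per-block state setup at the top of .Lblock_loop broadcasts fifteen of the sixteen state words to every lane; only word 12 differs, where vid.v followed by vadd.vx gives lane i the value COUNTER + i. The four constants spell "expand 32-byte k" in little-endian ASCII. A rough C model of the state a single lane sees (the helper name init_state is hypothetical):

    #include <stdint.h>
    #include <string.h>

    /* State of lane "blk" after the vmv.v.x/vid.v/vadd.vx sequence:
     * words 0-3 constants, 4-11 key, 12 counter + lane index, 13-15 nonce. */
    static void init_state(uint32_t x[16], const uint32_t key[8],
                           uint32_t counter, const uint32_t nonce[3],
                           uint32_t blk)
    {
            static const uint32_t consts[4] = {
                    0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
            };
            memcpy(&x[0], consts, sizeof(consts));     /* "expand 32-byte k" */
            memcpy(&x[4], key, 8 * sizeof(*key));
            x[12] = counter + blk;                     /* the only per-lane word */
            memcpy(&x[13], nonce, 3 * sizeof(*nonce));
    }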

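vlsseg8e32.v v16, (INP), STRIDE is a strided segment load: for each lane it reads eight consecutive 32-bit words starting at INP + lane * 64 and de-interleaves them, so v{16+i} collects word i from every block. The second half is the same load from INP + 32 into v24-v31, and the vssseg8e32.v stores are the exact mirror. A rough C model of what one such load gathers (array shapes chosen purely for illustration):

    #include <stdint.h>
    #include <string.h>

    #define MAX_VL 64  /* illustrative upper bound on the number of lanes */

    /* Model of "vlsseg8e32.v v, (base), 64": v[i][lane] receives the i'th
     * 32-bit word of lane's 64-byte block, i.e. a (block, word) ->
     * (word, block) transpose done by the hardware in one instruction. */
    static void model_vlsseg8e32(uint32_t v[8][MAX_VL], const uint8_t *base,
                                 size_t vl)
    {
            for (size_t lane = 0; lane < vl; lane++)
                    for (int i = 0; i < 8; i++)
                            memcpy(&v[i][lane], base + lane * 64 + i * 4,
                                   sizeof(uint32_t));
    }

This transposed layout is what lets the keystream finalization and the XOR with the data proceed as plain vadd.vx/vxor.vv on whole registers.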

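The contract stated above SYM_FUNC_START is strict: len must be nonzero and a multiple of 64, and the routine never writes the counter back to iv, so the caller tracks it. A hedged sketch of a caller under those assumptions (the wrapper name and the tail-block handling are hypothetical, not the kernel's actual glue code):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    typedef uint32_t u32;
    typedef uint8_t u8;

    /* Prototype from the comment above SYM_FUNC_START. */
    void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
                       const u32 iv[4]);

    /* Hypothetical wrapper: feed whole 64-byte blocks to the vector routine,
     * then encrypt any partial final block via a zero-padded bounce buffer.
     * iv[0] is the 32-bit block counter; iv[1..3] hold the 96-bit nonce. */
    static void chacha20_crypt(const u32 key[8], u32 iv[4],
                               const u8 *in, u8 *out, size_t len)
    {
            size_t full = len & ~(size_t)63;

            if (full) {
                    chacha20_zvkb(key, in, out, full, iv);
                    iv[0] += full / 64;     /* the asm does not update iv */
            }
            if (len > full) {
                    u8 block[64] = {0};

                    memcpy(block, in + full, len - full);
                    chacha20_zvkb(key, block, block, 64, iv);
                    memcpy(out + full, block, len - full);
                    iv[0]++;
            }
    }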