
TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/xor-neon.c


Source listing of arch/arm64/lib/xor-neon.c: NEON- and SHA3-accelerated XOR block operations for the kernel's RAID xor framework.


// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/lib/xor-neon.c
 *
 * Authors: Jackie Liu <liuyun01@kylinos.cn>
 * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
 */

#include <linux/raid/xor.h>
#include <linux/module.h>
#include <asm/neon-intrinsics.h>

static void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
        const unsigned long * __restrict p2)
{
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;

        register uint64x2_t v0, v1, v2, v3;
        long lines = bytes / (sizeof(uint64x2_t) * 4);

        do {
                /* p1 ^= p2 */
                v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
                v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
                v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
                v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));

                /* store */
                vst1q_u64(dp1 +  0, v0);
                vst1q_u64(dp1 +  2, v1);
                vst1q_u64(dp1 +  4, v2);
                vst1q_u64(dp1 +  6, v3);

                dp1 += 8;
                dp2 += 8;
        } while (--lines > 0);
}

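Each xor_arm64_neon_N routine XORs N-1 source buffers into p1, consuming 64 bytes (four 128-bit NEON vectors) per loop iteration; the do/while form assumes bytes is a nonzero multiple of 64. For reference, a minimal scalar sketch of the two-source case (xor_scalar_2 is a hypothetical name, not part of the kernel file):

        /* Scalar model of xor_arm64_neon_2: p1[i] ^= p2[i] across 'bytes'.
         * Illustrative only; the NEON version above does the real work. */
        static void xor_scalar_2(unsigned long bytes, unsigned long *p1,
                                 const unsigned long *p2)
        {
                unsigned long i, n = bytes / sizeof(unsigned long);

                for (i = 0; i < n; i++)
                        p1[i] ^= p2[i];
        }
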
static void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
        const unsigned long * __restrict p2,
        const unsigned long * __restrict p3)
{
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
        uint64_t *dp3 = (uint64_t *)p3;

        register uint64x2_t v0, v1, v2, v3;
        long lines = bytes / (sizeof(uint64x2_t) * 4);

        do {
                /* p1 ^= p2 */
                v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
                v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
                v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
                v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));

                /* p1 ^= p3 */
                v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
                v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
                v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
                v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));

                /* store */
                vst1q_u64(dp1 +  0, v0);
                vst1q_u64(dp1 +  2, v1);
                vst1q_u64(dp1 +  4, v2);
                vst1q_u64(dp1 +  6, v3);

                dp1 += 8;
                dp2 += 8;
                dp3 += 8;
        } while (--lines > 0);
}

static void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
        const unsigned long * __restrict p2,
        const unsigned long * __restrict p3,
        const unsigned long * __restrict p4)
{
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
        uint64_t *dp3 = (uint64_t *)p3;
        uint64_t *dp4 = (uint64_t *)p4;

        register uint64x2_t v0, v1, v2, v3;
        long lines = bytes / (sizeof(uint64x2_t) * 4);

        do {
                /* p1 ^= p2 */
                v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
                v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
                v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
                v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));

                /* p1 ^= p3 */
                v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
                v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
                v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
                v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));

                /* p1 ^= p4 */
                v0 = veorq_u64(v0, vld1q_u64(dp4 +  0));
                v1 = veorq_u64(v1, vld1q_u64(dp4 +  2));
                v2 = veorq_u64(v2, vld1q_u64(dp4 +  4));
                v3 = veorq_u64(v3, vld1q_u64(dp4 +  6));

                /* store */
                vst1q_u64(dp1 +  0, v0);
                vst1q_u64(dp1 +  2, v1);
                vst1q_u64(dp1 +  4, v2);
                vst1q_u64(dp1 +  6, v3);

                dp1 += 8;
                dp2 += 8;
                dp3 += 8;
                dp4 += 8;
        } while (--lines > 0);
}

static void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
        const unsigned long * __restrict p2,
        const unsigned long * __restrict p3,
        const unsigned long * __restrict p4,
        const unsigned long * __restrict p5)
{
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
        uint64_t *dp3 = (uint64_t *)p3;
        uint64_t *dp4 = (uint64_t *)p4;
        uint64_t *dp5 = (uint64_t *)p5;

        register uint64x2_t v0, v1, v2, v3;
        long lines = bytes / (sizeof(uint64x2_t) * 4);

        do {
                /* p1 ^= p2 */
                v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
                v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
                v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
                v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));

                /* p1 ^= p3 */
                v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
                v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
                v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
                v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));

                /* p1 ^= p4 */
                v0 = veorq_u64(v0, vld1q_u64(dp4 +  0));
                v1 = veorq_u64(v1, vld1q_u64(dp4 +  2));
                v2 = veorq_u64(v2, vld1q_u64(dp4 +  4));
                v3 = veorq_u64(v3, vld1q_u64(dp4 +  6));

                /* p1 ^= p5 */
                v0 = veorq_u64(v0, vld1q_u64(dp5 +  0));
                v1 = veorq_u64(v1, vld1q_u64(dp5 +  2));
                v2 = veorq_u64(v2, vld1q_u64(dp5 +  4));
                v3 = veorq_u64(v3, vld1q_u64(dp5 +  6));

                /* store */
                vst1q_u64(dp1 +  0, v0);
                vst1q_u64(dp1 +  2, v1);
                vst1q_u64(dp1 +  4, v2);
                vst1q_u64(dp1 +  6, v3);

                dp1 += 8;
                dp2 += 8;
                dp3 += 8;
                dp4 += 8;
                dp5 += 8;
        } while (--lines > 0);
}

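Note that all five routines keep four vectors (v0..v3) in flight per iteration; unrolling the loop to 64 bytes like this presumably lets loads, EORs and stores from adjacent vectors overlap instead of serializing on a single register dependency chain.
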
struct xor_block_template xor_block_inner_neon __ro_after_init = {
        .name   = "__inner_neon__",
        .do_2   = xor_arm64_neon_2,
        .do_3   = xor_arm64_neon_3,
        .do_4   = xor_arm64_neon_4,
        .do_5   = xor_arm64_neon_5,
};
EXPORT_SYMBOL(xor_block_inner_neon);

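xor_block_inner_neon plugs into the generic xor template framework (include/linux/raid/xor.h, crypto/xor.c), which benchmarks the available templates at boot and routes xor_blocks() to the fastest one. A hedged usage sketch, assuming page-sized buffers; xor_two_pages is a hypothetical helper, not kernel code:

        #include <linux/raid/xor.h>

        /* Illustrative only: XOR two source pages into dest through the
         * generic xor_blocks() API, which dispatches to whichever template
         * won the boot-time benchmark (possibly xor_block_inner_neon). */
        static void xor_two_pages(void *dest, void *src1, void *src2)
        {
                void *srcs[2] = { src1, src2 };

                xor_blocks(2, PAGE_SIZE, dest, srcs);
        }
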
/* EOR3: single-instruction three-way XOR from the ARMv8.2 SHA3 extension */
static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
{
        uint64x2_t res;

        asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n"
            "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
            : "=w"(res) : "w"(p), "w"(q), "w"(r));
        return res;
}

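One EOR3 produces p ^ q ^ r directly, replacing two dependent EORs and shortening the critical path through the loop body. Functionally, the helper above is equivalent to this plain-NEON sketch (eor3_fallback is a hypothetical name, relying on the same <asm/neon-intrinsics.h> header already included):

        /* Same result via two chained NEON EORs; the inline asm above
         * folds them into a single EOR3 instruction. Sketch only. */
        static inline uint64x2_t eor3_fallback(uint64x2_t p, uint64x2_t q,
                                               uint64x2_t r)
        {
                return veorq_u64(veorq_u64(p, q), r);
        }
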
static void xor_arm64_eor3_3(unsigned long bytes,
        unsigned long * __restrict p1,
        const unsigned long * __restrict p2,
        const unsigned long * __restrict p3)
{
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
        uint64_t *dp3 = (uint64_t *)p3;

        register uint64x2_t v0, v1, v2, v3;
        long lines = bytes / (sizeof(uint64x2_t) * 4);

        do {
                /* p1 ^= p2 ^ p3 */
                v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
                          vld1q_u64(dp3 + 0));
                v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
                          vld1q_u64(dp3 + 2));
                v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
                          vld1q_u64(dp3 + 4));
                v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
                          vld1q_u64(dp3 + 6));

                /* store */
                vst1q_u64(dp1 + 0, v0);
                vst1q_u64(dp1 + 2, v1);
                vst1q_u64(dp1 + 4, v2);
                vst1q_u64(dp1 + 6, v3);

                dp1 += 8;
                dp2 += 8;
                dp3 += 8;
        } while (--lines > 0);
}

static void xor_arm64_eor3_4(unsigned long bytes,
        unsigned long * __restrict p1,
        const unsigned long * __restrict p2,
        const unsigned long * __restrict p3,
        const unsigned long * __restrict p4)
{
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
        uint64_t *dp3 = (uint64_t *)p3;
        uint64_t *dp4 = (uint64_t *)p4;

        register uint64x2_t v0, v1, v2, v3;
        long lines = bytes / (sizeof(uint64x2_t) * 4);

        do {
                /* p1 ^= p2 ^ p3 */
                v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
                          vld1q_u64(dp3 + 0));
                v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
                          vld1q_u64(dp3 + 2));
                v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
                          vld1q_u64(dp3 + 4));
                v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
                          vld1q_u64(dp3 + 6));

                /* p1 ^= p4 */
                v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
                v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
                v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
                v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));

                /* store */
                vst1q_u64(dp1 + 0, v0);
                vst1q_u64(dp1 + 2, v1);
                vst1q_u64(dp1 + 4, v2);
                vst1q_u64(dp1 + 6, v3);

                dp1 += 8;
                dp2 += 8;
                dp3 += 8;
                dp4 += 8;
        } while (--lines > 0);
}

static void xor_arm64_eor3_5(unsigned long bytes,
        unsigned long * __restrict p1,
        const unsigned long * __restrict p2,
        const unsigned long * __restrict p3,
        const unsigned long * __restrict p4,
        const unsigned long * __restrict p5)
{
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
        uint64_t *dp3 = (uint64_t *)p3;
        uint64_t *dp4 = (uint64_t *)p4;
        uint64_t *dp5 = (uint64_t *)p5;

        register uint64x2_t v0, v1, v2, v3;
        long lines = bytes / (sizeof(uint64x2_t) * 4);

        do {
                /* p1 ^= p2 ^ p3 */
                v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
                          vld1q_u64(dp3 + 0));
                v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
                          vld1q_u64(dp3 + 2));
                v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
                          vld1q_u64(dp3 + 4));
                v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
                          vld1q_u64(dp3 + 6));

                /* p1 ^= p4 ^ p5 */
                v0 = eor3(v0, vld1q_u64(dp4 + 0), vld1q_u64(dp5 + 0));
                v1 = eor3(v1, vld1q_u64(dp4 + 2), vld1q_u64(dp5 + 2));
                v2 = eor3(v2, vld1q_u64(dp4 + 4), vld1q_u64(dp5 + 4));
                v3 = eor3(v3, vld1q_u64(dp4 + 6), vld1q_u64(dp5 + 6));

                /* store */
                vst1q_u64(dp1 + 0, v0);
                vst1q_u64(dp1 + 2, v1);
                vst1q_u64(dp1 + 4, v2);
                vst1q_u64(dp1 + 6, v3);

                dp1 += 8;
                dp2 += 8;
                dp3 += 8;
                dp4 += 8;
                dp5 += 8;
        } while (--lines > 0);
}

static int __init xor_neon_init(void)
{
        /* Switch to the EOR3 versions when toolchain and CPU support SHA3 */
        if (IS_ENABLED(CONFIG_AS_HAS_SHA3) && cpu_have_named_feature(SHA3)) {
                xor_block_inner_neon.do_3 = xor_arm64_eor3_3;
                xor_block_inner_neon.do_4 = xor_arm64_eor3_4;
                xor_block_inner_neon.do_5 = xor_arm64_eor3_5;
        }
        return 0;
}
module_init(xor_neon_init);

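The double gate matters: CONFIG_AS_HAS_SHA3 confirms the assembler can emit the EOR3 encoding at build time, and cpu_have_named_feature(SHA3) confirms the running CPU implements the extension. On hardware without SHA3 the plain veorq_u64 versions stay in place; either way, crypto/xor.c still benchmarks the resulting template at boot and logs which xor function it settled on.
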
static void __exit xor_neon_exit(void)
{
}
module_exit(xor_neon_exit);

MODULE_AUTHOR("Jackie Liu <liuyun01@kylinos.cn>");
MODULE_DESCRIPTION("ARMv8 XOR Extensions");
MODULE_LICENSE("GPL");
