~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/x86/crypto/serpent-sse2-x86_64-asm_64.S (Version linux-6.12-rc7) and /arch/i386/crypto/serpent-sse2-x86_64-asm_64.S (Version linux-5.2.21)


  1 /* SPDX-License-Identifier: GPL-2.0-or-later */
  2 /*                                                
  3  * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2)
  4  *                                                
  5  * Copyright (C) 2011 Jussi Kivilinna <jussi.ki    
  6  *                                                
  7  * Based on crypto/serpent.c by                   
  8  *  Copyright (C) 2002 Dag Arne Osvik <osvik@ii    
  9  *                2003 Herbert Valerio Riedel <    
 10  */                                               
 11                                                   
 12 #include <linux/linkage.h>                        
 13                                                   
 14 .file "serpent-sse2-x86_64-asm_64.S"              
 15 .text                                             
 16                                                   
 17 #define CTX %rdi                  /* base register get_key() reads key words from */
 18                                                   
 19 /*********************************************    
 20   8-way SSE2 serpent                              
 21  *********************************************/
 22 #define RA1 %xmm0                 /* register set 1: names ending in 1, selected by */
 23 #define RB1 %xmm1                 /* the "x ## 1" token pastes in K2/LK2/KL2/S/SP */
 24 #define RC1 %xmm2                                 
 25 #define RD1 %xmm3                                 
 26 #define RE1 %xmm4                                 
 27                                                   
 28 #define RA2 %xmm5                 /* register set 2: same roles, selected by "x ## 2" */
 29 #define RB2 %xmm6                                 
 30 #define RC2 %xmm7                                 
 31 #define RD2 %xmm8                                 
 32 #define RE2 %xmm9                                 
 33                                                   
 34 #define RNOT %xmm10               /* XOR operand of the S-box macros; NOTE(review): presumably all-ones, set outside this excerpt */
 35                                                   
 36 #define RK0 %xmm11                /* RK0..RK3: targets of get_key(), one broadcast key word each */
 37 #define RK1 %xmm12                                
 38 #define RK2 %xmm13                                
 39 #define RK3 %xmm14                                
 40                                                   
 41 #define S0_1(x0, x1, x2, x3, x4) \                
            /* First half of the S0 pair; S()/SP() paste it as SBOX ## _1. */ \
            /* AT&T operand order (source, destination).  x4 is scratch: its */ \
            /* incoming value is discarded by the movdqa below. */ \
            /* NOTE(review): RNOT is presumably all-ones, making "pxor RNOT" a */ \
            /* bitwise NOT; it is initialised outside this excerpt - confirm. */ \
 42         movdqa x3,              x4; \             
 43         por x0,                 x3; \             
 44         pxor x4,                x0; \             
 45         pxor x2,                x4; \             
 46         pxor RNOT,              x4; \             
 47         pxor x1,                x3; \             
 48         pand x0,                x1; \             
 49         pxor x4,                x1; \             
 50         pxor x0,                x2;               
 51 #define S0_2(x0, x1, x2, x3, x4) \                
            /* Second half (SBOX ## _2): continues from the register state left */ \
            /* by S0_1, x4 included, so it must follow S0_1 with the same operands. */ \
 52         pxor x3,                x0; \             
 53         por x0,                 x4; \             
 54         pxor x2,                x0; \             
 55         pand x1,                x2; \             
 56         pxor x2,                x3; \             
 57         pxor RNOT,              x1; \             
 58         pxor x4,                x2; \             
 59         pxor x2,                x1;               
 60                                                   
 61 #define S1_1(x0, x1, x2, x3, x4) \                
            /* First half of the S1 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch, loaded from x1 by the opening movdqa. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
 62         movdqa x1,              x4; \             
 63         pxor x0,                x1; \             
 64         pxor x3,                x0; \             
 65         pxor RNOT,              x3; \             
 66         pand x1,                x4; \             
 67         por x1,                 x0; \             
 68         pxor x2,                x3; \             
 69         pxor x3,                x0; \             
 70         pxor x3,                x1;               
 71 #define S1_2(x0, x1, x2, x3, x4) \                
            /* Second half: reads x4 as left by S1_1, so it must run right after */ \
            /* S1_1 on the same five registers. */ \
 72         pxor x4,                x3; \             
 73         por x4,                 x1; \             
 74         pxor x2,                x4; \             
 75         pand x0,                x2; \             
 76         pxor x1,                x2; \             
 77         por x0,                 x1; \             
 78         pxor RNOT,              x0; \             
 79         pxor x2,                x0; \             
 80         pxor x1,                x4;               
 81                                                   
 82 #define S2_1(x0, x1, x2, x3, x4) \                
            /* First half of the S2 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch: it is first written by the movdqa (copy of x0) and */ \
            /* is not read before that. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
 83         pxor RNOT,              x3; \             
 84         pxor x0,                x1; \             
 85         movdqa x0,              x4; \             
 86         pand x2,                x0; \             
 87         pxor x3,                x0; \             
 88         por x4,                 x3; \             
 89         pxor x1,                x2; \             
 90         pxor x1,                x3; \             
 91         pand x0,                x1;               
 92 #define S2_2(x0, x1, x2, x3, x4) \                
            /* Second half: continues on the registers (x4 included) left by S2_1. */ \
 93         pxor x2,                x0; \             
 94         pand x3,                x2; \             
 95         por x1,                 x3; \             
 96         pxor RNOT,              x0; \             
 97         pxor x0,                x3; \             
 98         pxor x0,                x4; \             
 99         pxor x2,                x0; \             
100         por x2,                 x1;               
101                                                   
102 #define S3_1(x0, x1, x2, x3, x4) \                
            /* First half of the S3 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* Only movdqa/pand/por/pxor on the five operands; RNOT is not used. */ \
            /* x4 is scratch, loaded from x1 by the opening movdqa. */ \
103         movdqa x1,              x4; \             
104         pxor x3,                x1; \             
105         por x0,                 x3; \             
106         pand x0,                x4; \             
107         pxor x2,                x0; \             
108         pxor x1,                x2; \             
109         pand x3,                x1; \             
110         pxor x3,                x2; \             
111         por x4,                 x0; \             
112         pxor x3,                x4;               
113 #define S3_2(x0, x1, x2, x3, x4) \                
            /* Second half: reads x4 as left by S3_1, so it must directly follow */ \
            /* S3_1 with the same operands. */ \
114         pxor x0,                x1; \             
115         pand x3,                x0; \             
116         pand x4,                x3; \             
117         pxor x2,                x3; \             
118         por x1,                 x4; \             
119         pand x1,                x2; \             
120         pxor x3,                x4; \             
121         pxor x3,                x0; \             
122         pxor x2,                x3;               
123                                                   
124 #define S4_1(x0, x1, x2, x3, x4) \                
            /* First half of the S4 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch, loaded from x3 by the opening movdqa. */ \
125         movdqa x3,              x4; \             
126         pand x0,                x3; \             
127         pxor x4,                x0; \             
128         pxor x2,                x3; \             
129         por x4,                 x2; \             
130         pxor x1,                x0; \             
131         pxor x3,                x4; \             
132         por x0,                 x2; \             
133         pxor x1,                x2;               
134 #define S4_2(x0, x1, x2, x3, x4) \                
            /* Second half: continues on the registers (x4 included) left by S4_1. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
135         pand x0,                x1; \             
136         pxor x4,                x1; \             
137         pand x2,                x4; \             
138         pxor x3,                x2; \             
139         pxor x0,                x4; \             
140         por x1,                 x3; \             
141         pxor RNOT,              x1; \             
142         pxor x0,                x3;               
143                                                   
144 #define S5_1(x0, x1, x2, x3, x4) \                
            /* First half of the S5 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch, loaded from x1 by the opening movdqa. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
145         movdqa x1,              x4; \             
146         por x0,                 x1; \             
147         pxor x1,                x2; \             
148         pxor RNOT,              x3; \             
149         pxor x0,                x4; \             
150         pxor x2,                x0; \             
151         pand x4,                x1; \             
152         por x3,                 x4; \             
153         pxor x0,                x4;               
154 #define S5_2(x0, x1, x2, x3, x4) \                
            /* Second half: reads x4 as left by S5_1; must follow it on the same */ \
            /* operands. */ \
155         pand x3,                x0; \             
156         pxor x3,                x1; \             
157         pxor x2,                x3; \             
158         pxor x1,                x0; \             
159         pand x4,                x2; \             
160         pxor x2,                x1; \             
161         pand x0,                x2; \             
162         pxor x2,                x3;               
163                                                   
164 #define S6_1(x0, x1, x2, x3, x4) \                
            /* First half of the S6 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch, loaded from x1 by the opening movdqa. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
165         movdqa x1,              x4; \             
166         pxor x0,                x3; \             
167         pxor x2,                x1; \             
168         pxor x0,                x2; \             
169         pand x3,                x0; \             
170         por x3,                 x1; \             
171         pxor RNOT,              x4; \             
172         pxor x1,                x0; \             
173         pxor x2,                x1;               
174 #define S6_2(x0, x1, x2, x3, x4) \                
            /* Second half: continues on the registers (x4 included) left by S6_1. */ \
175         pxor x4,                x3; \             
176         pxor x0,                x4; \             
177         pand x0,                x2; \             
178         pxor x1,                x4; \             
179         pxor x3,                x2; \             
180         pand x1,                x3; \             
181         pxor x0,                x3; \             
182         pxor x2,                x1;               
183                                                   
184 #define S7_1(x0, x1, x2, x3, x4) \                
            /* First half of the S7 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch: it receives the already-XORed x1 via movdqa and is */ \
            /* not read before that. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
185         pxor RNOT,              x1; \             
186         movdqa x1,              x4; \             
187         pxor RNOT,              x0; \             
188         pand x2,                x1; \             
189         pxor x3,                x1; \             
190         por x4,                 x3; \             
191         pxor x2,                x4; \             
192         pxor x3,                x2; \             
193         pxor x0,                x3; \             
194         por x1,                 x0;               
195 #define S7_2(x0, x1, x2, x3, x4) \                
            /* Second half: reads x4 as left by S7_1; must follow it on the same */ \
            /* operands. */ \
196         pand x0,                x2; \             
197         pxor x4,                x0; \             
198         pxor x3,                x4; \             
199         pand x0,                x3; \             
200         pxor x1,                x4; \             
201         pxor x4,                x2; \             
202         pxor x1,                x3; \             
203         por x0,                 x4; \             
204         pxor x1,                x4;               
205                                                   
206 #define SI0_1(x0, x1, x2, x3, x4) \               
            /* First half of the SI0 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* NOTE(review): the SI prefix presumably marks the inverse S-boxes - confirm. */ \
            /* x4 is scratch, loaded from x3 by the opening movdqa. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
207         movdqa x3,              x4; \             
208         pxor x0,                x1; \             
209         por x1,                 x3; \             
210         pxor x1,                x4; \             
211         pxor RNOT,              x0; \             
212         pxor x3,                x2; \             
213         pxor x0,                x3; \             
214         pand x1,                x0; \             
215         pxor x2,                x0;               
216 #define SI0_2(x0, x1, x2, x3, x4) \               
            /* Second half: continues on the registers (x4 included) left by SI0_1. */ \
217         pand x3,                x2; \             
218         pxor x4,                x3; \             
219         pxor x3,                x2; \             
220         pxor x3,                x1; \             
221         pand x0,                x3; \             
222         pxor x0,                x1; \             
223         pxor x2,                x0; \             
224         pxor x3,                x4;               
225                                                   
226 #define SI1_1(x0, x1, x2, x3, x4) \               
            /* First half of the SI1 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch: first written by the movdqa (copy of x0). */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
227         pxor x3,                x1; \             
228         movdqa x0,              x4; \             
229         pxor x2,                x0; \             
230         pxor RNOT,              x2; \             
231         por x1,                 x4; \             
232         pxor x3,                x4; \             
233         pand x1,                x3; \             
234         pxor x2,                x1; \             
235         pand x4,                x2;               
236 #define SI1_2(x0, x1, x2, x3, x4) \               
            /* Second half: reads x4 as left by SI1_1; must follow it on the same */ \
            /* operands. */ \
237         pxor x1,                x4; \             
238         por x3,                 x1; \             
239         pxor x0,                x3; \             
240         pxor x0,                x2; \             
241         por x4,                 x0; \             
242         pxor x4,                x2; \             
243         pxor x0,                x1; \             
244         pxor x1,                x4;               
245                                                   
246 #define SI2_1(x0, x1, x2, x3, x4) \               
            /* First half of the SI2 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch: first written by the movdqa (copy of x3). */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
247         pxor x1,                x2; \             
248         movdqa x3,              x4; \             
249         pxor RNOT,              x3; \             
250         por x2,                 x3; \             
251         pxor x4,                x2; \             
252         pxor x0,                x4; \             
253         pxor x1,                x3; \             
254         por x2,                 x1; \             
255         pxor x0,                x2;               
256 #define SI2_2(x0, x1, x2, x3, x4) \               
            /* Second half: continues on the registers (x4 included) left by SI2_1. */ \
257         pxor x4,                x1; \             
258         por x3,                 x4; \             
259         pxor x3,                x2; \             
260         pxor x2,                x4; \             
261         pand x1,                x2; \             
262         pxor x3,                x2; \             
263         pxor x4,                x3; \             
264         pxor x0,                x4;               
265                                                   
266 #define SI3_1(x0, x1, x2, x3, x4) \               
            /* First half of the SI3 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* Only movdqa/pand/por/pxor on the five operands; RNOT is not used. */ \
            /* x4 is scratch: first written by the movdqa (copy of x1). */ \
267         pxor x1,                x2; \             
268         movdqa x1,              x4; \             
269         pand x2,                x1; \             
270         pxor x0,                x1; \             
271         por x4,                 x0; \             
272         pxor x3,                x4; \             
273         pxor x3,                x0; \             
274         por x1,                 x3; \             
275         pxor x2,                x1;               
276 #define SI3_2(x0, x1, x2, x3, x4) \               
            /* Second half: reads x4 as left by SI3_1; must follow it on the same */ \
            /* operands. */ \
277         pxor x3,                x1; \             
278         pxor x2,                x0; \             
279         pxor x3,                x2; \             
280         pand x1,                x3; \             
281         pxor x0,                x1; \             
282         pand x2,                x0; \             
283         pxor x3,                x4; \             
284         pxor x0,                x3; \             
285         pxor x1,                x0;               
286                                                   
287 #define SI4_1(x0, x1, x2, x3, x4) \               
            /* First half of the SI4 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch: first written by the movdqa (copy of x0). */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
288         pxor x3,                x2; \             
289         movdqa x0,              x4; \             
290         pand x1,                x0; \             
291         pxor x2,                x0; \             
292         por x3,                 x2; \             
293         pxor RNOT,              x4; \             
294         pxor x0,                x1; \             
295         pxor x2,                x0; \             
296         pand x4,                x2;               
297 #define SI4_2(x0, x1, x2, x3, x4) \               
            /* Second half: continues on the registers (x4 included) left by SI4_1. */ \
298         pxor x0,                x2; \             
299         por x4,                 x0; \             
300         pxor x3,                x0; \             
301         pand x2,                x3; \             
302         pxor x3,                x4; \             
303         pxor x1,                x3; \             
304         pand x0,                x1; \             
305         pxor x1,                x4; \             
306         pxor x3,                x0;               
307                                                   
308 #define SI5_1(x0, x1, x2, x3, x4) \               
            /* First half of the SI5 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch, loaded from x1 by the opening movdqa. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
309         movdqa x1,              x4; \             
310         por x2,                 x1; \             
311         pxor x4,                x2; \             
312         pxor x3,                x1; \             
313         pand x4,                x3; \             
314         pxor x3,                x2; \             
315         por x0,                 x3; \             
316         pxor RNOT,              x0; \             
317         pxor x2,                x3; \             
318         por x0,                 x2;               
319 #define SI5_2(x0, x1, x2, x3, x4) \               
            /* Second half: reads x4 as left by SI5_1; must follow it on the same */ \
            /* operands. */ \
320         pxor x1,                x4; \             
321         pxor x4,                x2; \             
322         pand x0,                x4; \             
323         pxor x1,                x0; \             
324         pxor x3,                x1; \             
325         pand x2,                x0; \             
326         pxor x3,                x2; \             
327         pxor x2,                x0; \             
328         pxor x4,                x2; \             
329         pxor x3,                x4;               
330                                                   
331 #define SI6_1(x0, x1, x2, x3, x4) \               
            /* First half of the SI6 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch: first written by the movdqa (copy of x0). */ \
332         pxor x2,                x0; \             
333         movdqa x0,              x4; \             
334         pand x3,                x0; \             
335         pxor x3,                x2; \             
336         pxor x2,                x0; \             
337         pxor x1,                x3; \             
338         por x4,                 x2; \             
339         pxor x3,                x2; \             
340         pand x0,                x3;               
341 #define SI6_2(x0, x1, x2, x3, x4) \               
            /* Second half: continues on the registers (x4 included) left by SI6_1. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
342         pxor RNOT,              x0; \             
343         pxor x1,                x3; \             
344         pand x2,                x1; \             
345         pxor x0,                x4; \             
346         pxor x4,                x3; \             
347         pxor x2,                x4; \             
348         pxor x1,                x0; \             
349         pxor x0,                x2;               
350                                                   
351 #define SI7_1(x0, x1, x2, x3, x4) \               
            /* First half of the SI7 pair (pasted by S()/SP() as SBOX ## _1). */ \
            /* x4 is scratch, loaded from x3 by the opening movdqa. */ \
            /* NOTE(review): RNOT presumably holds all-ones (set elsewhere) - confirm. */ \
352         movdqa x3,              x4; \             
353         pand x0,                x3; \             
354         pxor x2,                x0; \             
355         por x4,                 x2; \             
356         pxor x1,                x4; \             
357         pxor RNOT,              x0; \             
358         por x3,                 x1; \             
359         pxor x0,                x4; \             
360         pand x2,                x0; \             
361         pxor x1,                x0;               
362 #define SI7_2(x0, x1, x2, x3, x4) \               
            /* Second half: reads x4 as left by SI7_1; must follow it on the same */ \
            /* operands. */ \
363         pand x2,                x1; \             
364         pxor x2,                x3; \             
365         pxor x3,                x4; \             
366         pand x3,                x2; \             
367         por x0,                 x3; \             
368         pxor x4,                x1; \             
369         pxor x4,                x3; \             
370         pand x0,                x4; \             
371         pxor x2,                x4;               
372                                                   
373 #define get_key(i, j, t) \                        
            /* Load one 32-bit key word from memory at CTX and replicate it in t. */ \
            /* The word is read at byte offset (4 * i + j) * 4 from CTX: movd */ \
            /* places it in the low dword of t, then pshufd $0 copies that dword */ \
            /* into all four dword lanes of t. */ \
374         movd (4*(i)+(j))*4(CTX), t; \             
375         pshufd $0, t, t;                          
376                                                   
377 #define K2(x0, x1, x2, x3, x4, i) \
            /* Key mixing for both register sets: load key words 0..3 of key */ \
            /* index i into RK0..RK3 (each broadcast by get_key) and XOR them */ \
            /* into x0..x3 of set 1 (suffix 1) and set 2 (suffix 2). */ \
            /* x4 is accepted for a uniform signature but is not referenced. */ \
378         get_key(i, 0, RK0); \
379         get_key(i, 1, RK1); \
380         get_key(i, 2, RK2); \
381         get_key(i, 3, RK3); \
382         pxor RK0,               x0 ## 1; \
383         pxor RK1,               x1 ## 1; \
384         pxor RK2,               x2 ## 1; \
385         pxor RK3,               x3 ## 1; \
386                 pxor RK0,               x0 ## 2; \
387                 pxor RK1,               x1 ## 2; \
388                 pxor RK2,               x2 ## 2; \
389                 pxor RK3,               x3 ## 2;
390                                                   
391 #define LK2(x0, x1, x2, x3, x4, i) \
            /* Word mixing followed by a key XOR, applied to register set 1 */ \
            /* (suffix 1) and set 2 (suffix 2, extra-indented lines) in */ \
            /* interleaved groups.  Rotations are built from pslld/psrld/por */ \
            /* with x4 as scratch.  Per set, in order: */ \
            /*   x0 = rol(x0, 13); x1 ^= x0; x2 = rol(x2, 3); x1 ^= x2; */ \
            /*   x1 = rol(x1, 1);  x3 ^= x2 ^ (x0 << 3); */ \
            /*   x3 = rol(x3, 7);  x0 ^= x1 ^ x3; x2 ^= x3 ^ (x1 << 7); */ \
            /*   x1 ^= RK1; x3 ^= RK3; x0 = rol(x0, 5); x2 = rol(x2, 22); */ \
            /*   x0 ^= RK0; x2 ^= RK2; */ \
            /* The four get_key() loads for key index i are spread between the */ \
            /* groups and all complete before the final group uses RK0..RK3. */ \
392         movdqa x0 ## 1,         x4 ## 1; \
393         pslld $13,              x0 ## 1; \
394         psrld $(32 - 13),       x4 ## 1; \
395         por x4 ## 1,            x0 ## 1; \
396         pxor x0 ## 1,           x1 ## 1; \
397         movdqa x2 ## 1,         x4 ## 1; \
398         pslld $3,               x2 ## 1; \
399         psrld $(32 - 3),        x4 ## 1; \
400         por x4 ## 1,            x2 ## 1; \
401         pxor x2 ## 1,           x1 ## 1; \
402                 movdqa x0 ## 2,         x4 ## 2; \
403                 pslld $13,              x0 ## 2; \
404                 psrld $(32 - 13),       x4 ## 2; \
405                 por x4 ## 2,            x0 ## 2; \
406                 pxor x0 ## 2,           x1 ## 2; \
407                 movdqa x2 ## 2,         x4 ## 2; \
408                 pslld $3,               x2 ## 2; \
409                 psrld $(32 - 3),        x4 ## 2; \
410                 por x4 ## 2,            x2 ## 2; \
411                 pxor x2 ## 2,           x1 ## 2; \
412         movdqa x1 ## 1,         x4 ## 1; \
413         pslld $1,               x1 ## 1; \
414         psrld $(32 - 1),        x4 ## 1; \
415         por x4 ## 1,            x1 ## 1; \
416         movdqa x0 ## 1,         x4 ## 1; \
417         pslld $3,               x4 ## 1; \
418         pxor x2 ## 1,           x3 ## 1; \
419         pxor x4 ## 1,           x3 ## 1; \
420         movdqa x3 ## 1,         x4 ## 1; \
421         get_key(i, 1, RK1); \
422                 movdqa x1 ## 2,         x4 ## 2; \
423                 pslld $1,               x1 ## 2; \
424                 psrld $(32 - 1),        x4 ## 2; \
425                 por x4 ## 2,            x1 ## 2; \
426                 movdqa x0 ## 2,         x4 ## 2; \
427                 pslld $3,               x4 ## 2; \
428                 pxor x2 ## 2,           x3 ## 2; \
429                 pxor x4 ## 2,           x3 ## 2; \
430                 movdqa x3 ## 2,         x4 ## 2; \
431                 get_key(i, 3, RK3); \
432         pslld $7,               x3 ## 1; \
433         psrld $(32 - 7),        x4 ## 1; \
434         por x4 ## 1,            x3 ## 1; \
435         movdqa x1 ## 1,         x4 ## 1; \
436         pslld $7,               x4 ## 1; \
437         pxor x1 ## 1,           x0 ## 1; \
438         pxor x3 ## 1,           x0 ## 1; \
439         pxor x3 ## 1,           x2 ## 1; \
440         pxor x4 ## 1,           x2 ## 1; \
441         get_key(i, 0, RK0); \
442                 pslld $7,               x3 ## 2; \
443                 psrld $(32 - 7),        x4 ## 2; \
444                 por x4 ## 2,            x3 ## 2; \
445                 movdqa x1 ## 2,         x4 ## 2; \
446                 pslld $7,               x4 ## 2; \
447                 pxor x1 ## 2,           x0 ## 2; \
448                 pxor x3 ## 2,           x0 ## 2; \
449                 pxor x3 ## 2,           x2 ## 2; \
450                 pxor x4 ## 2,           x2 ## 2; \
451                 get_key(i, 2, RK2); \
452         pxor RK1,               x1 ## 1; \
453         pxor RK3,               x3 ## 1; \
454         movdqa x0 ## 1,         x4 ## 1; \
455         pslld $5,               x0 ## 1; \
456         psrld $(32 - 5),        x4 ## 1; \
457         por x4 ## 1,            x0 ## 1; \
458         movdqa x2 ## 1,         x4 ## 1; \
459         pslld $22,              x2 ## 1; \
460         psrld $(32 - 22),       x4 ## 1; \
461         por x4 ## 1,            x2 ## 1; \
462         pxor RK0,               x0 ## 1; \
463         pxor RK2,               x2 ## 1; \
464                 pxor RK1,               x1 ## 2; \
465                 pxor RK3,               x3 ## 2; \
466                 movdqa x0 ## 2,         x4 ## 2; \
467                 pslld $5,               x0 ## 2; \
468                 psrld $(32 - 5),        x4 ## 2; \
469                 por x4 ## 2,            x0 ## 2; \
470                 movdqa x2 ## 2,         x4 ## 2; \
471                 pslld $22,              x2 ## 2; \
472                 psrld $(32 - 22),       x4 ## 2; \
473                 por x4 ## 2,            x2 ## 2; \
474                 pxor RK0,               x0 ## 2; \
475                 pxor RK2,               x2 ## 2;
476                                                   
477 #define KL2(x0, x1, x2, x3, x4, i) \
            /* Key XOR followed by word mixing built from right rotations, */ \
            /* applied to register set 1 (suffix 1) and set 2 (suffix 2, */ \
            /* extra-indented lines) in interleaved groups.  Rotations are */ \
            /* built from psrld/pslld/por with x4 as scratch.  Per set, in order: */ \
            /*   x0 ^= RK0; x2 ^= RK2; x0 = ror(x0, 5); x3 ^= RK3; x1 ^= RK1; */ \
            /*   x2 = ror(x2, 22); x2 ^= x3; */ \
            /*   x0 ^= x3 ^ x1; x2 ^= (x1 << 7); x1 = ror(x1, 1); */ \
            /*   x3 = ror(x3, 7); x1 ^= x0; x3 ^= (x0 << 3); */ \
            /*   x0 = ror(x0, 13); x1 ^= x2; x3 ^= x2; x2 = ror(x2, 3); */ \
            /* No key is loaded here and i is not referenced: RK0..RK3 must */ \
            /* already hold the key words when the macro is expanded. */ \
478         pxor RK0,               x0 ## 1; \
479         pxor RK2,               x2 ## 1; \
480         movdqa x0 ## 1,         x4 ## 1; \
481         psrld $5,               x0 ## 1; \
482         pslld $(32 - 5),        x4 ## 1; \
483         por x4 ## 1,            x0 ## 1; \
484         pxor RK3,               x3 ## 1; \
485         pxor RK1,               x1 ## 1; \
486         movdqa x2 ## 1,         x4 ## 1; \
487         psrld $22,              x2 ## 1; \
488         pslld $(32 - 22),       x4 ## 1; \
489         por x4 ## 1,            x2 ## 1; \
490         pxor x3 ## 1,           x2 ## 1; \
491                 pxor RK0,               x0 ## 2; \
492                 pxor RK2,               x2 ## 2; \
493                 movdqa x0 ## 2,         x4 ## 2; \
494                 psrld $5,               x0 ## 2; \
495                 pslld $(32 - 5),        x4 ## 2; \
496                 por x4 ## 2,            x0 ## 2; \
497                 pxor RK3,               x3 ## 2; \
498                 pxor RK1,               x1 ## 2; \
499                 movdqa x2 ## 2,         x4 ## 2; \
500                 psrld $22,              x2 ## 2; \
501                 pslld $(32 - 22),       x4 ## 2; \
502                 por x4 ## 2,            x2 ## 2; \
503                 pxor x3 ## 2,           x2 ## 2; \
504         pxor x3 ## 1,           x0 ## 1; \
505         movdqa x1 ## 1,         x4 ## 1; \
506         pslld $7,               x4 ## 1; \
507         pxor x1 ## 1,           x0 ## 1; \
508         pxor x4 ## 1,           x2 ## 1; \
509         movdqa x1 ## 1,         x4 ## 1; \
510         psrld $1,               x1 ## 1; \
511         pslld $(32 - 1),        x4 ## 1; \
512         por x4 ## 1,            x1 ## 1; \
513                 pxor x3 ## 2,           x0 ## 2; \
514                 movdqa x1 ## 2,         x4 ## 2; \
515                 pslld $7,               x4 ## 2; \
516                 pxor x1 ## 2,           x0 ## 2; \
517                 pxor x4 ## 2,           x2 ## 2; \
518                 movdqa x1 ## 2,         x4 ## 2; \
519                 psrld $1,               x1 ## 2; \
520                 pslld $(32 - 1),        x4 ## 2; \
521                 por x4 ## 2,            x1 ## 2; \
522         movdqa x3 ## 1,         x4 ## 1; \
523         psrld $7,               x3 ## 1; \
524         pslld $(32 - 7),        x4 ## 1; \
525         por x4 ## 1,            x3 ## 1; \
526         pxor x0 ## 1,           x1 ## 1; \
527         movdqa x0 ## 1,         x4 ## 1; \
528         pslld $3,               x4 ## 1; \
529         pxor x4 ## 1,           x3 ## 1; \
530         movdqa x0 ## 1,         x4 ## 1; \
531                 movdqa x3 ## 2,         x4 ## 2; \
532                 psrld $7,               x3 ## 2; \
533                 pslld $(32 - 7),        x4 ## 2; \
534                 por x4 ## 2,            x3 ## 2; \
535                 pxor x0 ## 2,           x1 ## 2; \
536                 movdqa x0 ## 2,         x4 ## 2; \
537                 pslld $3,               x4 ## 2; \
538                 pxor x4 ## 2,           x3 ## 2; \
539                 movdqa x0 ## 2,         x4 ## 2; \
540         psrld $13,              x0 ## 1; \
541         pslld $(32 - 13),       x4 ## 1; \
542         por x4 ## 1,            x0 ## 1; \
543         pxor x2 ## 1,           x1 ## 1; \
544         pxor x2 ## 1,           x3 ## 1; \
545         movdqa x2 ## 1,         x4 ## 1; \
546         psrld $3,               x2 ## 1; \
547         pslld $(32 - 3),        x4 ## 1; \
548         por x4 ## 1,            x2 ## 1; \
549                 psrld $13,              x0 ## 2; \
550                 pslld $(32 - 13),       x4 ## 2; \
551                 por x4 ## 2,            x0 ## 2; \
552                 pxor x2 ## 2,           x1 ## 2; \
553                 pxor x2 ## 2,           x3 ## 2; \
554                 movdqa x2 ## 2,         x4 ## 2; \
555                 psrld $3,               x2 ## 2; \
556                 pslld $(32 - 3),        x4 ## 2; \
557                 por x4 ## 2,            x2 ## 2;
558                                                   
/*
 * S(SBOX, x0..x4): expand one S-box for both register sets.
 * Token pasting selects the two parts SBOX_1 and SBOX_2 and appends the
 * set suffix (1 or 2) to every register argument.  Order of expansion:
 * SBOX_1 then SBOX_2 on set 1, followed by SBOX_1 then SBOX_2 on set 2.
 */
559 #define S(SBOX, x0, x1, x2, x3, x4) \             
560         SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1,     
561         SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1,     
562         SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2,     
563         SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2,     
564                                                   
/*
 * SP(SBOX, x0..x4, i): S-box expansion for both register sets with the
 * get_key() calls for index i placed between the S-box parts.
 * Issue order: get_key(i, 0, RK0), SBOX_1 on set 1, get_key(i, 2, RK2),
 * SBOX_1 on set 2, get_key(i, 3, RK3), SBOX_2 on set 1,
 * get_key(i, 1, RK1), SBOX_2 on set 2.
 * Unlike S(), both SBOX_1 parts are issued before either SBOX_2 part.
 * NOTE(review): get_key is defined outside this block; presumably it loads
 * word n of round key i into RKn -- confirm against its definition.
 */
565 #define SP(SBOX, x0, x1, x2, x3, x4, i) \         
566         get_key(i, 0, RK0); \                     
567         SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1,     
568         get_key(i, 2, RK2); \                     
569         SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2,     
570         get_key(i, 3, RK3); \                     
571         SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1,     
572         get_key(i, 1, RK1); \                     
573         SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2,     
574                                                   
/*
 * transpose_4x4: treat x0..x3 as the rows of a 4x4 matrix of 32-bit words
 * and transpose it in place.  On exit xN holds 32-bit word N of each of
 * the four input registers, in input-register order.
 *
 * Steps: the dword unpacks interleave x0/x1 (low half in x0, high half in
 * t2) and x2/x3 (low half in t1, high half in x2); the qword unpacks then
 * combine those pairs into the final rows.
 *
 * Clobbers t1 and t2.  t0 is not referenced by the instructions below.
 */
575 #define transpose_4x4(x0, x1, x2, x3, t0, t1,     
576         movdqa x0,              t2; \             
577         punpckldq x1,           x0; \             
578         punpckhdq x1,           t2; \             
579         movdqa x2,              t1; \             
580         punpckhdq x3,           x2; \             
581         punpckldq x3,           t1; \             
582         movdqa x0,              x1; \             
583         punpcklqdq t1,          x0; \             
584         punpckhqdq t1,          x1; \             
585         movdqa t2,              x3; \             
586         punpcklqdq x2,          t2; \             
587         punpckhqdq x2,          x3; \             
588         movdqa t2,              x2;               
589                                                   
/*
 * read_blocks: load four 16-byte values from in+0, in+16, in+32 and in+48
 * into x0..x3 using unaligned loads (movdqu), then pass the four registers
 * through transpose_4x4.
 */
590 #define read_blocks(in, x0, x1, x2, x3, t0, t1    
591         movdqu (0*4*4)(in),     x0; \             
592         movdqu (1*4*4)(in),     x1; \             
593         movdqu (2*4*4)(in),     x2; \             
594         movdqu (3*4*4)(in),     x3; \             
595         \                                         
596         transpose_4x4(x0, x1, x2, x3, t0, t1,     
597                                                   
/*
 * write_blocks: pass x0..x3 through transpose_4x4, then store the four
 * registers to out+0, out+16, out+32 and out+48 using unaligned stores
 * (movdqu).  The previous contents of the destination are overwritten.
 */
598 #define write_blocks(out, x0, x1, x2, x3, t0,     
599         transpose_4x4(x0, x1, x2, x3, t0, t1,     
600         \                                         
601         movdqu x0,              (0*4*4)(out);     
602         movdqu x1,              (1*4*4)(out);     
603         movdqu x2,              (2*4*4)(out);     
604         movdqu x3,              (3*4*4)(out);     
605                                                   
/*
 * xor_blocks: pass x0..x3 through transpose_4x4, then combine each register
 * with the 16 bytes already present at the matching offset of out
 * (+0, +16, +32, +48): load the existing bytes into t0, XOR them into xN,
 * and store xN back to the same location.
 * t0 is overwritten by every load; all memory accesses are unaligned
 * (movdqu).
 */
606 #define xor_blocks(out, x0, x1, x2, x3, t0, t1    
607         transpose_4x4(x0, x1, x2, x3, t0, t1,     
608         \                                         
609         movdqu (0*4*4)(out),    t0; \             
610         pxor t0,                x0; \             
611         movdqu x0,              (0*4*4)(out);     
612         movdqu (1*4*4)(out),    t0; \             
613         pxor t0,                x1; \             
614         movdqu x1,              (1*4*4)(out);     
615         movdqu (2*4*4)(out),    t0; \             
616         pxor t0,                x2; \             
617         movdqu x2,              (2*4*4)(out);     
618         movdqu (3*4*4)(out),    t0; \             
619         pxor t0,                x3; \             
620         movdqu x3,              (3*4*4)(out);     
621                                                   
/*
 * __serpent_enc_blk_8way: encrypt eight 16-byte blocks (128 bytes) read
 * from src.  The blocks are handled as two groups of four: the first group
 * lives in the registers suffixed 1 (RA1..RD1), the second group, read
 * from src + 64, in the registers suffixed 2 (RA2..RD2).
 * The result ends up in the RA/RB/RC/RD sets and is either stored to dst
 * or XORed into the data already at dst, depending on the low byte of
 * %rcx.  %rax is used as a scratch pointer (src + 64, later dst + 64).
 */
622 SYM_FUNC_START(__serpent_enc_blk_8way)            
623         /* input:                                 
624          *      %rdi: ctx, CTX                    
625          *      %rsi: dst                         
626          *      %rdx: src                         
627          *      %rcx: bool, if true: xor output
628          */                                       
629                                                   
            /* comparing a register with itself sets every bit: RNOT = all ones */
630         pcmpeqd RNOT, RNOT;                       
631                                                   
            /* %rax = src + 64, the start of the second group of four blocks */
632         leaq (4*4*4)(%rdx), %rax;                 
633         read_blocks(%rdx, RA1, RB1, RC1, RD1,     
634         read_blocks(%rax, RA2, RB2, RC2, RD2,     
635                                                   
636                                                   
            /*
             * 32 S() invocations cycling through S0..S7 four times.  The
             * register arguments are permuted from one line to the next, so
             * the order of these lines must not be changed.
             */
637         S(S0, RA, RB, RC, RD, RE);                
638         S(S1, RC, RB, RD, RA, RE);                
639         S(S2, RE, RD, RA, RC, RB);                
640         S(S3, RB, RD, RE, RC, RA);                
641         S(S4, RC, RA, RD, RB, RE);                
642         S(S5, RA, RD, RB, RE, RC);                
643         S(S6, RC, RA, RD, RE, RB);                
644         S(S7, RD, RB, RA, RE, RC);                
645         S(S0, RC, RA, RE, RD, RB);                
646         S(S1, RE, RA, RD, RC, RB);                
647         S(S2, RB, RD, RC, RE, RA);                
648         S(S3, RA, RD, RB, RE, RC);                
649         S(S4, RE, RC, RD, RA, RB);                
650         S(S5, RC, RD, RA, RB, RE);                
651         S(S6, RE, RC, RD, RB, RA);                
652         S(S7, RD, RA, RC, RB, RE);                
653         S(S0, RE, RC, RB, RD, RA);                
654         S(S1, RB, RC, RD, RE, RA);                
655         S(S2, RA, RD, RE, RB, RC);                
656         S(S3, RC, RD, RA, RB, RE);                
657         S(S4, RB, RE, RD, RC, RA);                
658         S(S5, RE, RD, RC, RA, RB);                
659         S(S6, RB, RE, RD, RA, RC);                
660         S(S7, RD, RC, RE, RA, RB);                
661         S(S0, RB, RE, RA, RD, RC);                
662         S(S1, RA, RE, RD, RB, RC);                
663         S(S2, RC, RD, RB, RA, RE);                
664         S(S3, RE, RD, RC, RA, RB);                
665         S(S4, RA, RB, RD, RE, RC);                
666         S(S5, RB, RD, RE, RC, RA);                
667         S(S6, RA, RB, RD, RC, RE);                
668         S(S7, RD, RE, RB, RC, RA);                
669                                                   
            /* %rax = dst + 64, destination of the second group of four blocks */
670         leaq (4*4*4)(%rsi), %rax;                 
671                                                   
            /* a non-zero low byte of %rcx selects the XOR-into-dst path */
672         testb %cl, %cl;                           
673         jnz .L__enc_xor8;                         
674                                                   
            /* plain store: dst is overwritten with the result */
675         write_blocks(%rsi, RA1, RB1, RC1, RD1,    
676         write_blocks(%rax, RA2, RB2, RC2, RD2,    
677                                                   
678         RET;                                      
679                                                   
680 .L__enc_xor8:                                     
            /* XOR path: the result is XORed with the bytes already at dst */
681         xor_blocks(%rsi, RA1, RB1, RC1, RD1, R    
682         xor_blocks(%rax, RA2, RB2, RC2, RD2, R    
683                                                   
684         RET;                                      
685 SYM_FUNC_END(__serpent_enc_blk_8way)              
686                                                   
/*
 * serpent_dec_blk_8way: decrypt eight 16-byte blocks (128 bytes) read from
 * src and store the result to dst.  As in the encryption routine, the
 * blocks are handled as two groups of four: registers suffixed 1 hold the
 * group at src, registers suffixed 2 hold the group at src + 64.
 * After the last S-box step the result is in the RC/RD/RB/RE sets, which
 * is the register order passed to write_blocks.
 * %rax is used as a scratch pointer (src + 64, later dst + 64).
 */
687 SYM_FUNC_START(serpent_dec_blk_8way)              
688         /* input:                                 
689          *      %rdi: ctx, CTX                    
690          *      %rsi: dst                         
691          *      %rdx: src                         
692          */                                       
693                                                   
            /* comparing a register with itself sets every bit: RNOT = all ones */
694         pcmpeqd RNOT, RNOT;                       
695                                                   
            /* %rax = src + 64, the start of the second group of four blocks */
696         leaq (4*4*4)(%rdx), %rax;                 
697         read_blocks(%rdx, RA1, RB1, RC1, RD1,     
698         read_blocks(%rax, RA2, RB2, RC2, RD2,     
699                                                   
700                                                   
            /*
             * Inverse S-boxes SI7..SI0, cycled four times, with the key index
             * counting down from 31 to 1.  The register arguments are permuted
             * from one line to the next, so the order of these lines must not
             * be changed.
             */
701         SP(SI7, RA, RB, RC, RD, RE, 31);          
702         SP(SI6, RB, RD, RA, RE, RC, 30);          
703         SP(SI5, RA, RC, RE, RB, RD, 29);          
704         SP(SI4, RC, RD, RA, RE, RB, 28);          
705         SP(SI3, RC, RA, RB, RE, RD, 27);          
706         SP(SI2, RB, RC, RD, RE, RA, 26);          
707         SP(SI1, RC, RA, RE, RD, RB, 25);          
708         SP(SI0, RB, RA, RE, RD, RC, 24);          
709         SP(SI7, RE, RC, RA, RB, RD, 23);          
710         SP(SI6, RC, RB, RE, RD, RA, 22);          
711         SP(SI5, RE, RA, RD, RC, RB, 21);          
712         SP(SI4, RA, RB, RE, RD, RC, 20);          
713         SP(SI3, RA, RE, RC, RD, RB, 19);          
714         SP(SI2, RC, RA, RB, RD, RE, 18);          
715         SP(SI1, RA, RE, RD, RB, RC, 17);          
716         SP(SI0, RC, RE, RD, RB, RA, 16);          
717         SP(SI7, RD, RA, RE, RC, RB, 15);          
718         SP(SI6, RA, RC, RD, RB, RE, 14);          
719         SP(SI5, RD, RE, RB, RA, RC, 13);          
720         SP(SI4, RE, RC, RD, RB, RA, 12);          
721         SP(SI3, RE, RD, RA, RB, RC, 11);          
722         SP(SI2, RA, RE, RC, RB, RD, 10);          
723         SP(SI1, RE, RD, RB, RC, RA, 9);           
724         SP(SI0, RA, RD, RB, RC, RE, 8);           
725         SP(SI7, RB, RE, RD, RA, RC, 7);           
726         SP(SI6, RE, RA, RB, RC, RD, 6);           
727         SP(SI5, RB, RD, RC, RE, RA, 5);           
728         SP(SI4, RD, RA, RB, RC, RE, 4);           
729         SP(SI3, RD, RB, RE, RC, RA, 3);           
730         SP(SI2, RE, RD, RA, RC, RB, 2);           
731         SP(SI1, RD, RB, RC, RA, RE, 1);           
            /* last step uses S() rather than SP(): no key index is passed */
732         S(SI0, RE, RB, RC, RA, RD);               
733                                                   
            /* %rax = dst + 64, destination of the second group of four blocks */
734         leaq (4*4*4)(%rsi), %rax;                 
735         write_blocks(%rsi, RC1, RD1, RB1, RE1,    
736         write_blocks(%rax, RC2, RD2, RB2, RE2,    
737                                                   
738         RET;                                      
739 SYM_FUNC_END(serpent_dec_blk_8way)                
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php