~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/crypto/blowfish-x86_64-asm_64.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-or-later */
  2 /*
  3  * Blowfish Cipher Algorithm (x86_64)
  4  *
  5  * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
  6  */
  7 
  8 #include <linux/linkage.h>
  9 
 10 .file "blowfish-x86_64-asm.S"
 11 .text
 12 
 13 /* structure of crypto context */
 14 #define p       0
 15 #define s0      ((16 + 2) * 4)
 16 #define s1      ((16 + 2 + (1 * 256)) * 4)
 17 #define s2      ((16 + 2 + (2 * 256)) * 4)
 18 #define s3      ((16 + 2 + (3 * 256)) * 4)
 19 
 20 /* register macros */
 21 #define CTX %r12
 22 #define RIO %rsi
 23 
 24 #define RX0 %rax
 25 #define RX1 %rbx
 26 #define RX2 %rcx
 27 #define RX3 %rdx
 28 
 29 #define RX0d %eax
 30 #define RX1d %ebx
 31 #define RX2d %ecx
 32 #define RX3d %edx
 33 
 34 #define RX0bl %al
 35 #define RX1bl %bl
 36 #define RX2bl %cl
 37 #define RX3bl %dl
 38 
 39 #define RX0bh %ah
 40 #define RX1bh %bh
 41 #define RX2bh %ch
 42 #define RX3bh %dh
 43 
 44 #define RT0 %rdi
 45 #define RT1 %rsi
 46 #define RT2 %r8
 47 #define RT3 %r9
 48 
 49 #define RT0d %edi
 50 #define RT1d %esi
 51 #define RT2d %r8d
 52 #define RT3d %r9d
 53 
 54 #define RKEY %r10
 55 
 56 /***********************************************************************
 57  * 1-way blowfish
 58  ***********************************************************************/
 59 #define F() \
 60         rorq $16,               RX0; \
 61         movzbl RX0bh,           RT0d; \
 62         movzbl RX0bl,           RT1d; \
 63         rolq $16,               RX0; \
 64         movl s0(CTX,RT0,4),     RT0d; \
 65         addl s1(CTX,RT1,4),     RT0d; \
 66         movzbl RX0bh,           RT1d; \
 67         movzbl RX0bl,           RT2d; \
 68         rolq $32,               RX0; \
 69         xorl s2(CTX,RT1,4),     RT0d; \
 70         addl s3(CTX,RT2,4),     RT0d; \
 71         xorq RT0,               RX0;
 72 
 73 #define add_roundkey_enc(n) \
 74         xorq p+4*(n)(CTX),      RX0;
 75 
 76 #define round_enc(n) \
 77         add_roundkey_enc(n); \
 78         \
 79         F(); \
 80         F();
 81 
 82 #define add_roundkey_dec(n) \
 83         movq p+4*(n-1)(CTX),    RT0; \
 84         rorq $32,               RT0; \
 85         xorq RT0,               RX0;
 86 
 87 #define round_dec(n) \
 88         add_roundkey_dec(n); \
 89         \
 90         F(); \
 91         F(); \
 92 
 93 #define read_block() \
 94         movq (RIO),             RX0; \
 95         rorq $32,               RX0; \
 96         bswapq                  RX0;
 97 
 98 #define write_block() \
 99         bswapq                  RX0; \
100         movq RX0,               (RIO);
101 
SYM_FUNC_START(blowfish_enc_blk)
	/*
	 * Encrypt a single 8-byte block.
	 *
	 * input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * CTX is callee-saved %r12; the caller's value is parked in %r11
	 * and put back before returning. dst is parked in %r10 because
	 * F() uses %rsi (RT1, same register as RIO) as a temporary.
	 */
	movq %r12, %r11
	movq %rsi, %r10			/* keep dst */
	movq %rdi, CTX
	movq %rdx, RIO			/* read from src */

	read_block();

	round_enc(0);
	round_enc(2);
	round_enc(4);
	round_enc(6);
	round_enc(8);
	round_enc(10);
	round_enc(12);
	round_enc(14);
	add_roundkey_enc(16);		/* final key pair, no F() afterwards */

	movq %r10, RIO			/* write to dst */
	write_block();

	movq %r11, %r12			/* restore caller's %r12 */
	RET
SYM_FUNC_END(blowfish_enc_blk)
132 
SYM_FUNC_START(blowfish_dec_blk)
	/*
	 * Decrypt a single 8-byte block.
	 *
	 * input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Same register handling as the encrypt path: caller's %r12 is
	 * parked in %r11, dst in %r10 (F() and add_roundkey_dec() use
	 * %rsi/%rdi as temporaries).
	 */
	movq %r12, %r11
	movq %rsi, %r10			/* keep dst */
	movq %rdi, CTX
	movq %rdx, RIO			/* read from src */

	read_block();

	/* Round keys are consumed from the top of p downwards. */
	round_dec(17);
	round_dec(15);
	round_dec(13);
	round_dec(11);
	round_dec(9);
	round_dec(7);
	round_dec(5);
	round_dec(3);
	add_roundkey_dec(1);		/* final key pair, no F() afterwards */

	movq %r11, %r12			/* restore caller's %r12 */

	movq %r10, RIO			/* write to dst */
	write_block();

	RET
SYM_FUNC_END(blowfish_dec_blk)
164 
165 /**********************************************************************
166   4-way blowfish, four blocks parallel
167  **********************************************************************/
168 
/*
 * F4(x): the 4-way flavour of F(), applied to block register x.
 *
 * Slower than F() when used alone/1-way, but faster when four blocks are
 * processed in parallel (tested on AMD Phenom II & Intel Xeon E7330).
 *
 * All four index bytes are extracted first; the two 16-bit rotates add up
 * to a swap of the 32-bit halves of x. The zero-extended table result is
 * then XORed into x, so only the new low half is modified.
 *
 * Clobbers RT0, RT1 (which is also RIO), RT2 and RT3.
 */
#define F4(x) \
	movzbl x ## bh, RT1d;		/* RT1 = b1 */ \
	movzbl x ## bl, RT3d;		/* RT3 = b0 */ \
	rorq $16, x; \
	movzbl x ## bh, RT0d;		/* RT0 = b3 */ \
	movzbl x ## bl, RT2d;		/* RT2 = b2 */ \
	rorq $16, x;			/* halves of x are now swapped */ \
	movl s0(CTX,RT0,4), RT0d; \
	addl s1(CTX,RT2,4), RT0d; \
	xorl s2(CTX,RT1,4), RT0d; \
	addl s3(CTX,RT3,4), RT0d; \
	xorq RT0, x;
184 
/* XOR the round-key pair currently held in RKEY into all four blocks. */
#define add_preloaded_roundkey4() \
	xorq RKEY, RX0; \
	xorq RKEY, RX1; \
	xorq RKEY, RX2; \
	xorq RKEY, RX3;
190 
/* Load the 64-bit round-key pair at word index n of p into RKEY. */
#define preload_roundkey_enc(n) \
	movq (p + 4 * (n))(CTX), RKEY;
193 
/*
 * Apply the key pair already sitting in RKEY to the four blocks, then
 * fetch the pair for the next step (word index n + 2) into RKEY.
 */
#define add_roundkey_enc4(n) \
	add_preloaded_roundkey4(); \
	preload_roundkey_enc(n + 2);
197 
/*
 * 4-way encryption step: add the preloaded key pair (and preload the next
 * one), then run F4() twice over every block. The four F4() calls of each
 * pass touch different block registers and are interleaved on purpose.
 */
#define round_enc4(n) \
	add_roundkey_enc4(n); \
	/* first pass */ \
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3); \
	/* second pass */ \
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3);
210 
/*
 * Load the round-key pair ending at word index n into RKEY for
 * decryption; the rotate swaps the two 32-bit key words.
 */
#define preload_roundkey_dec(n) \
	movq p+4*((n)-1)(CTX), RKEY; \
	rorq $32, RKEY;
214 
/*
 * Apply the key pair already sitting in RKEY to the four blocks, then
 * fetch the pair for the next step (ending at word index n - 2) into RKEY.
 */
#define add_roundkey_dec4(n) \
	add_preloaded_roundkey4(); \
	preload_roundkey_dec(n - 2);
218 
/*
 * 4-way decryption step: add the preloaded key pair (and preload the next
 * lower one), then run F4() twice over every block.
 */
#define round_dec4(n) \
	add_roundkey_dec4(n); \
	/* first pass */ \
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3); \
	/* second pass */ \
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3);
231 
/*
 * Load four consecutive 8-byte blocks from RIO into RX0..RX3, converting
 * each one exactly like read_block() does (half swap, then byte swap).
 */
#define read_block4() \
	/* block 0 */ \
	movq 0(RIO), RX0; \
	rorq $32, RX0; \
	bswapq RX0; \
	/* block 1 */ \
	movq 8(RIO), RX1; \
	rorq $32, RX1; \
	bswapq RX1; \
	/* block 2 */ \
	movq 16(RIO), RX2; \
	rorq $32, RX2; \
	bswapq RX2; \
	/* block 3 */ \
	movq 24(RIO), RX3; \
	rorq $32, RX3; \
	bswapq RX3;
248 
/*
 * Byte-swap RX0..RX3 in place and store them as four consecutive 8-byte
 * blocks starting at RIO.
 */
#define write_block4() \
	/* block 0 */ \
	bswapq RX0; \
	movq RX0, 0(RIO); \
	/* block 1 */ \
	bswapq RX1; \
	movq RX1, 8(RIO); \
	/* block 2 */ \
	bswapq RX2; \
	movq RX2, 16(RIO); \
	/* block 3 */ \
	bswapq RX3; \
	movq RX3, 24(RIO);
261 
/*
 * XOR the byte-swapped blocks 0..2 found at RIO into RX1..RX3
 * respectively; RX0 is left untouched.
 *
 * Clobbers RT0, RT2 and RT3.
 */
#define xor_block4() \
	/* RX1 ^= block 0 */ \
	movq 0(RIO), RT0; \
	bswapq RT0; \
	xorq RT0, RX1; \
	/* RX2 ^= block 1 */ \
	movq 8(RIO), RT2; \
	bswapq RT2; \
	xorq RT2, RX2; \
	/* RX3 ^= block 2 */ \
	movq 16(RIO), RT3; \
	bswapq RT3; \
	xorq RT3, RX3;
274 
SYM_FUNC_START(blowfish_enc_blk_4way)
	/*
	 * Encrypt four consecutive 8-byte blocks.
	 *
	 * input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * %r12 (CTX) and %rbx (RX1) are callee-saved and therefore saved
	 * on the stack. dst is parked in %r11 because F4() uses %rsi
	 * (RT1, same register as RIO) as a temporary.
	 */
	pushq %r12
	pushq %rbx

	movq %rsi, %r11			/* keep dst */
	movq %rdi, CTX
	movq %rdx, RIO			/* read from src */

	preload_roundkey_enc(0);	/* RKEY = first key pair */

	read_block4();

	round_enc4(0);
	round_enc4(2);
	round_enc4(4);
	round_enc4(6);
	round_enc4(8);
	round_enc4(10);
	round_enc4(12);
	round_enc4(14);
	add_preloaded_roundkey4();	/* final pair, preloaded by round_enc4(14) */

	movq %r11, RIO			/* write to dst */
	write_block4();

	popq %rbx
	popq %r12
	RET
SYM_FUNC_END(blowfish_enc_blk_4way)
309 
SYM_FUNC_START(__blowfish_dec_blk_4way)
	/*
	 * Decrypt four consecutive 8-byte blocks. If cbc is true, output
	 * blocks 1..3 are additionally XORed with source blocks 0..2
	 * before being written (output block 0 is not XORed here).
	 *
	 * input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: cbc (bool)
	 *
	 * %r12 (CTX) and %rbx (RX1) are callee-saved and saved on the
	 * stack. cbc and src are also pushed because %rcx and %rdx serve
	 * as RX2 and RX3 during the rounds. dst is parked in %r11.
	 */
	pushq %r12;
	pushq %rbx;
	pushq %rcx;			/* cbc flag */
	pushq %rdx;			/* src, needed again by xor_block4() */

	movq %rdi, CTX;
	movq %rsi, %r11;		/* keep dst */
	movq %rdx, RIO;			/* read from src */

	preload_roundkey_dec(17);
	read_block4();

	round_dec4(17);
	round_dec4(15);
	round_dec4(13);
	round_dec4(11);
	round_dec4(9);
	round_dec4(7);
	round_dec4(5);
	round_dec4(3);
	add_preloaded_roundkey4();	/* final pair, preloaded by round_dec4(3) */

	popq RIO;			/* RIO = src */
	popq %r12;			/* cbc flag; CTX is no longer needed */
	/*
	 * Only the low byte of a bool argument is defined by the calling
	 * convention, so test just that byte rather than the full register.
	 */
	testb %r12b, %r12b;
	jz .L_no_cbc_xor;

	xor_block4();

.L_no_cbc_xor:
	movq %r11, RIO;			/* write to dst */
	write_block4();

	popq %rbx;
	popq %r12;

	RET;
SYM_FUNC_END(__blowfish_dec_blk_4way)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php