~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/crypto/blake2s-core.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 OR MIT */
  2 /*
  3  * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  4  * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
  5  */
  6 
  7 #include <linux/linkage.h>
  8 
.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
.align 32
/*
 * BLAKE2s initialization vector IV[0..7] (RFC 7693; the same constants as
 * the SHA-256 initial hash values), stored as two little-endian 128-bit
 * words: the first .octa is IV[0..3], the second IV[4..7].
 */
IV:     .octa 0xA54FF53A3C6EF372BB67AE856A09E667
        .octa 0x5BE0CD191F83D9AB9B05688C510E527F
.section .rodata.cst16.ROT16, "aM", @progbits, 16
.align 16
/*
 * pshufb control mask rotating each 32-bit lane right by 16: every dword's
 * bytes are reordered as [2,3,0,1] (the byte pattern 02,03,00,01 repeats
 * in each lane when the .octa value is read little-endian).
 */
ROT16:  .octa 0x0D0C0F0E09080B0A0504070601000302
.section .rodata.cst16.ROR328, "aM", @progbits, 16
.align 16
/*
 * pshufb control mask rotating each 32-bit lane right by 8: bytes are
 * reordered as [1,2,3,0] within each dword.
 */
ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
.align 64
/*
 * Message schedule for the ten BLAKE2s rounds, one 16-byte row per round.
 * These are the standard sigma permutations pre-shuffled into the order in
 * which the SSSE3 round code below gathers message words: bytes 0-7 of a
 * row feed the two column-step injections, bytes 8-15 the two
 * diagonal-step injections.
 *
 * NOTE(review): the section name says cst64 but the merge entity size is
 * 160 (the whole table) -- presumably intentional so the table merges as a
 * single unit; confirm against the kernel's .rodata.cstNN convention.
 */
SIGMA:
.byte  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
.byte 14,  4,  9, 13, 10,  8, 15,  6,  5,  1,  0, 11,  3, 12,  2,  7
.byte 11, 12,  5, 15,  8,  0,  2, 13,  9, 10,  3,  7,  4, 14,  6,  1
.byte  7,  3, 13, 11,  9,  1, 12, 14, 15,  2,  5,  4,  8,  6, 10,  0
.byte  9,  5,  2, 10,  0,  7,  4, 15,  3, 14, 11,  6, 13,  1, 12,  8
.byte  2,  6,  0,  8, 12, 10, 11,  3,  1,  4,  7, 15,  9, 13,  5, 14
.byte 12,  1, 14,  4,  5, 15, 13, 10,  8,  0,  6,  9, 11,  7,  3,  2
.byte 13,  7, 12,  3, 11, 14,  1,  9,  2,  5, 15,  8, 10,  0,  4,  6
.byte  6, 14, 11,  0, 15,  9,  3,  8, 10, 12, 13,  1,  5,  2,  7,  4
.byte 10,  8,  7,  1,  2,  4,  6,  5, 13, 15,  9,  3,  0, 11, 14, 12
#ifdef CONFIG_AS_AVX512
.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
.align 64
/*
 * Per-round dword index tables for the AVX-512 path's vpermi2d: each
 * 64-byte row holds two 8-dword index vectors selecting 16 of the 32
 * message words live in %ymm6/%ymm7 (index 0-7 picks from the first
 * source, 8-15 from the second).  Because that round loop overwrites its
 * message registers with the permuted result each iteration, rows after
 * the first encode the permutation relative to the PREVIOUS round's
 * ordering -- they are compositions, not the raw sigma values.
 *
 * NOTE(review): section named cst64 but merge entity size is 640 (the
 * whole table); see the matching note on BLAKE2S_SIGMA.
 */
SIGMA2:
.long  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
.long  8,  2, 13, 15, 10,  9, 12,  3,  6,  4,  0, 14,  5, 11,  1,  7
.long 11, 13,  8,  6,  5, 10, 14,  3,  2,  4, 12, 15,  1,  0,  7,  9
.long 11, 10,  7,  0,  8, 15,  1, 13,  3,  6,  2, 12,  4, 14,  9,  5
.long  4, 10,  9, 14, 15,  0, 11,  8,  1,  7,  3, 13,  2,  5,  6, 12
.long  2, 11,  4, 15, 14,  3, 10,  8, 13,  6,  5,  7,  0, 12,  1,  9
.long  4,  8, 15,  9, 14, 11, 13,  5,  3,  2,  1, 12,  6, 10,  7,  0
.long  6, 13,  0, 14, 12,  2,  1, 11, 15,  4,  5,  8,  7,  9,  3, 10
.long 15,  5,  4, 13, 10,  7,  3, 11, 12,  2,  0,  6,  9,  8,  1, 14
.long  8,  7, 14, 11, 13, 15,  0, 12, 10,  4,  5,  6,  3,  2,  1,  9
#endif /* CONFIG_AS_AVX512 */
 47 
.text
/*
 * void blake2s_compress_ssse3(struct blake2s_state *state,
 *                             const u8 *block, size_t nblocks, u32 inc)
 *
 * SysV AMD64 ABI:
 *   %rdi = state: h[0..7] (32 bytes), then a 16-byte t[2]/f[2] counter
 *          block at offset 0x20 -- assumes struct blake2s_state layout;
 *          TODO confirm against the C header
 *   %rsi = block: nblocks * 64 bytes of message data
 *   %rdx = nblocks
 *   %rcx = inc: amount added to the 64-bit byte counter t per block
 *
 * Row-vectorized BLAKE2s: %xmm0-%xmm3 each hold one 4-dword row of the
 * 4x4 working matrix v[0..15], so all four G functions of a half-round
 * execute in parallel.  Rotates by 16 and 8 use pshufb byte shuffles
 * (masks kept resident in %xmm12/%xmm13); rotates by 12 and 7 use
 * shift+or.  Clobbers %rax, %rcx, %r8 and xmm registers; no stack use.
 */
SYM_FUNC_START(blake2s_compress_ssse3)
        testq           %rdx,%rdx
        je              .Lendofloop             /* nblocks == 0: nothing to do */
        movdqu          (%rdi),%xmm0            /* row a: h[0..3] */
        movdqu          0x10(%rdi),%xmm1        /* row b: h[4..7] */
        movdqa          ROT16(%rip),%xmm12      /* ror-16 pshufb mask */
        movdqa          ROR328(%rip),%xmm13     /* ror-8 pshufb mask */
        movdqu          0x20(%rdi),%xmm14       /* t[0],t[1],f[0],f[1] */
        movq            %rcx,%xmm15             /* inc in low qword; high qword zero */
        leaq            SIGMA+0xa0(%rip),%r8    /* %r8 = end of the 10-row table */
        jmp             .Lbeginofloop
        .align          32
.Lbeginofloop:
        /* Per-block setup: save input h for the feed-forward, bump the
         * byte counter, and build rows c and d of the working matrix. */
        movdqa          %xmm0,%xmm10
        movdqa          %xmm1,%xmm11
        paddq           %xmm15,%xmm14           /* t += inc (64-bit add; f lanes untouched) */
        movdqa          IV(%rip),%xmm2          /* row c: IV[0..3] */
        movdqa          %xmm14,%xmm3
        pxor            IV+0x10(%rip),%xmm3     /* row d: (t,f) ^ IV[4..7] */
        leaq            SIGMA(%rip),%rcx        /* %rcx walks one sigma row per round */
.Lroundloop:
        /* Gather the first four scheduled message words into %xmm4. */
        movzbl          (%rcx),%eax
        movd            (%rsi,%rax,4),%xmm4
        movzbl          0x1(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm5
        movzbl          0x2(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm6
        movzbl          0x3(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm7
        punpckldq       %xmm5,%xmm4
        punpckldq       %xmm7,%xmm6
        punpcklqdq      %xmm6,%xmm4
        /* Column step, first half of G:
         * a += m; a += b; d = ror32(d ^ a, 16); c += d; b = ror32(b ^ c, 12) */
        paddd           %xmm4,%xmm0
        paddd           %xmm1,%xmm0
        pxor            %xmm0,%xmm3
        pshufb          %xmm12,%xmm3            /* ror 16 via byte shuffle */
        paddd           %xmm3,%xmm2
        pxor            %xmm2,%xmm1
        movdqa          %xmm1,%xmm8
        psrld           $0xc,%xmm1
        pslld           $0x14,%xmm8
        por             %xmm8,%xmm1             /* b = (b >> 12) | (b << 20) */
        /* Gather words 4-7 of this round's schedule into %xmm5. */
        movzbl          0x4(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm5
        movzbl          0x5(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm6
        movzbl          0x6(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm7
        movzbl          0x7(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm4
        punpckldq       %xmm6,%xmm5
        punpckldq       %xmm4,%xmm7
        punpcklqdq      %xmm7,%xmm5
        /* Column step, second half of G:
         * a += m; a += b; d = ror32(d ^ a, 8); c += d; b = ror32(b ^ c, 7) */
        paddd           %xmm5,%xmm0
        paddd           %xmm1,%xmm0
        pxor            %xmm0,%xmm3
        pshufb          %xmm13,%xmm3            /* ror 8 via byte shuffle */
        paddd           %xmm3,%xmm2
        pxor            %xmm2,%xmm1
        movdqa          %xmm1,%xmm8
        psrld           $0x7,%xmm1
        pslld           $0x19,%xmm8
        por             %xmm8,%xmm1             /* b = (b >> 7) | (b << 25) */
        /* Diagonalize: rotate rows so the diagonal step reuses the same
         * column-wise G code. */
        pshufd          $0x93,%xmm0,%xmm0
        pshufd          $0x4e,%xmm3,%xmm3
        pshufd          $0x39,%xmm2,%xmm2
        /* Gather words 8-11 into %xmm6 for the diagonal step. */
        movzbl          0x8(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm6
        movzbl          0x9(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm7
        movzbl          0xa(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm4
        movzbl          0xb(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm5
        punpckldq       %xmm7,%xmm6
        punpckldq       %xmm5,%xmm4
        punpcklqdq      %xmm4,%xmm6
        /* Diagonal step, first half of G (rotates 16 then 12). */
        paddd           %xmm6,%xmm0
        paddd           %xmm1,%xmm0
        pxor            %xmm0,%xmm3
        pshufb          %xmm12,%xmm3
        paddd           %xmm3,%xmm2
        pxor            %xmm2,%xmm1
        movdqa          %xmm1,%xmm8
        psrld           $0xc,%xmm1
        pslld           $0x14,%xmm8
        por             %xmm8,%xmm1
        /* Gather words 12-15 into %xmm7. */
        movzbl          0xc(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm7
        movzbl          0xd(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm4
        movzbl          0xe(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm5
        movzbl          0xf(%rcx),%eax
        movd            (%rsi,%rax,4),%xmm6
        punpckldq       %xmm4,%xmm7
        punpckldq       %xmm6,%xmm5
        punpcklqdq      %xmm5,%xmm7
        /* Diagonal step, second half of G (rotates 8 then 7). */
        paddd           %xmm7,%xmm0
        paddd           %xmm1,%xmm0
        pxor            %xmm0,%xmm3
        pshufb          %xmm13,%xmm3
        paddd           %xmm3,%xmm2
        pxor            %xmm2,%xmm1
        movdqa          %xmm1,%xmm8
        psrld           $0x7,%xmm1
        pslld           $0x19,%xmm8
        por             %xmm8,%xmm1
        /* Un-diagonalize (inverse of the shuffles above). */
        pshufd          $0x39,%xmm0,%xmm0
        pshufd          $0x4e,%xmm3,%xmm3
        pshufd          $0x93,%xmm2,%xmm2
        addq            $0x10,%rcx              /* next sigma row */
        cmpq            %r8,%rcx
        jnz             .Lroundloop             /* 10 rounds total */
        /* Feed-forward: h[0..3] ^= a ^ c; h[4..7] ^= b ^ d. */
        pxor            %xmm2,%xmm0
        pxor            %xmm3,%xmm1
        pxor            %xmm10,%xmm0
        pxor            %xmm11,%xmm1
        addq            $0x40,%rsi              /* next 64-byte message block */
        decq            %rdx
        jnz             .Lbeginofloop
        /* Write back the updated h and the advanced t/f counters. */
        movdqu          %xmm0,(%rdi)
        movdqu          %xmm1,0x10(%rdi)
        movdqu          %xmm14,0x20(%rdi)
.Lendofloop:
        RET
SYM_FUNC_END(blake2s_compress_ssse3)
176 
#ifdef CONFIG_AS_AVX512
/*
 * void blake2s_compress_avx512(struct blake2s_state *state,
 *                              const u8 *block, size_t nblocks, u32 inc)
 *
 * SysV AMD64 ABI:
 *   %rdi = state: h[0..7], then the t[2]/f[2] counter block at 0x20
 *          (assumes struct blake2s_state layout; TODO confirm)
 *   %rsi = block: nblocks * 64 bytes of message data
 *   %rdx = nblocks (NOTE(review): unlike the SSSE3 version there is no
 *          nblocks == 0 guard before the decq-driven loop -- presumably
 *          callers guarantee nblocks > 0; verify at the call sites)
 *   %rcx = inc: byte-counter increment per block
 *
 * AVX-512VL variant: vprord replaces the shuffle/shift rotate sequences,
 * and each round's message schedule is computed with two vpermi2d
 * permutations of the 32 message words held in %ymm6/%ymm7 instead of
 * sixteen scalar gathers.  Clobbers %rax, %cl and vector registers.
 */
SYM_FUNC_START(blake2s_compress_avx512)
        vmovdqu         (%rdi),%xmm0            /* row a: h[0..3] */
        vmovdqu         0x10(%rdi),%xmm1        /* row b: h[4..7] */
        vmovdqu         0x20(%rdi),%xmm4        /* t[0],t[1],f[0],f[1] */
        vmovq           %rcx,%xmm5              /* inc in low qword; high qword zero */
        vmovdqa         IV(%rip),%xmm14         /* keep both IV halves resident */
        vmovdqa         IV+16(%rip),%xmm15
        jmp             .Lblake2s_compress_avx512_mainloop
.align 32
.Lblake2s_compress_avx512_mainloop:
        vmovdqa         %xmm0,%xmm10            /* save h for the feed-forward */
        vmovdqa         %xmm1,%xmm11
        vpaddq          %xmm5,%xmm4,%xmm4       /* t += inc (64-bit; f lanes untouched) */
        vmovdqa         %xmm14,%xmm2            /* row c: IV[0..3] */
        vpxor           %xmm15,%xmm4,%xmm3      /* row d: (t,f) ^ IV[4..7] */
        vmovdqu         (%rsi),%ymm6            /* m[0..7] */
        vmovdqu         0x20(%rsi),%ymm7        /* m[8..15] */
        addq            $0x40,%rsi
        leaq            SIGMA2(%rip),%rax       /* %rax walks one 64-byte row per round */
        movb            $0xa,%cl                /* ten rounds */
.Lblake2s_compress_avx512_roundloop:
        addq            $0x40,%rax
        vmovdqa         -0x40(%rax),%ymm8       /* this round's two index vectors */
        vmovdqa         -0x20(%rax),%ymm9
        /* Permute the 32 live message words: index 0-7 selects from
         * %ymm6, 8-15 from %ymm7; results overwrite the indices. */
        vpermi2d        %ymm7,%ymm6,%ymm8
        vpermi2d        %ymm7,%ymm6,%ymm9
        /* The permuted words become next round's input, which is why
         * SIGMA2 rows are composed permutations rather than raw sigma. */
        vmovdqa         %ymm8,%ymm6
        vmovdqa         %ymm9,%ymm7
        /* Column step, first half of G (low lane of %ymm8 is m):
         * a += m; a += b; d = ror32(d ^ a, 16); c += d; b = ror32(b ^ c, 12) */
        vpaddd          %xmm8,%xmm0,%xmm0
        vpaddd          %xmm1,%xmm0,%xmm0
        vpxor           %xmm0,%xmm3,%xmm3
        vprord          $0x10,%xmm3,%xmm3
        vpaddd          %xmm3,%xmm2,%xmm2
        vpxor           %xmm2,%xmm1,%xmm1
        vprord          $0xc,%xmm1,%xmm1
        vextracti128    $0x1,%ymm8,%xmm8        /* high lane: next four message words */
        /* Column step, second half of G (rotates 8 then 7). */
        vpaddd          %xmm8,%xmm0,%xmm0
        vpaddd          %xmm1,%xmm0,%xmm0
        vpxor           %xmm0,%xmm3,%xmm3
        vprord          $0x8,%xmm3,%xmm3
        vpaddd          %xmm3,%xmm2,%xmm2
        vpxor           %xmm2,%xmm1,%xmm1
        vprord          $0x7,%xmm1,%xmm1
        /* Diagonalize rows so the diagonal step reuses the same code. */
        vpshufd         $0x93,%xmm0,%xmm0
        vpshufd         $0x4e,%xmm3,%xmm3
        vpshufd         $0x39,%xmm2,%xmm2
        /* Diagonal step, first half of G (low lane of %ymm9 is m). */
        vpaddd          %xmm9,%xmm0,%xmm0
        vpaddd          %xmm1,%xmm0,%xmm0
        vpxor           %xmm0,%xmm3,%xmm3
        vprord          $0x10,%xmm3,%xmm3
        vpaddd          %xmm3,%xmm2,%xmm2
        vpxor           %xmm2,%xmm1,%xmm1
        vprord          $0xc,%xmm1,%xmm1
        vextracti128    $0x1,%ymm9,%xmm9        /* high lane: last four words */
        /* Diagonal step, second half of G (rotates 8 then 7). */
        vpaddd          %xmm9,%xmm0,%xmm0
        vpaddd          %xmm1,%xmm0,%xmm0
        vpxor           %xmm0,%xmm3,%xmm3
        vprord          $0x8,%xmm3,%xmm3
        vpaddd          %xmm3,%xmm2,%xmm2
        vpxor           %xmm2,%xmm1,%xmm1
        vprord          $0x7,%xmm1,%xmm1
        /* Un-diagonalize (inverse shuffles). */
        vpshufd         $0x39,%xmm0,%xmm0
        vpshufd         $0x4e,%xmm3,%xmm3
        vpshufd         $0x93,%xmm2,%xmm2
        decb            %cl
        jne             .Lblake2s_compress_avx512_roundloop
        /* Feed-forward: h[0..3] ^= a ^ c; h[4..7] ^= b ^ d. */
        vpxor           %xmm10,%xmm0,%xmm0
        vpxor           %xmm11,%xmm1,%xmm1
        vpxor           %xmm2,%xmm0,%xmm0
        vpxor           %xmm3,%xmm1,%xmm1
        decq            %rdx
        jne             .Lblake2s_compress_avx512_mainloop
        /* Write back the updated h and the advanced t/f counters. */
        vmovdqu         %xmm0,(%rdi)
        vmovdqu         %xmm1,0x10(%rdi)
        vmovdqu         %xmm4,0x20(%rdi)
        vzeroupper                              /* clear upper YMM state before returning to SSE code */
        RET
SYM_FUNC_END(blake2s_compress_avx512)
#endif /* CONFIG_AS_AVX512 */

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php