Linux/lib/raid6/avx2.c

// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 */

#include <linux/raid/pq.h>
#include "x86.h"

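/*
 * 0x1d is the low byte of the RAID-6 Galois-field generator polynomial
 * x^8 + x^4 + x^3 + x^2 + 1.  Broadcasting it across a 256-bit register
 * lets the vpcmpgtb/vpaddb/vpand/vpxor sequences below multiply 32 bytes
 * by 2 in GF(2^8) at once.  A scalar sketch of that step for one byte:
 *
 *	mask = (x & 0x80) ? 0x1d : 0;		(vpcmpgtb against zero + vpand)
 *	x    = ((x << 1) & 0xff) ^ mask;	(vpaddb x,x + vpxor)
 */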
static const struct raid6_avx2_constants {
        u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
        { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
          0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

static int raid6_have_avx2(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}

/*
 * Plain AVX2 implementation
 */
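/*
 * gen_syndrome computes, for each 32-byte slice, the XOR parity
 * P = D_0 ^ D_1 ^ ... ^ D_z0 and the Reed-Solomon syndrome
 * Q = D_0 ^ 2*D_1 ^ 4*D_2 ^ ... ^ 2^z0 * D_z0 (all arithmetic in GF(2^8)),
 * walking the data disks from highest to lowest so that Q can be
 * accumulated Horner-style as Q = 2*Q ^ D_z at each step.  A scalar sketch
 * of the per-byte loop body, with gf_mul2() standing in for the SIMD
 * multiply-by-2 sequence shown above:
 *
 *	p ^= d[z];
 *	q  = gf_mul2(q) ^ d[z];
 *
 * kernel_fpu_begin()/kernel_fpu_end() bracket the YMM usage because the
 * kernel does not otherwise preserve vector register state for its own code.
 */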
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm3,%ymm3,%ymm3");        /* Zero temp */

        for (d = 0; d < bytes; d += 32) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
                asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
                asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
                asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
                for (z = z0-2; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm6,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm6,%ymm4,%ymm4");
                        asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
                }
                asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
                asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                asm volatile("vpand %ymm0,%ymm5,%ymm5");
                asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                asm volatile("vpxor %ymm6,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm4,%ymm4");

                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vpxor %ymm2,%ymm2,%ymm2");
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vpxor %ymm4,%ymm4,%ymm4");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

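/*
 * xor_syndrome updates an existing P/Q pair when only the data disks in
 * [start, stop] have changed (the partial-stripe update path).  The changed
 * blocks are folded into P and into a partial Q ("right side"); for the
 * untouched disks below start, that partial Q only needs one multiply-by-2
 * per skipped disk ("left side") before being XORed into the stored Q.
 * In scalar form, per byte (gf_mul2() as above):
 *
 *	q_delta = d[stop];
 *	p ^= d[stop];
 *	for (z = stop - 1; z >= start; z--) {
 *		q_delta = gf_mul2(q_delta) ^ d[z];
 *		p ^= d[z];
 *	}
 *	for (z = start - 1; z >= 0; z--)
 *		q_delta = gf_mul2(q_delta);
 *	q ^= q_delta;
 */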
static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 32) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                }
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                }
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x1 = {
        raid6_avx21_gen_syndrome,
        raid6_avx21_xor_syndrome,
        raid6_have_avx2,
        "avx2x1",
        .priority = 2           /* Prefer AVX2 over priority 1 (SSE2 and others) */
};
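/*
 * raid6_avx2x1 (like raid6_avx2x2/raid6_avx2x4 below) is declared in
 * <linux/raid/pq.h> and listed in the raid6_algos[] table in
 * lib/raid6/algos.c, where the raid6 core benchmarks the available
 * implementations at init time and picks one; the "avx2x1" string is the
 * name reported in that benchmark output.
 */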

/*
 * Unrolled-by-2 AVX2 implementation
 */
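/*
 * Same algorithm as avx2x1, but each pass of the outer loop handles two
 * independent 32-byte slices (64 bytes), giving the CPU two dependency
 * chains to interleave and so helping hide instruction latency.
 */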
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */

        /* We uniformly assume a single prefetch covers at least 32 bytes */
        for (d = 0; d < bytes; d += 64) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
                asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
                asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
                for (z = z0-1; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
                        asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 64) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm3,%ymm3");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7"
                                     :: "m" (dptr[z][d+32]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x2 = {
        raid6_avx22_gen_syndrome,
        raid6_avx22_xor_syndrome,
        raid6_have_avx2,
        "avx2x2",
        .priority = 2           /* Prefer AVX2 over priority 1 (SSE2 and others) */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX2 implementation
 */
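/*
 * Four slices (128 bytes) per outer-loop pass.  This variant also keeps
 * its extra P/Q accumulators and temporaries in ymm10-ymm15, and registers
 * ymm8-ymm15 only exist in 64-bit mode, hence the CONFIG_X86_64 guard.
 */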
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm1,%ymm1,%ymm1");        /* Zero temp */
        asm volatile("vpxor %ymm2,%ymm2,%ymm2");        /* P[0] */
        asm volatile("vpxor %ymm3,%ymm3,%ymm3");        /* P[1] */
        asm volatile("vpxor %ymm4,%ymm4,%ymm4");        /* Q[0] */
        asm volatile("vpxor %ymm6,%ymm6,%ymm6");        /* Q[1] */
        asm volatile("vpxor %ymm10,%ymm10,%ymm10");     /* P[2] */
        asm volatile("vpxor %ymm11,%ymm11,%ymm11");     /* P[3] */
        asm volatile("vpxor %ymm12,%ymm12,%ymm12");     /* Q[2] */
        asm volatile("vpxor %ymm14,%ymm14,%ymm14");     /* Q[3] */

        for (d = 0; d < bytes; d += 128) {
                for (z = z0; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
                        asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                        asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
                        asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
                        asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm13,%ymm10,%ymm10");
                        asm volatile("vpxor %ymm15,%ymm11,%ymm11");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vpxor %ymm2,%ymm2,%ymm2");
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vpxor %ymm3,%ymm3,%ymm3");
                asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
                asm volatile("vpxor %ymm10,%ymm10,%ymm10");
                asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
                asm volatile("vpxor %ymm11,%ymm11,%ymm11");
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vpxor %ymm4,%ymm4,%ymm4");
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vpxor %ymm6,%ymm6,%ymm6");
                asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
                asm volatile("vpxor %ymm12,%ymm12,%ymm12");
                asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
                asm volatile("vpxor %ymm14,%ymm14,%ymm14");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 128) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
                asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
                asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
                asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm3,%ymm3");
                asm volatile("vpxor %ymm12,%ymm10,%ymm10");
                asm volatile("vpxor %ymm14,%ymm11,%ymm11");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm13,%ymm13,%ymm13");
                        asm volatile("vpxor %ymm15,%ymm15,%ymm15");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7"
                                     :: "m" (dptr[z][d+32]));
                        asm volatile("vmovdqa %0,%%ymm13"
                                     :: "m" (dptr[z][d+64]));
                        asm volatile("vmovdqa %0,%%ymm15"
                                     :: "m" (dptr[z][d+96]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm13,%ymm10,%ymm10");
                        asm volatile("vpxor %ymm15,%ymm11,%ymm11");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("prefetchnta %0" :: "m" (q[d]));
                asm volatile("prefetchnta %0" :: "m" (q[d+64]));
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm13,%ymm13,%ymm13");
                        asm volatile("vpxor %ymm15,%ymm15,%ymm15");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
                asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
                asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
                asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
                asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
        }
        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x4 = {
        raid6_avx24_gen_syndrome,
        raid6_avx24_xor_syndrome,
        raid6_have_avx2,
        "avx2x4",
        .priority = 2           /* Prefer AVX2 over priority 1 (SSE2 and others) */
};
#endif /* CONFIG_X86_64 */
