~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/lib/raid6/avx512.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /* -*- linux-c -*- --------------------------------------------------------
  3  *
  4  *   Copyright (C) 2016 Intel Corporation
  5  *
  6  *   Author: Gayatri Kammela <gayatri.kammela@intel.com>
  7  *   Author: Megha Dey <megha.dey@linux.intel.com>
  8  *
  9  *   Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
 10  *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 11  *
 12  * -----------------------------------------------------------------------
 13  */
 14 
 15 /*
 16  * AVX512 implementation of RAID-6 syndrome functions
 17  *
 18  */
 19 
 20 #ifdef CONFIG_AS_AVX512
 21 
 22 #include <linux/raid/pq.h>
 23 #include "x86.h"
 24 
/*
 * GF(2^8) reduction constant 0x1d replicated across 64 bytes, so one
 * vmovdqa64 broadcasts it into a full ZMM register (used to fold the
 * carry bytes back in during the multiply-by-2 step).
 * Aligned to 512/8 = 64 bytes, as vmovdqa64 requires an aligned operand.
 */
static const struct raid6_avx512_constants {
        u64 x1d[8];
} raid6_avx512_constants __aligned(512/8) = {
        { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
          0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
          0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
          0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};
 33 
 34 static int raid6_have_avx512(void)
 35 {
 36         return boot_cpu_has(X86_FEATURE_AVX2) &&
 37                 boot_cpu_has(X86_FEATURE_AVX) &&
 38                 boot_cpu_has(X86_FEATURE_AVX512F) &&
 39                 boot_cpu_has(X86_FEATURE_AVX512BW) &&
 40                 boot_cpu_has(X86_FEATURE_AVX512VL) &&
 41                 boot_cpu_has(X86_FEATURE_AVX512DQ);
 42 }
 43 
/*
 * Generate the RAID-6 P (XOR parity) and Q (GF(2^8) syndrome) pages,
 * processing one 64-byte ZMM lane per loop iteration.
 *
 * @disks: total number of pointers in @ptrs (data disks + P + Q)
 * @bytes: page length; presumably a multiple of 64 with 64-byte-aligned
 *         pages (vmovdqa64/vmovntdq fault otherwise) — callers ensure this
 * @ptrs:  ptrs[0..disks-3] = data pages, ptrs[disks-2] = P, ptrs[disks-1] = Q
 */
static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        /* zmm0 = 64 x 0x1d (reduction constant), zmm1 = all zeroes */
        asm volatile("vmovdqa64 %0,%%zmm0\n\t"
                     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
                     :
                     : "m" (raid6_avx512_constants.x1d[0]));

        for (d = 0; d < bytes; d += 64) {
                /* Seed P (zmm2) and Q (zmm4) from the highest data disk;
                 * preload the next disk's lane into zmm6. */
                asm volatile("prefetchnta %0\n\t"
                             "vmovdqa64 %0,%%zmm2\n\t"     /* P[0] */
                             "prefetchnta %1\n\t"
                             "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
                             "vmovdqa64 %1,%%zmm6"
                             :
                             : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
                for (z = z0-2; z >= 0; z--) {
                        /*
                         * One Horner step: Q = Q*2 ^ data, P ^= data.
                         * Bytewise GF multiply-by-2: vpcmpgtb against
                         * zero captures each byte's sign bit in k1,
                         * vpmovm2b widens k1 to 0xff/0x00 bytes,
                         * vpaddb doubles each byte (shift left 1), and
                         * the masked XOR with 0x1d reduces the carries.
                         */
                        asm volatile("prefetchnta %0\n\t"
                                     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
                                     "vpmovm2b %%k1,%%zmm5\n\t"
                                     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
                                     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
                                     "vmovdqa64 %0,%%zmm6"  /* preload next */
                                     :
                                     : "m" (dptr[z][d]));
                }
                /* Fold in the last preloaded disk (zmm6), stream P and Q
                 * out with non-temporal stores, and re-zero the
                 * accumulators for the next 64-byte lane. */
                asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
                             "vpmovm2b %%k1,%%zmm5\n\t"
                             "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                             "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                             "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                             "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
                             "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
                             "vmovntdq %%zmm2,%0\n\t"
                             "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
                             "vmovntdq %%zmm4,%1\n\t"
                             "vpxorq %%zmm4,%%zmm4,%%zmm4"
                             :
                             : "m" (p[d]), "m" (q[d]));
        }

        /* Drain the non-temporal stores before anyone reads P/Q. */
        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
100 
/*
 * XOR-patch the existing P/Q pages for a partial-stripe update touching
 * only data disks start..stop, one 64-byte ZMM lane at a time.
 *
 * Disks above @stop contribute nothing new, so the Q recurrence starts
 * at z0 = stop ("right side optimization").  Disks below @start only
 * need Q multiplied by the generator per step — their data is never
 * read ("left side optimization").  Results are XORed into the existing
 * P/Q rather than overwriting them.
 */
static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
                                       size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        /* zmm0 = 64 x 0x1d reduction constant */
        asm volatile("vmovdqa64 %0,%%zmm0"
                     : : "m" (raid6_avx512_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 64) {
                /* Q accumulator (zmm4) = data[z0]; P delta (zmm2) =
                 * data[z0] ^ old P. */
                asm volatile("vmovdqa64 %0,%%zmm4\n\t"
                             "vmovdqa64 %1,%%zmm2\n\t"
                             "vpxorq %%zmm4,%%zmm2,%%zmm2"
                             :
                             : "m" (dptr[z0][d]),  "m" (p[d]));
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        /* Q = Q*2 ^ data[z]; P ^= data[z].  See the
                         * gen_syndrome comments for the multiply-by-2
                         * (sign-mask / double / reduce) sequence. */
                        asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                     "vpmovm2b %%k1,%%zmm5\n\t"
                                     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                     "vmovdqa64 %0,%%zmm5\n\t"
                                     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4"
                                     :
                                     : "m" (dptr[z][d]));
                }
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        /* Only Q = Q*2; no data is read below start. */
                        asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                     "vpmovm2b %%k1,%%zmm5\n\t"
                                     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4"
                                     :
                                     : );
                }
                /* Merge the Q delta into the old Q, then store both. */
                asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
                /* Don't use movntdq for r/w memory area < cache line */
                             "vmovdqa64 %%zmm4,%0\n\t"
                             "vmovdqa64 %%zmm2,%1"
                             :
                             : "m" (q[d]), "m" (p[d]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
159 
160 const struct raid6_calls raid6_avx512x1 = {
161         raid6_avx5121_gen_syndrome,
162         raid6_avx5121_xor_syndrome,
163         raid6_have_avx512,
164         "avx512x1",
165         .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
166 };
167 
168 /*
169  * Unrolled-by-2 AVX512 implementation
170  */
/*
 * Two-lane unrolled variant of raid6_avx5121_gen_syndrome: computes P/Q
 * for 128 bytes (two ZMM registers) per iteration.  Lane 0 uses
 * zmm2/zmm4 (P/Q), lane 1 uses zmm3/zmm6; zmm5/zmm7 hold the per-lane
 * carry masks and loaded data.  Same page-layout and alignment
 * expectations as the unroll-by-1 version.
 */
static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        /* zmm0 = 64 x 0x1d, zmm1 = all zeroes */
        asm volatile("vmovdqa64 %0,%%zmm0\n\t"
                     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
                     :
                     : "m" (raid6_avx512_constants.x1d[0]));

        /* We uniformly assume a single prefetch covers at least 64 bytes */
        for (d = 0; d < bytes; d += 128) {
                /* Seed both P/Q lane pairs from the highest data disk. */
                asm volatile("prefetchnta %0\n\t"
                             "prefetchnta %1\n\t"
                             "vmovdqa64 %0,%%zmm2\n\t"      /* P[0] */
                             "vmovdqa64 %1,%%zmm3\n\t"      /* P[1] */
                             "vmovdqa64 %%zmm2,%%zmm4\n\t"  /* Q[0] */
                             "vmovdqa64 %%zmm3,%%zmm6"      /* Q[1] */
                             :
                             : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
                for (z = z0-1; z >= 0; z--) {
                        /* Per lane: Q = Q*2 ^ data[z], P ^= data[z]
                         * (sign-mask, double, reduce with 0x1d). */
                        asm volatile("prefetchnta %0\n\t"
                                     "prefetchnta %1\n\t"
                                     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
                                     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
                                     "vpmovm2b %%k1,%%zmm5\n\t"
                                     "vpmovm2b %%k2,%%zmm7\n\t"
                                     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                                     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                                     "vmovdqa64 %0,%%zmm5\n\t"
                                     "vmovdqa64 %1,%%zmm7\n\t"
                                     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                                     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                     "vpxorq %%zmm7,%%zmm6,%%zmm6"
                                     :
                                     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
                }
                /* Stream both lanes of P and Q out non-temporally. */
                asm volatile("vmovntdq %%zmm2,%0\n\t"
                             "vmovntdq %%zmm3,%1\n\t"
                             "vmovntdq %%zmm4,%2\n\t"
                             "vmovntdq %%zmm6,%3"
                             :
                             : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
                               "m" (q[d+64]));
        }

        /* Drain the non-temporal stores before anyone reads P/Q. */
        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
232 
/*
 * Two-lane unrolled variant of raid6_avx5121_xor_syndrome: XOR-patches
 * P/Q for a partial write to disks start..stop, 128 bytes per
 * iteration.  Lane 0 uses zmm2/zmm4 (P delta / Q accumulator), lane 1
 * uses zmm3/zmm6; zmm5/zmm7 are scratch.  Same right/left-side
 * optimization structure as the unroll-by-1 version.
 */
static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
                                       size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        /* zmm0 = 64 x 0x1d reduction constant */
        asm volatile("vmovdqa64 %0,%%zmm0"
                     : : "m" (raid6_avx512_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 128) {
                /* Q = data[z0]; P delta = data[z0] ^ old P, per lane. */
                asm volatile("vmovdqa64 %0,%%zmm4\n\t"
                             "vmovdqa64 %1,%%zmm6\n\t"
                             "vmovdqa64 %2,%%zmm2\n\t"
                             "vmovdqa64 %3,%%zmm3\n\t"
                             "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
                             "vpxorq %%zmm6,%%zmm3,%%zmm3"
                             :
                             : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
                               "m" (p[d]), "m" (p[d+64]));
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        /* Per lane: Q = Q*2 ^ data[z], P ^= data[z]. */
                        asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
                                     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
                                     "vpmovm2b %%k1,%%zmm5\n\t"
                                     "vpmovm2b %%k2,%%zmm7\n\t"
                                     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                                     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                                     "vmovdqa64 %0,%%zmm5\n\t"
                                     "vmovdqa64 %1,%%zmm7\n\t"
                                     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                                     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                     "vpxorq %%zmm7,%%zmm6,%%zmm6"
                                     :
                                     : "m" (dptr[z][d]),  "m" (dptr[z][d+64]));
                }
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        /* Only Q = Q*2; no data read below start. */
                        asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
                                     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
                                     "vpmovm2b %%k1,%%zmm5\n\t"
                                     "vpmovm2b %%k2,%%zmm7\n\t"
                                     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                                     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                                     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                     "vpxorq %%zmm7,%%zmm6,%%zmm6"
                                     :
                                     : );
                }
                /* Merge Q deltas into old Q, then store both lanes. */
                asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
                             "vpxorq %1,%%zmm6,%%zmm6\n\t"
                             /* Don't use movntdq for r/w
                              * memory area < cache line
                              */
                             "vmovdqa64 %%zmm4,%0\n\t"
                             "vmovdqa64 %%zmm6,%1\n\t"
                             "vmovdqa64 %%zmm2,%2\n\t"
                             "vmovdqa64 %%zmm3,%3"
                             :
                             : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
                               "m" (p[d+64]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
316 
317 const struct raid6_calls raid6_avx512x2 = {
318         raid6_avx5122_gen_syndrome,
319         raid6_avx5122_xor_syndrome,
320         raid6_have_avx512,
321         "avx512x2",
322         .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
323 };
324 
325 #ifdef CONFIG_X86_64
326 
327 /*
 * Unrolled-by-4 AVX512 implementation
329  */
/*
 * Four-lane unrolled syndrome generation: 256 bytes per iteration.
 * Lanes use zmm2/zmm3/zmm10/zmm11 for P and zmm4/zmm6/zmm12/zmm14 for
 * Q, with zmm5/zmm7/zmm13/zmm15 as scratch.  Unlike the x1/x2 variants,
 * the P/Q accumulators are zeroed up front and re-zeroed after each
 * store, so the inner loop runs over every disk z0..0 with no preload.
 * Same page-layout and 64-byte alignment expectations as the x1 version.
 */
static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        /* zmm0 = 64 x 0x1d; zero the temp and all eight accumulators. */
        asm volatile("vmovdqa64 %0,%%zmm0\n\t"
                     "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"       /* Zero temp */
                     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"       /* P[0] */
                     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"       /* P[1] */
                     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"       /* Q[0] */
                     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"       /* Q[1] */
                     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"    /* P[2] */
                     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"    /* P[3] */
                     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"    /* Q[2] */
                     "vpxorq %%zmm14,%%zmm14,%%zmm14"        /* Q[3] */
                     :
                     : "m" (raid6_avx512_constants.x1d[0]));

        for (d = 0; d < bytes; d += 256) {
                for (z = z0; z >= 0; z--) {
                /* Per lane: Q = Q*2 ^ data[z], P ^= data[z]
                 * (sign-mask into k1..k4, double, reduce with 0x1d). */
                asm volatile("prefetchnta %0\n\t"
                             "prefetchnta %1\n\t"
                             "prefetchnta %2\n\t"
                             "prefetchnta %3\n\t"
                             "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
                             "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
                             "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
                             "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
                             "vpmovm2b %%k1,%%zmm5\n\t"
                             "vpmovm2b %%k2,%%zmm7\n\t"
                             "vpmovm2b %%k3,%%zmm13\n\t"
                             "vpmovm2b %%k4,%%zmm15\n\t"
                             "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                             "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                             "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
                             "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
                             "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                             "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                             "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
                             "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
                             "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                             "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                             "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
                             "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
                             "vmovdqa64 %0,%%zmm5\n\t"
                             "vmovdqa64 %1,%%zmm7\n\t"
                             "vmovdqa64 %2,%%zmm13\n\t"
                             "vmovdqa64 %3,%%zmm15\n\t"
                             "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                             "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
                             "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
                             "vpxorq %%zmm15,%%zmm11,%%zmm11\n"
                             "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                             "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                             "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
                             "vpxorq %%zmm15,%%zmm14,%%zmm14"
                             :
                             : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
                               "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
                }
                /* Stream all four lanes of P and Q out and re-zero the
                 * accumulators for the next 256-byte stride. */
                asm volatile("vmovntdq %%zmm2,%0\n\t"
                             "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
                             "vmovntdq %%zmm3,%1\n\t"
                             "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
                             "vmovntdq %%zmm10,%2\n\t"
                             "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
                             "vmovntdq %%zmm11,%3\n\t"
                             "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
                             "vmovntdq %%zmm4,%4\n\t"
                             "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
                             "vmovntdq %%zmm6,%5\n\t"
                             "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
                             "vmovntdq %%zmm12,%6\n\t"
                             "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
                             "vmovntdq %%zmm14,%7\n\t"
                             "vpxorq %%zmm14,%%zmm14,%%zmm14"
                             :
                             : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
                               "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
                               "m" (q[d+128]), "m" (q[d+192]));
        }

        /* Drain the non-temporal stores before anyone reads P/Q. */
        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
422 
423 static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
424                                        size_t bytes, void **ptrs)
425 {
426         u8 **dptr = (u8 **)ptrs;
427         u8 *p, *q;
428         int d, z, z0;
429 
430         z0 = stop;              /* P/Q right side optimization */
431         p = dptr[disks-2];      /* XOR parity */
432         q = dptr[disks-1];      /* RS syndrome */
433 
434         kernel_fpu_begin();
435 
436         asm volatile("vmovdqa64 %0,%%zmm0"
437                      :: "m" (raid6_avx512_constants.x1d[0]));
438 
439         for (d = 0 ; d < bytes ; d += 256) {
440                 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
441                              "vmovdqa64 %1,%%zmm6\n\t"
442                              "vmovdqa64 %2,%%zmm12\n\t"
443                              "vmovdqa64 %3,%%zmm14\n\t"
444                              "vmovdqa64 %4,%%zmm2\n\t"
445                              "vmovdqa64 %5,%%zmm3\n\t"
446                              "vmovdqa64 %6,%%zmm10\n\t"
447                              "vmovdqa64 %7,%%zmm11\n\t"
448                              "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
449                              "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
450                              "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
451                              "vpxorq %%zmm14,%%zmm11,%%zmm11"
452                              :
453                              : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
454                                "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
455                                "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
456                                "m" (p[d+192]));
457                 /* P/Q data pages */
458                 for (z = z0-1 ; z >= start ; z--) {
459                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
460                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
461                                      "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
462                                      "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
463                                      "prefetchnta %0\n\t"
464                                      "prefetchnta %2\n\t"
465                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
466                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
467                                      "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
468                                      "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
469                                      "vpmovm2b %%k1,%%zmm5\n\t"
470                                      "vpmovm2b %%k2,%%zmm7\n\t"
471                                      "vpmovm2b %%k3,%%zmm13\n\t"
472                                      "vpmovm2b %%k4,%%zmm15\n\t"
473                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
474                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
475                                      "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
476                                      "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t"
477                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
478                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
479                                      "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
480                                      "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
481                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
482                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
483                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
484                                      "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
485                                      "vmovdqa64 %0,%%zmm5\n\t"
486                                      "vmovdqa64 %1,%%zmm7\n\t"
487                                      "vmovdqa64 %2,%%zmm13\n\t"
488                                      "vmovdqa64 %3,%%zmm15\n\t"
489                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
490                                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
491                                      "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
492                                      "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
493                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
494                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
495                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
496                                      "vpxorq %%zmm15,%%zmm14,%%zmm14"
497                                      :
498                                      : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
499                                        "m" (dptr[z][d+128]),
500                                        "m" (dptr[z][d+192]));
501                 }
502                 asm volatile("prefetchnta %0\n\t"
503                              "prefetchnta %1\n\t"
504                              :
505                              : "m" (q[d]), "m" (q[d+128]));
506                 /* P/Q left side optimization */
507                 for (z = start-1 ; z >= 0 ; z--) {
508                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
509                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
510                                      "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
511                                      "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
512                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
513                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
514                                      "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
515                                      "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
516                                      "vpmovm2b %%k1,%%zmm5\n\t"
517                                      "vpmovm2b %%k2,%%zmm7\n\t"
518                                      "vpmovm2b %%k3,%%zmm13\n\t"
519                                      "vpmovm2b %%k4,%%zmm15\n\t"
520                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
521                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
522                                      "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
523                                      "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
524                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
525                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
526                                      "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
527                                      "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
528                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
529                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
530                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
531                                      "vpxorq %%zmm15,%%zmm14,%%zmm14"
532                                      :
533                                      : );
534                 }
535                 asm volatile("vmovntdq %%zmm2,%0\n\t"
536                              "vmovntdq %%zmm3,%1\n\t"
537                              "vmovntdq %%zmm10,%2\n\t"
538                              "vmovntdq %%zmm11,%3\n\t"
539                              "vpxorq %4,%%zmm4,%%zmm4\n\t"
540                              "vpxorq %5,%%zmm6,%%zmm6\n\t"
541                              "vpxorq %6,%%zmm12,%%zmm12\n\t"
542                              "vpxorq %7,%%zmm14,%%zmm14\n\t"
543                              "vmovntdq %%zmm4,%4\n\t"
544                              "vmovntdq %%zmm6,%5\n\t"
545                              "vmovntdq %%zmm12,%6\n\t"
546                              "vmovntdq %%zmm14,%7"
547                              :
548                              : "m" (p[d]),  "m" (p[d+64]), "m" (p[d+128]),
549                                "m" (p[d+192]), "m" (q[d]),  "m" (q[d+64]),
550                                "m" (q[d+128]), "m" (q[d+192]));
551         }
552         asm volatile("sfence" : : : "memory");
553         kernel_fpu_end();
554 }
555 const struct raid6_calls raid6_avx512x4 = {
556         raid6_avx5124_gen_syndrome,
557         raid6_avx5124_xor_syndrome,
558         raid6_have_avx512,
559         "avx512x4",
560         .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
561 };
562 #endif
563 
564 #endif /* CONFIG_AS_AVX512 */
565 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php