~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/lib/raid6/avx512.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /lib/raid6/avx512.c (Version linux-6.12-rc7) and /lib/raid6/avx512.c (Version linux-6.6.60)


  1 // SPDX-License-Identifier: GPL-2.0-or-later        1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /* -*- linux-c -*- ---------------------------      2 /* -*- linux-c -*- --------------------------------------------------------
  3  *                                                  3  *
  4  *   Copyright (C) 2016 Intel Corporation           4  *   Copyright (C) 2016 Intel Corporation
  5  *                                                  5  *
  6  *   Author: Gayatri Kammela <gayatri.kammela@      6  *   Author: Gayatri Kammela <gayatri.kammela@intel.com>
  7  *   Author: Megha Dey <megha.dey@linux.intel.      7  *   Author: Megha Dey <megha.dey@linux.intel.com>
  8  *                                                  8  *
  9  *   Based on avx2.c: Copyright 2012 Yuanhan L      9  *   Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
 10  *   Based on sse2.c: Copyright 2002 H. Peter      10  *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 11  *                                                 11  *
 12  * -------------------------------------------     12  * -----------------------------------------------------------------------
 13  */                                                13  */
 14                                                    14 
 15 /*                                                 15 /*
 16  * AVX512 implementation of RAID-6 syndrome fu     16  * AVX512 implementation of RAID-6 syndrome functions
 17  *                                                 17  *
 18  */                                                18  */
 19                                                    19 
 20 #ifdef CONFIG_AS_AVX512                            20 #ifdef CONFIG_AS_AVX512
 21                                                    21 
 22 #include <linux/raid/pq.h>                         22 #include <linux/raid/pq.h>
 23 #include "x86.h"                                   23 #include "x86.h"
 24                                                    24 
 25 static const struct raid6_avx512_constants {       25 static const struct raid6_avx512_constants {
 26         u64 x1d[8];                                26         u64 x1d[8];
 27 } raid6_avx512_constants __aligned(512/8) = {      27 } raid6_avx512_constants __aligned(512/8) = {
 28         { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1     28         { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
 29           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1     29           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
 30           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1     30           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
 31           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1     31           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
 32 };                                                 32 };
 33                                                    33 
 34 static int raid6_have_avx512(void)                 34 static int raid6_have_avx512(void)
 35 {                                                  35 {
 36         return boot_cpu_has(X86_FEATURE_AVX2)      36         return boot_cpu_has(X86_FEATURE_AVX2) &&
 37                 boot_cpu_has(X86_FEATURE_AVX)      37                 boot_cpu_has(X86_FEATURE_AVX) &&
 38                 boot_cpu_has(X86_FEATURE_AVX51     38                 boot_cpu_has(X86_FEATURE_AVX512F) &&
 39                 boot_cpu_has(X86_FEATURE_AVX51     39                 boot_cpu_has(X86_FEATURE_AVX512BW) &&
 40                 boot_cpu_has(X86_FEATURE_AVX51     40                 boot_cpu_has(X86_FEATURE_AVX512VL) &&
 41                 boot_cpu_has(X86_FEATURE_AVX51     41                 boot_cpu_has(X86_FEATURE_AVX512DQ);
 42 }                                                  42 }
 43                                                    43 
 44 static void raid6_avx5121_gen_syndrome(int dis     44 static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
 45 {                                                  45 {
 46         u8 **dptr = (u8 **)ptrs;                   46         u8 **dptr = (u8 **)ptrs;
 47         u8 *p, *q;                                 47         u8 *p, *q;
 48         int d, z, z0;                              48         int d, z, z0;
 49                                                    49 
 50         z0 = disks - 3;         /* Highest dat     50         z0 = disks - 3;         /* Highest data disk */
 51         p = dptr[z0+1];         /* XOR parity      51         p = dptr[z0+1];         /* XOR parity */
 52         q = dptr[z0+2];         /* RS syndrome     52         q = dptr[z0+2];         /* RS syndrome */
 53                                                    53 
 54         kernel_fpu_begin();                        54         kernel_fpu_begin();
 55                                                    55 
 56         asm volatile("vmovdqa64 %0,%%zmm0\n\t"     56         asm volatile("vmovdqa64 %0,%%zmm0\n\t"
 57                      "vpxorq %%zmm1,%%zmm1,%%z     57                      "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
 58                      :                             58                      :
 59                      : "m" (raid6_avx512_const     59                      : "m" (raid6_avx512_constants.x1d[0]));
 60                                                    60 
 61         for (d = 0; d < bytes; d += 64) {          61         for (d = 0; d < bytes; d += 64) {
 62                 asm volatile("prefetchnta %0\n     62                 asm volatile("prefetchnta %0\n\t"
 63                              "vmovdqa64 %0,%%z     63                              "vmovdqa64 %0,%%zmm2\n\t"     /* P[0] */
 64                              "prefetchnta %1\n     64                              "prefetchnta %1\n\t"
 65                              "vmovdqa64 %%zmm2     65                              "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
 66                              "vmovdqa64 %1,%%z     66                              "vmovdqa64 %1,%%zmm6"
 67                              :                     67                              :
 68                              : "m" (dptr[z0][d     68                              : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
 69                 for (z = z0-2; z >= 0; z--) {      69                 for (z = z0-2; z >= 0; z--) {
 70                         asm volatile("prefetch     70                         asm volatile("prefetchnta %0\n\t"
 71                                      "vpcmpgtb     71                                      "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
 72                                      "vpmovm2b     72                                      "vpmovm2b %%k1,%%zmm5\n\t"
 73                                      "vpaddb %     73                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
 74                                      "vpandq %     74                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
 75                                      "vpxorq %     75                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
 76                                      "vpxorq %     76                                      "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
 77                                      "vpxorq %     77                                      "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
 78                                      "vmovdqa6     78                                      "vmovdqa64 %0,%%zmm6"
 79                                      :             79                                      :
 80                                      : "m" (dp     80                                      : "m" (dptr[z][d]));
 81                 }                                  81                 }
 82                 asm volatile("vpcmpgtb %%zmm4,     82                 asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
 83                              "vpmovm2b %%k1,%%     83                              "vpmovm2b %%k1,%%zmm5\n\t"
 84                              "vpaddb %%zmm4,%%     84                              "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
 85                              "vpandq %%zmm0,%%     85                              "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
 86                              "vpxorq %%zmm5,%%     86                              "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
 87                              "vpxorq %%zmm6,%%     87                              "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
 88                              "vpxorq %%zmm6,%%     88                              "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
 89                              "vmovntdq %%zmm2,     89                              "vmovntdq %%zmm2,%0\n\t"
 90                              "vpxorq %%zmm2,%%     90                              "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
 91                              "vmovntdq %%zmm4,     91                              "vmovntdq %%zmm4,%1\n\t"
 92                              "vpxorq %%zmm4,%%     92                              "vpxorq %%zmm4,%%zmm4,%%zmm4"
 93                              :                     93                              :
 94                              : "m" (p[d]), "m"     94                              : "m" (p[d]), "m" (q[d]));
 95         }                                          95         }
 96                                                    96 
 97         asm volatile("sfence" : : : "memory");     97         asm volatile("sfence" : : : "memory");
 98         kernel_fpu_end();                          98         kernel_fpu_end();
 99 }                                                  99 }
100                                                   100 
101 static void raid6_avx5121_xor_syndrome(int dis    101 static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
102                                        size_t     102                                        size_t bytes, void **ptrs)
103 {                                                 103 {
104         u8 **dptr = (u8 **)ptrs;                  104         u8 **dptr = (u8 **)ptrs;
105         u8 *p, *q;                                105         u8 *p, *q;
106         int d, z, z0;                             106         int d, z, z0;
107                                                   107 
108         z0 = stop;              /* P/Q right s    108         z0 = stop;              /* P/Q right side optimization */
109         p = dptr[disks-2];      /* XOR parity     109         p = dptr[disks-2];      /* XOR parity */
110         q = dptr[disks-1];      /* RS syndrome    110         q = dptr[disks-1];      /* RS syndrome */
111                                                   111 
112         kernel_fpu_begin();                       112         kernel_fpu_begin();
113                                                   113 
114         asm volatile("vmovdqa64 %0,%%zmm0"        114         asm volatile("vmovdqa64 %0,%%zmm0"
115                      : : "m" (raid6_avx512_con    115                      : : "m" (raid6_avx512_constants.x1d[0]));
116                                                   116 
117         for (d = 0 ; d < bytes ; d += 64) {       117         for (d = 0 ; d < bytes ; d += 64) {
118                 asm volatile("vmovdqa64 %0,%%z    118                 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
119                              "vmovdqa64 %1,%%z    119                              "vmovdqa64 %1,%%zmm2\n\t"
120                              "vpxorq %%zmm4,%%    120                              "vpxorq %%zmm4,%%zmm2,%%zmm2"
121                              :                    121                              :
122                              : "m" (dptr[z0][d    122                              : "m" (dptr[z0][d]),  "m" (p[d]));
123                 /* P/Q data pages */              123                 /* P/Q data pages */
124                 for (z = z0-1 ; z >= start ; z    124                 for (z = z0-1 ; z >= start ; z--) {
125                         asm volatile("vpxorq %    125                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
126                                      "vpcmpgtb    126                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
127                                      "vpmovm2b    127                                      "vpmovm2b %%k1,%%zmm5\n\t"
128                                      "vpaddb %    128                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
129                                      "vpandq %    129                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
130                                      "vpxorq %    130                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
131                                      "vmovdqa6    131                                      "vmovdqa64 %0,%%zmm5\n\t"
132                                      "vpxorq %    132                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
133                                      "vpxorq %    133                                      "vpxorq %%zmm5,%%zmm4,%%zmm4"
134                                      :            134                                      :
135                                      : "m" (dp    135                                      : "m" (dptr[z][d]));
136                 }                                 136                 }
137                 /* P/Q left side optimization     137                 /* P/Q left side optimization */
138                 for (z = start-1 ; z >= 0 ; z-    138                 for (z = start-1 ; z >= 0 ; z--) {
139                         asm volatile("vpxorq %    139                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
140                                      "vpcmpgtb    140                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
141                                      "vpmovm2b    141                                      "vpmovm2b %%k1,%%zmm5\n\t"
142                                      "vpaddb %    142                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
143                                      "vpandq %    143                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
144                                      "vpxorq %    144                                      "vpxorq %%zmm5,%%zmm4,%%zmm4"
145                                      :            145                                      :
146                                      : );         146                                      : );
147                 }                                 147                 }
148                 asm volatile("vpxorq %0,%%zmm4    148                 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
149                 /* Don't use movntdq for r/w m    149                 /* Don't use movntdq for r/w memory area < cache line */
150                              "vmovdqa64 %%zmm4    150                              "vmovdqa64 %%zmm4,%0\n\t"
151                              "vmovdqa64 %%zmm2    151                              "vmovdqa64 %%zmm2,%1"
152                              :                    152                              :
153                              : "m" (q[d]), "m"    153                              : "m" (q[d]), "m" (p[d]));
154         }                                         154         }
155                                                   155 
156         asm volatile("sfence" : : : "memory");    156         asm volatile("sfence" : : : "memory");
157         kernel_fpu_end();                         157         kernel_fpu_end();
158 }                                                 158 }
159                                                   159 
160 const struct raid6_calls raid6_avx512x1 = {       160 const struct raid6_calls raid6_avx512x1 = {
161         raid6_avx5121_gen_syndrome,               161         raid6_avx5121_gen_syndrome,
162         raid6_avx5121_xor_syndrome,               162         raid6_avx5121_xor_syndrome,
163         raid6_have_avx512,                        163         raid6_have_avx512,
164         "avx512x1",                               164         "avx512x1",
165         .priority = 2           /* Prefer AVX5    165         .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
166 };                                                166 };
167                                                   167 
168 /*                                                168 /*
169  * Unrolled-by-2 AVX512 implementation            169  * Unrolled-by-2 AVX512 implementation
170  */                                               170  */
171 static void raid6_avx5122_gen_syndrome(int dis    171 static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
172 {                                                 172 {
173         u8 **dptr = (u8 **)ptrs;                  173         u8 **dptr = (u8 **)ptrs;
174         u8 *p, *q;                                174         u8 *p, *q;
175         int d, z, z0;                             175         int d, z, z0;
176                                                   176 
177         z0 = disks - 3;         /* Highest dat    177         z0 = disks - 3;         /* Highest data disk */
178         p = dptr[z0+1];         /* XOR parity     178         p = dptr[z0+1];         /* XOR parity */
179         q = dptr[z0+2];         /* RS syndrome    179         q = dptr[z0+2];         /* RS syndrome */
180                                                   180 
181         kernel_fpu_begin();                       181         kernel_fpu_begin();
182                                                   182 
183         asm volatile("vmovdqa64 %0,%%zmm0\n\t"    183         asm volatile("vmovdqa64 %0,%%zmm0\n\t"
184                      "vpxorq %%zmm1,%%zmm1,%%z    184                      "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
185                      :                            185                      :
186                      : "m" (raid6_avx512_const    186                      : "m" (raid6_avx512_constants.x1d[0]));
187                                                   187 
188         /* We uniformly assume a single prefet    188         /* We uniformly assume a single prefetch covers at least 64 bytes */
189         for (d = 0; d < bytes; d += 128) {        189         for (d = 0; d < bytes; d += 128) {
190                 asm volatile("prefetchnta %0\n    190                 asm volatile("prefetchnta %0\n\t"
191                              "prefetchnta %1\n    191                              "prefetchnta %1\n\t"
192                              "vmovdqa64 %0,%%z    192                              "vmovdqa64 %0,%%zmm2\n\t"      /* P[0] */
193                              "vmovdqa64 %1,%%z    193                              "vmovdqa64 %1,%%zmm3\n\t"      /* P[1] */
194                              "vmovdqa64 %%zmm2    194                              "vmovdqa64 %%zmm2,%%zmm4\n\t"  /* Q[0] */
195                              "vmovdqa64 %%zmm3    195                              "vmovdqa64 %%zmm3,%%zmm6"      /* Q[1] */
196                              :                    196                              :
197                              : "m" (dptr[z0][d    197                              : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
198                 for (z = z0-1; z >= 0; z--) {     198                 for (z = z0-1; z >= 0; z--) {
199                         asm volatile("prefetch    199                         asm volatile("prefetchnta %0\n\t"
200                                      "prefetch    200                                      "prefetchnta %1\n\t"
201                                      "vpcmpgtb    201                                      "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
202                                      "vpcmpgtb    202                                      "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
203                                      "vpmovm2b    203                                      "vpmovm2b %%k1,%%zmm5\n\t"
204                                      "vpmovm2b    204                                      "vpmovm2b %%k2,%%zmm7\n\t"
205                                      "vpaddb %    205                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
206                                      "vpaddb %    206                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
207                                      "vpandq %    207                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
208                                      "vpandq %    208                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
209                                      "vpxorq %    209                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
210                                      "vpxorq %    210                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
211                                      "vmovdqa6    211                                      "vmovdqa64 %0,%%zmm5\n\t"
212                                      "vmovdqa6    212                                      "vmovdqa64 %1,%%zmm7\n\t"
213                                      "vpxorq %    213                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
214                                      "vpxorq %    214                                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
215                                      "vpxorq %    215                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
216                                      "vpxorq %    216                                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
217                                      :            217                                      :
218                                      : "m" (dp    218                                      : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
219                 }                                 219                 }
220                 asm volatile("vmovntdq %%zmm2,    220                 asm volatile("vmovntdq %%zmm2,%0\n\t"
221                              "vmovntdq %%zmm3,    221                              "vmovntdq %%zmm3,%1\n\t"
222                              "vmovntdq %%zmm4,    222                              "vmovntdq %%zmm4,%2\n\t"
223                              "vmovntdq %%zmm6,    223                              "vmovntdq %%zmm6,%3"
224                              :                    224                              :
225                              : "m" (p[d]), "m"    225                              : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
226                                "m" (q[d+64]));    226                                "m" (q[d+64]));
227         }                                         227         }
228                                                   228 
229         asm volatile("sfence" : : : "memory");    229         asm volatile("sfence" : : : "memory");
230         kernel_fpu_end();                         230         kernel_fpu_end();
231 }                                                 231 }
232                                                   232 
233 static void raid6_avx5122_xor_syndrome(int dis    233 static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
234                                        size_t     234                                        size_t bytes, void **ptrs)
235 {                                                 235 {
236         u8 **dptr = (u8 **)ptrs;                  236         u8 **dptr = (u8 **)ptrs;
237         u8 *p, *q;                                237         u8 *p, *q;
238         int d, z, z0;                             238         int d, z, z0;
239                                                   239 
240         z0 = stop;              /* P/Q right s    240         z0 = stop;              /* P/Q right side optimization */
241         p = dptr[disks-2];      /* XOR parity     241         p = dptr[disks-2];      /* XOR parity */
242         q = dptr[disks-1];      /* RS syndrome    242         q = dptr[disks-1];      /* RS syndrome */
243                                                   243 
244         kernel_fpu_begin();                       244         kernel_fpu_begin();
245                                                   245 
246         asm volatile("vmovdqa64 %0,%%zmm0"        246         asm volatile("vmovdqa64 %0,%%zmm0"
247                      : : "m" (raid6_avx512_con    247                      : : "m" (raid6_avx512_constants.x1d[0]));
248                                                   248 
249         for (d = 0 ; d < bytes ; d += 128) {      249         for (d = 0 ; d < bytes ; d += 128) {
250                 asm volatile("vmovdqa64 %0,%%z    250                 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
251                              "vmovdqa64 %1,%%z    251                              "vmovdqa64 %1,%%zmm6\n\t"
252                              "vmovdqa64 %2,%%z    252                              "vmovdqa64 %2,%%zmm2\n\t"
253                              "vmovdqa64 %3,%%z    253                              "vmovdqa64 %3,%%zmm3\n\t"
254                              "vpxorq %%zmm4,%%    254                              "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
255                              "vpxorq %%zmm6,%%    255                              "vpxorq %%zmm6,%%zmm3,%%zmm3"
256                              :                    256                              :
257                              : "m" (dptr[z0][d    257                              : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
258                                "m" (p[d]), "m"    258                                "m" (p[d]), "m" (p[d+64]));
259                 /* P/Q data pages */              259                 /* P/Q data pages */
260                 for (z = z0-1 ; z >= start ; z    260                 for (z = z0-1 ; z >= start ; z--) {
261                         asm volatile("vpxorq %    261                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
262                                      "vpxorq %    262                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
263                                      "vpcmpgtb    263                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
264                                      "vpcmpgtb    264                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
265                                      "vpmovm2b    265                                      "vpmovm2b %%k1,%%zmm5\n\t"
266                                      "vpmovm2b    266                                      "vpmovm2b %%k2,%%zmm7\n\t"
267                                      "vpaddb %    267                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
268                                      "vpaddb %    268                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
269                                      "vpandq %    269                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
270                                      "vpandq %    270                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
271                                      "vpxorq %    271                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
272                                      "vpxorq %    272                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
273                                      "vmovdqa6    273                                      "vmovdqa64 %0,%%zmm5\n\t"
274                                      "vmovdqa6    274                                      "vmovdqa64 %1,%%zmm7\n\t"
275                                      "vpxorq %    275                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
276                                      "vpxorq %    276                                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
277                                      "vpxorq %    277                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
278                                      "vpxorq %    278                                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
279                                      :            279                                      :
280                                      : "m" (dp    280                                      : "m" (dptr[z][d]),  "m" (dptr[z][d+64]));
281                 }                                 281                 }
282                 /* P/Q left side optimization     282                 /* P/Q left side optimization */
283                 for (z = start-1 ; z >= 0 ; z-    283                 for (z = start-1 ; z >= 0 ; z--) {
284                         asm volatile("vpxorq %    284                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
285                                      "vpxorq %    285                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
286                                      "vpcmpgtb    286                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
287                                      "vpcmpgtb    287                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
288                                      "vpmovm2b    288                                      "vpmovm2b %%k1,%%zmm5\n\t"
289                                      "vpmovm2b    289                                      "vpmovm2b %%k2,%%zmm7\n\t"
290                                      "vpaddb %    290                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
291                                      "vpaddb %    291                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
292                                      "vpandq %    292                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
293                                      "vpandq %    293                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
294                                      "vpxorq %    294                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
295                                      "vpxorq %    295                                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
296                                      :            296                                      :
297                                      : );         297                                      : );
298                 }                                 298                 }
299                 asm volatile("vpxorq %0,%%zmm4    299                 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
300                              "vpxorq %1,%%zmm6    300                              "vpxorq %1,%%zmm6,%%zmm6\n\t"
301                              /* Don't use movn    301                              /* Don't use movntdq for r/w
302                               * memory area <     302                               * memory area < cache line
303                               */                  303                               */
304                              "vmovdqa64 %%zmm4    304                              "vmovdqa64 %%zmm4,%0\n\t"
305                              "vmovdqa64 %%zmm6    305                              "vmovdqa64 %%zmm6,%1\n\t"
306                              "vmovdqa64 %%zmm2    306                              "vmovdqa64 %%zmm2,%2\n\t"
307                              "vmovdqa64 %%zmm3    307                              "vmovdqa64 %%zmm3,%3"
308                              :                    308                              :
309                              : "m" (q[d]), "m"    309                              : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
310                                "m" (p[d+64]));    310                                "m" (p[d+64]));
311         }                                         311         }
312                                                   312 
313         asm volatile("sfence" : : : "memory");    313         asm volatile("sfence" : : : "memory");
314         kernel_fpu_end();                         314         kernel_fpu_end();
315 }                                                 315 }
316                                                   316 
317 const struct raid6_calls raid6_avx512x2 = {       317 const struct raid6_calls raid6_avx512x2 = {
318         raid6_avx5122_gen_syndrome,               318         raid6_avx5122_gen_syndrome,
319         raid6_avx5122_xor_syndrome,               319         raid6_avx5122_xor_syndrome,
320         raid6_have_avx512,                        320         raid6_have_avx512,
321         "avx512x2",                               321         "avx512x2",
322         .priority = 2           /* Prefer AVX5    322         .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
323 };                                                323 };
324                                                   324 
325 #ifdef CONFIG_X86_64                              325 #ifdef CONFIG_X86_64
326                                                   326 
327 /*                                                327 /*
328  * Unrolled-by-4 AVX2 implementation              328  * Unrolled-by-4 AVX2 implementation
329  */                                               329  */
330 static void raid6_avx5124_gen_syndrome(int dis    330 static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
331 {                                                 331 {
332         u8 **dptr = (u8 **)ptrs;                  332         u8 **dptr = (u8 **)ptrs;
333         u8 *p, *q;                                333         u8 *p, *q;
334         int d, z, z0;                             334         int d, z, z0;
335                                                   335 
336         z0 = disks - 3;         /* Highest dat    336         z0 = disks - 3;         /* Highest data disk */
337         p = dptr[z0+1];         /* XOR parity     337         p = dptr[z0+1];         /* XOR parity */
338         q = dptr[z0+2];         /* RS syndrome    338         q = dptr[z0+2];         /* RS syndrome */
339                                                   339 
340         kernel_fpu_begin();                       340         kernel_fpu_begin();
341                                                   341 
342         asm volatile("vmovdqa64 %0,%%zmm0\n\t"    342         asm volatile("vmovdqa64 %0,%%zmm0\n\t"
343                      "vpxorq %%zmm1,%%zmm1,%%z    343                      "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"       /* Zero temp */
344                      "vpxorq %%zmm2,%%zmm2,%%z    344                      "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"       /* P[0] */
345                      "vpxorq %%zmm3,%%zmm3,%%z    345                      "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"       /* P[1] */
346                      "vpxorq %%zmm4,%%zmm4,%%z    346                      "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"       /* Q[0] */
347                      "vpxorq %%zmm6,%%zmm6,%%z    347                      "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"       /* Q[1] */
348                      "vpxorq %%zmm10,%%zmm10,%    348                      "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"    /* P[2] */
349                      "vpxorq %%zmm11,%%zmm11,%    349                      "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"    /* P[3] */
350                      "vpxorq %%zmm12,%%zmm12,%    350                      "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"    /* Q[2] */
351                      "vpxorq %%zmm14,%%zmm14,%    351                      "vpxorq %%zmm14,%%zmm14,%%zmm14"        /* Q[3] */
352                      :                            352                      :
353                      : "m" (raid6_avx512_const    353                      : "m" (raid6_avx512_constants.x1d[0]));
354                                                   354 
355         for (d = 0; d < bytes; d += 256) {        355         for (d = 0; d < bytes; d += 256) {
356                 for (z = z0; z >= 0; z--) {       356                 for (z = z0; z >= 0; z--) {
357                 asm volatile("prefetchnta %0\n    357                 asm volatile("prefetchnta %0\n\t"
358                              "prefetchnta %1\n    358                              "prefetchnta %1\n\t"
359                              "prefetchnta %2\n    359                              "prefetchnta %2\n\t"
360                              "prefetchnta %3\n    360                              "prefetchnta %3\n\t"
361                              "vpcmpgtb %%zmm4,    361                              "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
362                              "vpcmpgtb %%zmm6,    362                              "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
363                              "vpcmpgtb %%zmm12    363                              "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
364                              "vpcmpgtb %%zmm14    364                              "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
365                              "vpmovm2b %%k1,%%    365                              "vpmovm2b %%k1,%%zmm5\n\t"
366                              "vpmovm2b %%k2,%%    366                              "vpmovm2b %%k2,%%zmm7\n\t"
367                              "vpmovm2b %%k3,%%    367                              "vpmovm2b %%k3,%%zmm13\n\t"
368                              "vpmovm2b %%k4,%%    368                              "vpmovm2b %%k4,%%zmm15\n\t"
369                              "vpaddb %%zmm4,%%    369                              "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
370                              "vpaddb %%zmm6,%%    370                              "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
371                              "vpaddb %%zmm12,%    371                              "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
372                              "vpaddb %%zmm14,%    372                              "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
373                              "vpandq %%zmm0,%%    373                              "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
374                              "vpandq %%zmm0,%%    374                              "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
375                              "vpandq %%zmm0,%%    375                              "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
376                              "vpandq %%zmm0,%%    376                              "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
377                              "vpxorq %%zmm5,%%    377                              "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
378                              "vpxorq %%zmm7,%%    378                              "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
379                              "vpxorq %%zmm13,%    379                              "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
380                              "vpxorq %%zmm15,%    380                              "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
381                              "vmovdqa64 %0,%%z    381                              "vmovdqa64 %0,%%zmm5\n\t"
382                              "vmovdqa64 %1,%%z    382                              "vmovdqa64 %1,%%zmm7\n\t"
383                              "vmovdqa64 %2,%%z    383                              "vmovdqa64 %2,%%zmm13\n\t"
384                              "vmovdqa64 %3,%%z    384                              "vmovdqa64 %3,%%zmm15\n\t"
385                              "vpxorq %%zmm5,%%    385                              "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
386                              "vpxorq %%zmm7,%%    386                              "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
387                              "vpxorq %%zmm13,%    387                              "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
388                              "vpxorq %%zmm15,%    388                              "vpxorq %%zmm15,%%zmm11,%%zmm11\n"
389                              "vpxorq %%zmm5,%%    389                              "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
390                              "vpxorq %%zmm7,%%    390                              "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
391                              "vpxorq %%zmm13,%    391                              "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
392                              "vpxorq %%zmm15,%    392                              "vpxorq %%zmm15,%%zmm14,%%zmm14"
393                              :                    393                              :
394                              : "m" (dptr[z][d]    394                              : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
395                                "m" (dptr[z][d+    395                                "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
396                 }                                 396                 }
397                 asm volatile("vmovntdq %%zmm2,    397                 asm volatile("vmovntdq %%zmm2,%0\n\t"
398                              "vpxorq %%zmm2,%%    398                              "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
399                              "vmovntdq %%zmm3,    399                              "vmovntdq %%zmm3,%1\n\t"
400                              "vpxorq %%zmm3,%%    400                              "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
401                              "vmovntdq %%zmm10    401                              "vmovntdq %%zmm10,%2\n\t"
402                              "vpxorq %%zmm10,%    402                              "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
403                              "vmovntdq %%zmm11    403                              "vmovntdq %%zmm11,%3\n\t"
404                              "vpxorq %%zmm11,%    404                              "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
405                              "vmovntdq %%zmm4,    405                              "vmovntdq %%zmm4,%4\n\t"
406                              "vpxorq %%zmm4,%%    406                              "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
407                              "vmovntdq %%zmm6,    407                              "vmovntdq %%zmm6,%5\n\t"
408                              "vpxorq %%zmm6,%%    408                              "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
409                              "vmovntdq %%zmm12    409                              "vmovntdq %%zmm12,%6\n\t"
410                              "vpxorq %%zmm12,%    410                              "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
411                              "vmovntdq %%zmm14    411                              "vmovntdq %%zmm14,%7\n\t"
412                              "vpxorq %%zmm14,%    412                              "vpxorq %%zmm14,%%zmm14,%%zmm14"
413                              :                    413                              :
414                              : "m" (p[d]), "m"    414                              : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
415                                "m" (p[d+192]),    415                                "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
416                                "m" (q[d+128]),    416                                "m" (q[d+128]), "m" (q[d+192]));
417         }                                         417         }
418                                                   418 
419         asm volatile("sfence" : : : "memory");    419         asm volatile("sfence" : : : "memory");
420         kernel_fpu_end();                         420         kernel_fpu_end();
421 }                                                 421 }
422                                                   422 
423 static void raid6_avx5124_xor_syndrome(int dis    423 static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
424                                        size_t     424                                        size_t bytes, void **ptrs)
425 {                                                 425 {
426         u8 **dptr = (u8 **)ptrs;                  426         u8 **dptr = (u8 **)ptrs;
427         u8 *p, *q;                                427         u8 *p, *q;
428         int d, z, z0;                             428         int d, z, z0;
429                                                   429 
430         z0 = stop;              /* P/Q right s    430         z0 = stop;              /* P/Q right side optimization */
431         p = dptr[disks-2];      /* XOR parity     431         p = dptr[disks-2];      /* XOR parity */
432         q = dptr[disks-1];      /* RS syndrome    432         q = dptr[disks-1];      /* RS syndrome */
433                                                   433 
434         kernel_fpu_begin();                       434         kernel_fpu_begin();
435                                                   435 
436         asm volatile("vmovdqa64 %0,%%zmm0"        436         asm volatile("vmovdqa64 %0,%%zmm0"
437                      :: "m" (raid6_avx512_cons    437                      :: "m" (raid6_avx512_constants.x1d[0]));
438                                                   438 
439         for (d = 0 ; d < bytes ; d += 256) {      439         for (d = 0 ; d < bytes ; d += 256) {
440                 asm volatile("vmovdqa64 %0,%%z    440                 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
441                              "vmovdqa64 %1,%%z    441                              "vmovdqa64 %1,%%zmm6\n\t"
442                              "vmovdqa64 %2,%%z    442                              "vmovdqa64 %2,%%zmm12\n\t"
443                              "vmovdqa64 %3,%%z    443                              "vmovdqa64 %3,%%zmm14\n\t"
444                              "vmovdqa64 %4,%%z    444                              "vmovdqa64 %4,%%zmm2\n\t"
445                              "vmovdqa64 %5,%%z    445                              "vmovdqa64 %5,%%zmm3\n\t"
446                              "vmovdqa64 %6,%%z    446                              "vmovdqa64 %6,%%zmm10\n\t"
447                              "vmovdqa64 %7,%%z    447                              "vmovdqa64 %7,%%zmm11\n\t"
448                              "vpxorq %%zmm4,%%    448                              "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
449                              "vpxorq %%zmm6,%%    449                              "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
450                              "vpxorq %%zmm12,%    450                              "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
451                              "vpxorq %%zmm14,%    451                              "vpxorq %%zmm14,%%zmm11,%%zmm11"
452                              :                    452                              :
453                              : "m" (dptr[z0][d    453                              : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
454                                "m" (dptr[z0][d    454                                "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
455                                "m" (p[d]), "m"    455                                "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
456                                "m" (p[d+192]))    456                                "m" (p[d+192]));
457                 /* P/Q data pages */              457                 /* P/Q data pages */
458                 for (z = z0-1 ; z >= start ; z    458                 for (z = z0-1 ; z >= start ; z--) {
459                         asm volatile("vpxorq %    459                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
460                                      "vpxorq %    460                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
461                                      "vpxorq %    461                                      "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
462                                      "vpxorq %    462                                      "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
463                                      "prefetch    463                                      "prefetchnta %0\n\t"
464                                      "prefetch    464                                      "prefetchnta %2\n\t"
465                                      "vpcmpgtb    465                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
466                                      "vpcmpgtb    466                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
467                                      "vpcmpgtb    467                                      "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
468                                      "vpcmpgtb    468                                      "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
469                                      "vpmovm2b    469                                      "vpmovm2b %%k1,%%zmm5\n\t"
470                                      "vpmovm2b    470                                      "vpmovm2b %%k2,%%zmm7\n\t"
471                                      "vpmovm2b    471                                      "vpmovm2b %%k3,%%zmm13\n\t"
472                                      "vpmovm2b    472                                      "vpmovm2b %%k4,%%zmm15\n\t"
473                                      "vpaddb %    473                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
474                                      "vpaddb %    474                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
475                                      "vpaddb %    475                                      "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
476                                      "vpaddb %    476                                      "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t"
477                                      "vpandq %    477                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
478                                      "vpandq %    478                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
479                                      "vpandq %    479                                      "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
480                                      "vpandq %    480                                      "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
481                                      "vpxorq %    481                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
482                                      "vpxorq %    482                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
483                                      "vpxorq %    483                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
484                                      "vpxorq %    484                                      "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
485                                      "vmovdqa6    485                                      "vmovdqa64 %0,%%zmm5\n\t"
486                                      "vmovdqa6    486                                      "vmovdqa64 %1,%%zmm7\n\t"
487                                      "vmovdqa6    487                                      "vmovdqa64 %2,%%zmm13\n\t"
488                                      "vmovdqa6    488                                      "vmovdqa64 %3,%%zmm15\n\t"
489                                      "vpxorq %    489                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
490                                      "vpxorq %    490                                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
491                                      "vpxorq %    491                                      "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
492                                      "vpxorq %    492                                      "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
493                                      "vpxorq %    493                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
494                                      "vpxorq %    494                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
495                                      "vpxorq %    495                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
496                                      "vpxorq %    496                                      "vpxorq %%zmm15,%%zmm14,%%zmm14"
497                                      :            497                                      :
498                                      : "m" (dp    498                                      : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
499                                        "m" (dp    499                                        "m" (dptr[z][d+128]),
500                                        "m" (dp    500                                        "m" (dptr[z][d+192]));
501                 }                                 501                 }
502                 asm volatile("prefetchnta %0\n    502                 asm volatile("prefetchnta %0\n\t"
503                              "prefetchnta %1\n    503                              "prefetchnta %1\n\t"
504                              :                    504                              :
505                              : "m" (q[d]), "m"    505                              : "m" (q[d]), "m" (q[d+128]));
506                 /* P/Q left side optimization     506                 /* P/Q left side optimization */
507                 for (z = start-1 ; z >= 0 ; z-    507                 for (z = start-1 ; z >= 0 ; z--) {
508                         asm volatile("vpxorq %    508                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
509                                      "vpxorq %    509                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
510                                      "vpxorq %    510                                      "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
511                                      "vpxorq %    511                                      "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
512                                      "vpcmpgtb    512                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
513                                      "vpcmpgtb    513                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
514                                      "vpcmpgtb    514                                      "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
515                                      "vpcmpgtb    515                                      "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
516                                      "vpmovm2b    516                                      "vpmovm2b %%k1,%%zmm5\n\t"
517                                      "vpmovm2b    517                                      "vpmovm2b %%k2,%%zmm7\n\t"
518                                      "vpmovm2b    518                                      "vpmovm2b %%k3,%%zmm13\n\t"
519                                      "vpmovm2b    519                                      "vpmovm2b %%k4,%%zmm15\n\t"
520                                      "vpaddb %    520                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
521                                      "vpaddb %    521                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
522                                      "vpaddb %    522                                      "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
523                                      "vpaddb %    523                                      "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
524                                      "vpandq %    524                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
525                                      "vpandq %    525                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
526                                      "vpandq %    526                                      "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
527                                      "vpandq %    527                                      "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
528                                      "vpxorq %    528                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
529                                      "vpxorq %    529                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
530                                      "vpxorq %    530                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
531                                      "vpxorq %    531                                      "vpxorq %%zmm15,%%zmm14,%%zmm14"
532                                      :            532                                      :
533                                      : );         533                                      : );
534                 }                                 534                 }
535                 asm volatile("vmovntdq %%zmm2,    535                 asm volatile("vmovntdq %%zmm2,%0\n\t"
536                              "vmovntdq %%zmm3,    536                              "vmovntdq %%zmm3,%1\n\t"
537                              "vmovntdq %%zmm10    537                              "vmovntdq %%zmm10,%2\n\t"
538                              "vmovntdq %%zmm11    538                              "vmovntdq %%zmm11,%3\n\t"
539                              "vpxorq %4,%%zmm4    539                              "vpxorq %4,%%zmm4,%%zmm4\n\t"
540                              "vpxorq %5,%%zmm6    540                              "vpxorq %5,%%zmm6,%%zmm6\n\t"
541                              "vpxorq %6,%%zmm1    541                              "vpxorq %6,%%zmm12,%%zmm12\n\t"
542                              "vpxorq %7,%%zmm1    542                              "vpxorq %7,%%zmm14,%%zmm14\n\t"
543                              "vmovntdq %%zmm4,    543                              "vmovntdq %%zmm4,%4\n\t"
544                              "vmovntdq %%zmm6,    544                              "vmovntdq %%zmm6,%5\n\t"
545                              "vmovntdq %%zmm12    545                              "vmovntdq %%zmm12,%6\n\t"
546                              "vmovntdq %%zmm14    546                              "vmovntdq %%zmm14,%7"
547                              :                    547                              :
548                              : "m" (p[d]),  "m    548                              : "m" (p[d]),  "m" (p[d+64]), "m" (p[d+128]),
549                                "m" (p[d+192]),    549                                "m" (p[d+192]), "m" (q[d]),  "m" (q[d+64]),
550                                "m" (q[d+128]),    550                                "m" (q[d+128]), "m" (q[d+192]));
551         }                                         551         }
552         asm volatile("sfence" : : : "memory");    552         asm volatile("sfence" : : : "memory");
553         kernel_fpu_end();                         553         kernel_fpu_end();
554 }                                                 554 }
555 const struct raid6_calls raid6_avx512x4 = {       555 const struct raid6_calls raid6_avx512x4 = {
556         raid6_avx5124_gen_syndrome,               556         raid6_avx5124_gen_syndrome,
557         raid6_avx5124_xor_syndrome,               557         raid6_avx5124_xor_syndrome,
558         raid6_have_avx512,                        558         raid6_have_avx512,
559         "avx512x4",                               559         "avx512x4",
560         .priority = 2           /* Prefer AVX5    560         .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
561 };                                                561 };
562 #endif                                            562 #endif
563                                                   563 
564 #endif /* CONFIG_AS_AVX512 */                     564 #endif /* CONFIG_AS_AVX512 */
565                                                   565 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php