TOMOYO Linux Cross Reference
Linux/lib/raid6/avx512.c

Diff markup

Differences between /lib/raid6/avx512.c (Version linux-6.12-rc7) and /lib/raid6/avx512.c (Version linux-4.11.12)
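
Summary of the changes: the GPL notice paragraph in the file header is replaced by an SPDX-License-Identifier line; raid6_avx512_constants is now __aligned(512/8) rather than __aligned(512), i.e. 64-byte alignment, presumably because the 512-bit vmovdqa64 loads only require 64-byte alignment; and the last member of each raid6_calls table changed from the flag value 1 /* Has cache hints */ to .priority = 2. The diff below is shown in unified form: lines prefixed with '-' appear only in linux-4.11.12, lines prefixed with '+' only in linux-6.12-rc7, and unprefixed lines are common to both versions.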


+// SPDX-License-Identifier: GPL-2.0-or-later
 /* -*- linux-c -*- --------------------------------------------------------
  *
  *   Copyright (C) 2016 Intel Corporation
  *
  *   Author: Gayatri Kammela <gayatri.kammela@intel.com>
  *   Author: Megha Dey <megha.dey@linux.intel.com>
  *
  *   Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
  *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
  *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Boston MA 02111-1307, USA; either version 2 of the License, or
- *   (at your option) any later version; incorporated herein by reference.
- *
  * -----------------------------------------------------------------------
  */

 /*
  * AVX512 implementation of RAID-6 syndrome functions
  *
  */

 #ifdef CONFIG_AS_AVX512

 #include <linux/raid/pq.h>
 #include "x86.h"

 static const struct raid6_avx512_constants {
         u64 x1d[8];
-} raid6_avx512_constants __aligned(512) = {
+} raid6_avx512_constants __aligned(512/8) = {
         { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
           0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
 };

 static int raid6_have_avx512(void)
 {
         return boot_cpu_has(X86_FEATURE_AVX2) &&
                 boot_cpu_has(X86_FEATURE_AVX) &&
                 boot_cpu_has(X86_FEATURE_AVX512F) &&
                 boot_cpu_has(X86_FEATURE_AVX512BW) &&
                 boot_cpu_has(X86_FEATURE_AVX512VL) &&
                 boot_cpu_has(X86_FEATURE_AVX512DQ);
 }

 static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
 {
         u8 **dptr = (u8 **)ptrs;
         u8 *p, *q;
         int d, z, z0;

         z0 = disks - 3;         /* Highest data disk */
         p = dptr[z0+1];         /* XOR parity */
         q = dptr[z0+2];         /* RS syndrome */

         kernel_fpu_begin();

         asm volatile("vmovdqa64 %0,%%zmm0\n\t"
                      "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
                      :
                      : "m" (raid6_avx512_constants.x1d[0]));

         for (d = 0; d < bytes; d += 64) {
                 asm volatile("prefetchnta %0\n\t"
                              "vmovdqa64 %0,%%zmm2\n\t"     /* P[0] */
                              "prefetchnta %1\n\t"
                              "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
                              "vmovdqa64 %1,%%zmm6"
                              :
                              : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
                 for (z = z0-2; z >= 0; z--) {
                         asm volatile("prefetchnta %0\n\t"
                                      "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
                                      "vpmovm2b %%k1,%%zmm5\n\t"
                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
                                      "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
                                      "vmovdqa64 %0,%%zmm6"
                                      :
                                      : "m" (dptr[z][d]));
                 }
                 asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
                              "vpmovm2b %%k1,%%zmm5\n\t"
                              "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                              "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                              "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                              "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
                              "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
                              "vmovntdq %%zmm2,%0\n\t"
                              "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
                              "vmovntdq %%zmm4,%1\n\t"
                              "vpxorq %%zmm4,%%zmm4,%%zmm4"
                              :
                              : "m" (p[d]), "m" (q[d]));
         }

         asm volatile("sfence" : : : "memory");
         kernel_fpu_end();
 }
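
The routine above is the SIMD form of the standard RAID-6 recurrence: P is the plain XOR of all data blocks, while Q is built Horner-style, multiplying the running value by 2 in GF(2^8) (polynomial 0x11d) before each further block is XORed in. Per byte, vpcmpgtb against zero plus vpmovm2b builds a 0x00/0xff mask from the top bit, vpaddb doubles, and vpandq/vpxorq fold in the 0x1d reduction. A minimal scalar sketch of the same computation, for reference only (our own illustrative code, not part of the kernel):

#include <stddef.h>
#include <stdint.h>

/* Multiply one byte by 2 in GF(2^8) with the RAID-6 polynomial 0x11d.
 * Scalar analogue of the vpcmpgtb/vpmovm2b (mask from the sign bit),
 * vpaddb (double) and vpandq/vpxorq (conditional reduction) sequence. */
uint8_t gf2_mul2(uint8_t v)
{
        uint8_t mask = (v & 0x80) ? 0xff : 0x00;

        return (uint8_t)(v << 1) ^ (mask & 0x1d);
}

/* Same pointer layout as the kernel routine: dptr[0..z0] are data
 * disks, dptr[z0+1] receives P, dptr[z0+2] receives Q. */
void gen_syndrome_scalar(int disks, size_t bytes, uint8_t **dptr)
{
        int z0 = disks - 3;             /* Highest data disk */
        uint8_t *p = dptr[z0 + 1];      /* XOR parity */
        uint8_t *q = dptr[z0 + 2];      /* RS syndrome */

        for (size_t d = 0; d < bytes; d++) {
                uint8_t wp = dptr[z0][d];
                uint8_t wq = wp;

                for (int z = z0 - 1; z >= 0; z--) {
                        wq = gf2_mul2(wq) ^ dptr[z][d];
                        wp ^= dptr[z][d];
                }
                p[d] = wp;
                q[d] = wq;
        }
}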

 static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
                                        size_t bytes, void **ptrs)
 {
         u8 **dptr = (u8 **)ptrs;
         u8 *p, *q;
         int d, z, z0;

         z0 = stop;              /* P/Q right side optimization */
         p = dptr[disks-2];      /* XOR parity */
         q = dptr[disks-1];      /* RS syndrome */

         kernel_fpu_begin();

         asm volatile("vmovdqa64 %0,%%zmm0"
                      : : "m" (raid6_avx512_constants.x1d[0]));

         for (d = 0 ; d < bytes ; d += 64) {
                 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
                              "vmovdqa64 %1,%%zmm2\n\t"
                              "vpxorq %%zmm4,%%zmm2,%%zmm2"
                              :
                              : "m" (dptr[z0][d]),  "m" (p[d]));
                 /* P/Q data pages */
                 for (z = z0-1 ; z >= start ; z--) {
                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                      "vpmovm2b %%k1,%%zmm5\n\t"
                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vmovdqa64 %0,%%zmm5\n\t"
                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4"
                                      :
                                      : "m" (dptr[z][d]));
                 }
                 /* P/Q left side optimization */
                 for (z = start-1 ; z >= 0 ; z--) {
                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                      "vpmovm2b %%k1,%%zmm5\n\t"
                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4"
                                      :
                                      : );
                 }
                 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
                 /* Don't use movntdq for r/w memory area < cache line */
                              "vmovdqa64 %%zmm4,%0\n\t"
                              "vmovdqa64 %%zmm2,%1"
                              :
                              : "m" (q[d]), "m" (p[d]));
         }

         asm volatile("sfence" : : : "memory");
         kernel_fpu_end();
 }
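
Where gen_syndrome() recomputes P/Q from scratch, xor_syndrome() patches existing parity when only disks start..stop of a stripe changed: it XORs the contribution of those disks into the current P and Q, scaling each block by its power of 2 in GF(2^8). That is why the z >= start loop both folds data in and doubles, while the "left side" loop below start only keeps doubling Q. A scalar sketch of the same update, reusing gf2_mul2() from the previous sketch (again illustrative, not kernel code):

/* XOR the contribution of data disks start..stop into the existing
 * P (dptr[disks-2]) and Q (dptr[disks-1]) blocks. */
void xor_syndrome_scalar(int disks, int start, int stop,
                         size_t bytes, uint8_t **dptr)
{
        int z0 = stop;                  /* P/Q right side optimization */
        uint8_t *p = dptr[disks - 2];   /* XOR parity */
        uint8_t *q = dptr[disks - 1];   /* RS syndrome */

        for (size_t d = 0; d < bytes; d++) {
                uint8_t wq = dptr[z0][d];
                uint8_t wp = wq ^ p[d];

                /* Disks that actually carry data for this update */
                for (int z = z0 - 1; z >= start; z--) {
                        wq = gf2_mul2(wq) ^ dptr[z][d];
                        wp ^= dptr[z][d];
                }
                /* Left of start: nothing to add, just keep scaling Q */
                for (int z = start - 1; z >= 0; z--)
                        wq = gf2_mul2(wq);

                p[d] = wp;
                q[d] ^= wq;
        }
}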

 const struct raid6_calls raid6_avx512x1 = {
         raid6_avx5121_gen_syndrome,
         raid6_avx5121_xor_syndrome,
         raid6_have_avx512,
         "avx512x1",
-        1                       /* Has cache hints */
+        .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
 };
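
The only change to these dispatch tables is the last member: in linux-4.11 the trailing int of struct raid6_calls was a flag (1 = "has cache hints"), while by linux-6.12 it had become an explicit priority used when ranking candidate implementations, which the designated initializer makes visible. Roughly, the table has this shape (paraphrased from include/linux/raid/pq.h; consult the header of the version you build against for the authoritative definition):

struct raid6_calls {
        void (*gen_syndrome)(int disks, size_t bytes, void **ptrs);
        void (*xor_syndrome)(int disks, int start, int stop,
                             size_t bytes, void **ptrs);
        int  (*valid)(void);    /* returns 1 if this routine set is usable */
        const char *name;       /* name reported by the boot-time benchmark */
        int priority;           /* preference rank; was a flag field in 4.11 */
};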

 /*
  * Unrolled-by-2 AVX512 implementation
  */
 static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
 {
         u8 **dptr = (u8 **)ptrs;
         u8 *p, *q;
         int d, z, z0;

         z0 = disks - 3;         /* Highest data disk */
         p = dptr[z0+1];         /* XOR parity */
         q = dptr[z0+2];         /* RS syndrome */

         kernel_fpu_begin();

         asm volatile("vmovdqa64 %0,%%zmm0\n\t"
                      "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
                      :
                      : "m" (raid6_avx512_constants.x1d[0]));

         /* We uniformly assume a single prefetch covers at least 64 bytes */
         for (d = 0; d < bytes; d += 128) {
                 asm volatile("prefetchnta %0\n\t"
                              "prefetchnta %1\n\t"
                              "vmovdqa64 %0,%%zmm2\n\t"      /* P[0] */
                              "vmovdqa64 %1,%%zmm3\n\t"      /* P[1] */
                              "vmovdqa64 %%zmm2,%%zmm4\n\t"  /* Q[0] */
                              "vmovdqa64 %%zmm3,%%zmm6"      /* Q[1] */
                              :
                              : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
                 for (z = z0-1; z >= 0; z--) {
                         asm volatile("prefetchnta %0\n\t"
                                      "prefetchnta %1\n\t"
                                      "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
                                      "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
                                      "vpmovm2b %%k1,%%zmm5\n\t"
                                      "vpmovm2b %%k2,%%zmm7\n\t"
                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                                      "vmovdqa64 %0,%%zmm5\n\t"
                                      "vmovdqa64 %1,%%zmm7\n\t"
                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
                                      :
                                      : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
                 }
                 asm volatile("vmovntdq %%zmm2,%0\n\t"
                              "vmovntdq %%zmm3,%1\n\t"
                              "vmovntdq %%zmm4,%2\n\t"
                              "vmovntdq %%zmm6,%3"
                              :
                              : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
                                "m" (q[d+64]));
         }

         asm volatile("sfence" : : : "memory");
         kernel_fpu_end();
 }

 static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
                                        size_t bytes, void **ptrs)
 {
         u8 **dptr = (u8 **)ptrs;
         u8 *p, *q;
         int d, z, z0;

         z0 = stop;              /* P/Q right side optimization */
         p = dptr[disks-2];      /* XOR parity */
         q = dptr[disks-1];      /* RS syndrome */

         kernel_fpu_begin();

         asm volatile("vmovdqa64 %0,%%zmm0"
                      : : "m" (raid6_avx512_constants.x1d[0]));

         for (d = 0 ; d < bytes ; d += 128) {
                 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
                              "vmovdqa64 %1,%%zmm6\n\t"
                              "vmovdqa64 %2,%%zmm2\n\t"
                              "vmovdqa64 %3,%%zmm3\n\t"
                              "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
                              "vpxorq %%zmm6,%%zmm3,%%zmm3"
                              :
                              : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
                                "m" (p[d]), "m" (p[d+64]));
                 /* P/Q data pages */
                 for (z = z0-1 ; z >= start ; z--) {
                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
                                      "vpmovm2b %%k1,%%zmm5\n\t"
                                      "vpmovm2b %%k2,%%zmm7\n\t"
                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                                      "vmovdqa64 %0,%%zmm5\n\t"
                                      "vmovdqa64 %1,%%zmm7\n\t"
                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
                                      :
                                      : "m" (dptr[z][d]),  "m" (dptr[z][d+64]));
                 }
                 /* P/Q left side optimization */
                 for (z = start-1 ; z >= 0 ; z--) {
                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
                                      "vpmovm2b %%k1,%%zmm5\n\t"
                                      "vpmovm2b %%k2,%%zmm7\n\t"
                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
                                      :
                                      : );
                 }
                 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
                              "vpxorq %1,%%zmm6,%%zmm6\n\t"
                              /* Don't use movntdq for r/w
                               * memory area < cache line
                               */
                              "vmovdqa64 %%zmm4,%0\n\t"
                              "vmovdqa64 %%zmm6,%1\n\t"
                              "vmovdqa64 %%zmm2,%2\n\t"
                              "vmovdqa64 %%zmm3,%3"
                              :
                              : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
                                "m" (p[d+64]));
         }

         asm volatile("sfence" : : : "memory");
         kernel_fpu_end();
 }

 const struct raid6_calls raid6_avx512x2 = {
         raid6_avx5122_gen_syndrome,
         raid6_avx5122_xor_syndrome,
         raid6_have_avx512,
         "avx512x2",
-        1                       /* Has cache hints */
+        .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
 };
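
Before moving on to the unrolled-by-4 variants, note that every variant relies on the same field arithmetic: 0x02 is a primitive element of GF(2^8) under the RAID-6 polynomial 0x11d, so repeated doubling visits all 255 non-zero elements before returning to the start. A standalone check of that property (our own test code, not from the kernel):

#include <stdint.h>
#include <stdio.h>

/* Repeatedly multiply by 2 in GF(2^8) with polynomial 0x11d, the same
 * per-byte operation the vpcmpgtb/vpaddb/vpandq/vpxorq sequences
 * perform.  Starting from 1, the cycle length must be 255. */
int main(void)
{
        uint8_t v = 1;
        int steps = 0;

        do {
                v = (uint8_t)(v << 1) ^ ((v & 0x80) ? 0x1d : 0x00);
                steps++;
        } while (v != 1 && steps < 512);

        printf("multiplicative order of 0x02: %d (expect 255)\n", steps);
        return steps == 255 ? 0 : 1;
}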

 #ifdef CONFIG_X86_64

 /*
  * Unrolled-by-4 AVX2 implementation
  */
 static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
 {
         u8 **dptr = (u8 **)ptrs;
         u8 *p, *q;
         int d, z, z0;

         z0 = disks - 3;         /* Highest data disk */
         p = dptr[z0+1];         /* XOR parity */
         q = dptr[z0+2];         /* RS syndrome */

         kernel_fpu_begin();

         asm volatile("vmovdqa64 %0,%%zmm0\n\t"
                      "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"       /* Zero temp */
                      "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"       /* P[0] */
                      "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"       /* P[1] */
                      "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"       /* Q[0] */
                      "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"       /* Q[1] */
                      "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"    /* P[2] */
                      "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"    /* P[3] */
                      "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"    /* Q[2] */
                      "vpxorq %%zmm14,%%zmm14,%%zmm14"        /* Q[3] */
                      :
                      : "m" (raid6_avx512_constants.x1d[0]));

         for (d = 0; d < bytes; d += 256) {
                 for (z = z0; z >= 0; z--) {
                 asm volatile("prefetchnta %0\n\t"
                              "prefetchnta %1\n\t"
                              "prefetchnta %2\n\t"
                              "prefetchnta %3\n\t"
                              "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
                              "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
                              "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
                              "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
                              "vpmovm2b %%k1,%%zmm5\n\t"
                              "vpmovm2b %%k2,%%zmm7\n\t"
                              "vpmovm2b %%k3,%%zmm13\n\t"
                              "vpmovm2b %%k4,%%zmm15\n\t"
                              "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                              "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                              "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
                              "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
                              "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                              "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                              "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
                              "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
                              "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                              "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                              "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
                              "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
                              "vmovdqa64 %0,%%zmm5\n\t"
                              "vmovdqa64 %1,%%zmm7\n\t"
                              "vmovdqa64 %2,%%zmm13\n\t"
                              "vmovdqa64 %3,%%zmm15\n\t"
                              "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                              "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
                              "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
                              "vpxorq %%zmm15,%%zmm11,%%zmm11\n"
                              "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                              "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                              "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
                              "vpxorq %%zmm15,%%zmm14,%%zmm14"
                              :
                              : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
                                "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
                 }
                 asm volatile("vmovntdq %%zmm2,%0\n\t"
                              "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
                              "vmovntdq %%zmm3,%1\n\t"
                              "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
                              "vmovntdq %%zmm10,%2\n\t"
                              "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
                              "vmovntdq %%zmm11,%3\n\t"
                              "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
                              "vmovntdq %%zmm4,%4\n\t"
                              "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
                              "vmovntdq %%zmm6,%5\n\t"
                              "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
                              "vmovntdq %%zmm12,%6\n\t"
                              "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
                              "vmovntdq %%zmm14,%7\n\t"
                              "vpxorq %%zmm14,%%zmm14,%%zmm14"
                              :
                              : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
                                "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
                                "m" (q[d+128]), "m" (q[d+192]));
         }

         asm volatile("sfence" : : : "memory");
         kernel_fpu_end();
 }

 static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
                                        size_t bytes, void **ptrs)
 {
         u8 **dptr = (u8 **)ptrs;
         u8 *p, *q;
         int d, z, z0;

         z0 = stop;              /* P/Q right side optimization */
         p = dptr[disks-2];      /* XOR parity */
         q = dptr[disks-1];      /* RS syndrome */

         kernel_fpu_begin();

         asm volatile("vmovdqa64 %0,%%zmm0"
                      :: "m" (raid6_avx512_constants.x1d[0]));

         for (d = 0 ; d < bytes ; d += 256) {
                 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
                              "vmovdqa64 %1,%%zmm6\n\t"
                              "vmovdqa64 %2,%%zmm12\n\t"
                              "vmovdqa64 %3,%%zmm14\n\t"
                              "vmovdqa64 %4,%%zmm2\n\t"
                              "vmovdqa64 %5,%%zmm3\n\t"
                              "vmovdqa64 %6,%%zmm10\n\t"
                              "vmovdqa64 %7,%%zmm11\n\t"
                              "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
                              "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
                              "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
                              "vpxorq %%zmm14,%%zmm11,%%zmm11"
                              :
                              : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
                                "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
                                "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
                                "m" (p[d+192]));
                 /* P/Q data pages */
                 for (z = z0-1 ; z >= start ; z--) {
                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
                                      "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
                                      "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
                                      "prefetchnta %0\n\t"
                                      "prefetchnta %2\n\t"
                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
                                      "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
                                      "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
                                      "vpmovm2b %%k1,%%zmm5\n\t"
                                      "vpmovm2b %%k2,%%zmm7\n\t"
                                      "vpmovm2b %%k3,%%zmm13\n\t"
                                      "vpmovm2b %%k4,%%zmm15\n\t"
                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                                      "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
                                      "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                                      "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
                                      "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
                                      "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
                                      "vmovdqa64 %0,%%zmm5\n\t"
                                      "vmovdqa64 %1,%%zmm7\n\t"
                                      "vmovdqa64 %2,%%zmm13\n\t"
                                      "vmovdqa64 %3,%%zmm15\n\t"
                                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
                                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
                                      "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
                                      "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
                                      "vpxorq %%zmm15,%%zmm14,%%zmm14"
                                      :
                                      : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
                                        "m" (dptr[z][d+128]),
                                        "m" (dptr[z][d+192]));
                 }
                 asm volatile("prefetchnta %0\n\t"
                              "prefetchnta %1\n\t"
                              :
                              : "m" (q[d]), "m" (q[d+128]));
                 /* P/Q left side optimization */
                 for (z = start-1 ; z >= 0 ; z--) {
                         asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
                                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
                                      "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
                                      "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
                                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
                                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
                                      "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
                                      "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
                                      "vpmovm2b %%k1,%%zmm5\n\t"
                                      "vpmovm2b %%k2,%%zmm7\n\t"
                                      "vpmovm2b %%k3,%%zmm13\n\t"
                                      "vpmovm2b %%k4,%%zmm15\n\t"
                                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
                                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
                                      "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
                                      "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
                                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
                                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
                                      "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
                                      "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
                                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
                                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
                                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
                                      "vpxorq %%zmm15,%%zmm14,%%zmm14"
                                      :
                                      : );
                 }
                 asm volatile("vmovntdq %%zmm2,%0\n\t"
                              "vmovntdq %%zmm3,%1\n\t"
                              "vmovntdq %%zmm10,%2\n\t"
                              "vmovntdq %%zmm11,%3\n\t"
                              "vpxorq %4,%%zmm4,%%zmm4\n\t"
                              "vpxorq %5,%%zmm6,%%zmm6\n\t"
                              "vpxorq %6,%%zmm12,%%zmm12\n\t"
                              "vpxorq %7,%%zmm14,%%zmm14\n\t"
                              "vmovntdq %%zmm4,%4\n\t"
                              "vmovntdq %%zmm6,%5\n\t"
                              "vmovntdq %%zmm12,%6\n\t"
                              "vmovntdq %%zmm14,%7"
                              :
                              : "m" (p[d]),  "m" (p[d+64]), "m" (p[d+128]),
                                "m" (p[d+192]), "m" (q[d]),  "m" (q[d+64]),
                                "m" (q[d+128]), "m" (q[d+192]));
         }
         asm volatile("sfence" : : : "memory");
         kernel_fpu_end();
 }
 const struct raid6_calls raid6_avx512x4 = {
         raid6_avx5124_gen_syndrome,
         raid6_avx5124_xor_syndrome,
         raid6_have_avx512,
         "avx512x4",
-        1                       /* Has cache hints */
+        .priority = 2           /* Prefer AVX512 over priority 1 (SSE2 and others) */
 };
 #endif

 #endif /* CONFIG_AS_AVX512 */
