~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/lib/raid6/recov_avx512.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /lib/raid6/recov_avx512.c (Version linux-6.11-rc3) and /lib/raid6/recov_avx512.c (Version linux-6.7.12)


  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Copyright (C) 2016 Intel Corporation
  4  *
  5  * Author: Gayatri Kammela <gayatri.kammela@intel.com>
  6  * Author: Megha Dey <megha.dey@linux.intel.com>
  7  */
  8 
  9 #ifdef CONFIG_AS_AVX512
 10 
 11 #include <linux/raid/pq.h>
 12 #include "x86.h"
 13 
 14 static int raid6_has_avx512(void)
 15 {
 16         return boot_cpu_has(X86_FEATURE_AVX2) &&
 17                 boot_cpu_has(X86_FEATURE_AVX) &&
 18                 boot_cpu_has(X86_FEATURE_AVX512F) &&
 19                 boot_cpu_has(X86_FEATURE_AVX512BW) &&
 20                 boot_cpu_has(X86_FEATURE_AVX512VL) &&
 21                 boot_cpu_has(X86_FEATURE_AVX512DQ);
 22 }
 23 
 24 static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
 25                                      int failb, void **ptrs)
 26 {
 27         u8 *p, *q, *dp, *dq;
 28         const u8 *pbmul;        /* P multiplier table for B data */
 29         const u8 *qmul;         /* Q multiplier table (for both) */
 30         const u8 x0f = 0x0f;
 31 
 32         p = (u8 *)ptrs[disks-2];
 33         q = (u8 *)ptrs[disks-1];
 34 
 35         /*
 36          * Compute syndrome with zero for the missing data pages
 37          * Use the dead data pages as temporary storage for
 38          * delta p and delta q
 39          */
 40 
 41         dp = (u8 *)ptrs[faila];
 42         ptrs[faila] = (void *)raid6_empty_zero_page;
 43         ptrs[disks-2] = dp;
 44         dq = (u8 *)ptrs[failb];
 45         ptrs[failb] = (void *)raid6_empty_zero_page;
 46         ptrs[disks-1] = dq;
 47 
 48         raid6_call.gen_syndrome(disks, bytes, ptrs);
 49 
 50         /* Restore pointer table */
 51         ptrs[faila]   = dp;
 52         ptrs[failb]   = dq;
 53         ptrs[disks-2] = p;
 54         ptrs[disks-1] = q;
 55 
 56         /* Now, pick the proper data tables */
 57         pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
 58         qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
 59                 raid6_gfexp[failb]]];
 60 
 61         kernel_fpu_begin();
 62 
 63         /* zmm0 = x0f[16] */
 64         asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
 65 
 66         while (bytes) {
 67 #ifdef CONFIG_X86_64
 68                 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
 69                              "vmovdqa64 %1, %%zmm9\n\t"
 70                              "vmovdqa64 %2, %%zmm0\n\t"
 71                              "vmovdqa64 %3, %%zmm8\n\t"
 72                              "vpxorq %4, %%zmm1, %%zmm1\n\t"
 73                              "vpxorq %5, %%zmm9, %%zmm9\n\t"
 74                              "vpxorq %6, %%zmm0, %%zmm0\n\t"
 75                              "vpxorq %7, %%zmm8, %%zmm8"
 76                              :
 77                              : "m" (q[0]), "m" (q[64]), "m" (p[0]),
 78                                "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
 79                                "m" (dp[0]), "m" (dp[64]));
 80 
 81                 /*
 82                  * 1 = dq[0]  ^ q[0]
 83                  * 9 = dq[64] ^ q[64]
 84                  * 0 = dp[0]  ^ p[0]
 85                  * 8 = dp[64] ^ p[64]
 86                  */
 87 
 88                 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
 89                              "vbroadcasti64x2 %1, %%zmm5"
 90                              :
 91                              : "m" (qmul[0]), "m" (qmul[16]));
 92 
 93                 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
 94                              "vpsraw $4, %%zmm9, %%zmm12\n\t"
 95                              "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
 96                              "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
 97                              "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
 98                              "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
 99                              "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
100                              "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
101                              "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
102                              "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
103                              "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
104                              "vpxorq %%zmm4, %%zmm5, %%zmm5"
105                              :
106                              : );
107 
108                 /*
109                  * 5 = qx[0]
110                  * 15 = qx[64]
111                  */
112 
113                 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
114                              "vbroadcasti64x2 %1, %%zmm1\n\t"
115                              "vpsraw $4, %%zmm0, %%zmm2\n\t"
116                              "vpsraw $4, %%zmm8, %%zmm6\n\t"
117                              "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
118                              "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
119                              "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
120                              "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
121                              "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
122                              "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
123                              "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
124                              "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
125                              "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
126                              "vpxorq %%zmm12, %%zmm13, %%zmm13"
127                              :
128                              : "m" (pbmul[0]), "m" (pbmul[16]));
129 
130                 /*
131                  * 1  = pbmul[px[0]]
132                  * 13 = pbmul[px[64]]
133                  */
134                 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
135                              "vpxorq %%zmm15, %%zmm13, %%zmm13"
136                              :
137                              : );
138 
139                 /*
140                  * 1 = db = DQ
141                  * 13 = db[64] = DQ[64]
142                  */
143                 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
144                              "vmovdqa64 %%zmm13,%1\n\t"
145                              "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
146                              "vpxorq %%zmm13, %%zmm8, %%zmm8"
147                              :
148                              : "m" (dq[0]), "m" (dq[64]));
149 
150                 asm volatile("vmovdqa64 %%zmm0, %0\n\t"
151                              "vmovdqa64 %%zmm8, %1"
152                              :
153                              : "m" (dp[0]), "m" (dp[64]));
154 
155                 bytes -= 128;
156                 p += 128;
157                 q += 128;
158                 dp += 128;
159                 dq += 128;
160 #else
161                 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
162                              "vmovdqa64 %1, %%zmm0\n\t"
163                              "vpxorq %2, %%zmm1, %%zmm1\n\t"
164                              "vpxorq %3, %%zmm0, %%zmm0"
165                              :
166                              : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
167 
168                 /* 1 = dq ^ q;  0 = dp ^ p */
169 
170                 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
171                              "vbroadcasti64x2 %1, %%zmm5"
172                              :
173                              : "m" (qmul[0]), "m" (qmul[16]));
174 
175                 /*
176                  * 1 = dq ^ q
177                  * 3 = dq ^ p >> 4
178                  */
179                 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
180                              "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
181                              "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
182                              "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
183                              "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
184                              "vpxorq %%zmm4, %%zmm5, %%zmm5"
185                              :
186                              : );
187 
188                 /* 5 = qx */
189 
190                 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
191                              "vbroadcasti64x2 %1, %%zmm1"
192                              :
193                              : "m" (pbmul[0]), "m" (pbmul[16]));
194 
195                 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
196                              "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
197                              "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
198                              "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
199                              "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
200                              "vpxorq %%zmm4, %%zmm1, %%zmm1"
201                              :
202                              : );
203 
204                 /* 1 = pbmul[px] */
205                 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
206                              /* 1 = db = DQ */
207                              "vmovdqa64 %%zmm1, %0\n\t"
208                              :
209                              : "m" (dq[0]));
210 
211                 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
212                              "vmovdqa64 %%zmm0, %0"
213                              :
214                              : "m" (dp[0]));
215 
216                 bytes -= 64;
217                 p += 64;
218                 q += 64;
219                 dp += 64;
220                 dq += 64;
221 #endif
222         }
223 
224         kernel_fpu_end();
225 }
226 
227 static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
228                                      void **ptrs)
229 {
230         u8 *p, *q, *dq;
231         const u8 *qmul;         /* Q multiplier table */
232         const u8 x0f = 0x0f;
233 
234         p = (u8 *)ptrs[disks-2];
235         q = (u8 *)ptrs[disks-1];
236 
237         /*
238          * Compute syndrome with zero for the missing data page
239          * Use the dead data page as temporary storage for delta q
240          */
241 
242         dq = (u8 *)ptrs[faila];
243         ptrs[faila] = (void *)raid6_empty_zero_page;
244         ptrs[disks-1] = dq;
245 
246         raid6_call.gen_syndrome(disks, bytes, ptrs);
247 
248         /* Restore pointer table */
249         ptrs[faila]   = dq;
250         ptrs[disks-1] = q;
251 
252         /* Now, pick the proper data tables */
253         qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
254 
255         kernel_fpu_begin();
256 
257         asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
258 
259         while (bytes) {
260 #ifdef CONFIG_X86_64
261                 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
262                              "vmovdqa64 %1, %%zmm8\n\t"
263                              "vpxorq %2, %%zmm3, %%zmm3\n\t"
264                              "vpxorq %3, %%zmm8, %%zmm8"
265                              :
266                              : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
267                                "m" (q[64]));
268 
269                 /*
270                  * 3 = q[0] ^ dq[0]
271                  * 8 = q[64] ^ dq[64]
272                  */
273                 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
274                              "vmovapd %%zmm0, %%zmm13\n\t"
275                              "vbroadcasti64x2 %1, %%zmm1\n\t"
276                              "vmovapd %%zmm1, %%zmm14"
277                              :
278                              : "m" (qmul[0]), "m" (qmul[16]));
279 
280                 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
281                              "vpsraw $4, %%zmm8, %%zmm12\n\t"
282                              "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
283                              "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
284                              "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
285                              "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
286                              "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
287                              "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
288                              "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
289                              "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
290                              "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
291                              "vpxorq %%zmm13, %%zmm14, %%zmm14"
292                              :
293                              : );
294 
295                 /*
296                  * 1  = qmul[q[0]  ^ dq[0]]
297                  * 14 = qmul[q[64] ^ dq[64]]
298                  */
299                 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
300                              "vmovdqa64 %1, %%zmm12\n\t"
301                              "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
302                              "vpxorq %%zmm14, %%zmm12, %%zmm12"
303                              :
304                              : "m" (p[0]), "m" (p[64]));
305 
306                 /*
307                  * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
308                  * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
309                  */
310 
311                 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
312                              "vmovdqa64 %%zmm14, %1\n\t"
313                              "vmovdqa64 %%zmm2, %2\n\t"
314                              "vmovdqa64 %%zmm12,%3"
315                              :
316                              : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
317                                "m" (p[64]));
318 
319                 bytes -= 128;
320                 p += 128;
321                 q += 128;
322                 dq += 128;
323 #else
324                 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
325                              "vpxorq %1, %%zmm3, %%zmm3"
326                              :
327                              : "m" (dq[0]), "m" (q[0]));
328 
329                 /* 3 = q ^ dq */
330 
331                 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
332                              "vbroadcasti64x2 %1, %%zmm1"
333                              :
334                              : "m" (qmul[0]), "m" (qmul[16]));
335 
336                 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
337                              "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
338                              "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
339                              "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
340                              "vpshufb %%zmm6,     340                              "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
341                              "vpxorq %%zmm0, %    341                              "vpxorq %%zmm0, %%zmm1, %%zmm1"
342                              :                    342                              :
343                              : );                 343                              : );
344                                                   344 
345                 /* 1 = qmul[q ^ dq] */            345                 /* 1 = qmul[q ^ dq] */
346                                                   346 
347                 asm volatile("vmovdqa64 %0, %%    347                 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
348                              "vpxorq %%zmm1, %    348                              "vpxorq %%zmm1, %%zmm2, %%zmm2"
349                              :                    349                              :
350                              : "m" (p[0]));       350                              : "m" (p[0]));
351                                                   351 
352                 /* 2 = p ^ qmul[q ^ dq] */        352                 /* 2 = p ^ qmul[q ^ dq] */
353                                                   353 
354                 asm volatile("vmovdqa64 %%zmm1    354                 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
355                              "vmovdqa64 %%zmm2    355                              "vmovdqa64 %%zmm2, %1"
356                              :                    356                              :
357                              : "m" (dq[0]), "m    357                              : "m" (dq[0]), "m" (p[0]));
358                                                   358 
359                 bytes -= 64;                      359                 bytes -= 64;
360                 p += 64;                          360                 p += 64;
361                 q += 64;                          361                 q += 64;
362                 dq += 64;                         362                 dq += 64;
363 #endif                                            363 #endif
364         }                                         364         }
365                                                   365 
366         kernel_fpu_end();                         366         kernel_fpu_end();
367 }                                                 367 }
368                                                   368 
369 const struct raid6_recov_calls raid6_recov_avx    369 const struct raid6_recov_calls raid6_recov_avx512 = {
370         .data2 = raid6_2data_recov_avx512,        370         .data2 = raid6_2data_recov_avx512,
371         .datap = raid6_datap_recov_avx512,        371         .datap = raid6_datap_recov_avx512,
372         .valid = raid6_has_avx512,                372         .valid = raid6_has_avx512,
373 #ifdef CONFIG_X86_64                              373 #ifdef CONFIG_X86_64
374         .name = "avx512x2",                       374         .name = "avx512x2",
375 #else                                             375 #else
376         .name = "avx512x1",                       376         .name = "avx512x1",
377 #endif                                            377 #endif
378         .priority = 3,                            378         .priority = 3,
379 };                                                379 };
380                                                   380 
381 #else                                             381 #else
382 #warning "your version of binutils lacks AVX51    382 #warning "your version of binutils lacks AVX512 support"
383 #endif                                            383 #endif
384                                                   384 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php