~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/lib/raid6/recov_avx512.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Copyright (C) 2016 Intel Corporation
  4  *
  5  * Author: Gayatri Kammela <gayatri.kammela@intel.com>
  6  * Author: Megha Dey <megha.dey@linux.intel.com>
  7  */
  8 
  9 #ifdef CONFIG_AS_AVX512
 10 
 11 #include <linux/raid/pq.h>
 12 #include "x86.h"
 13 
 14 static int raid6_has_avx512(void)
 15 {
 16         return boot_cpu_has(X86_FEATURE_AVX2) &&
 17                 boot_cpu_has(X86_FEATURE_AVX) &&
 18                 boot_cpu_has(X86_FEATURE_AVX512F) &&
 19                 boot_cpu_has(X86_FEATURE_AVX512BW) &&
 20                 boot_cpu_has(X86_FEATURE_AVX512VL) &&
 21                 boot_cpu_has(X86_FEATURE_AVX512DQ);
 22 }
 23 
/*
 * Recover two failed data blocks from the P and Q parity blocks.
 *
 * @disks: total number of blocks, including P (ptrs[disks-2]) and
 *	   Q (ptrs[disks-1])
 * @bytes: bytes per block; the loop consumes 128 bytes per iteration
 *	   on x86-64 (64 bytes on 32-bit), so @bytes is assumed to be
 *	   a multiple of that — no remainder handling exists
 * @faila: index of the first failed data block
 * @failb: index of the second failed data block (presumably
 *	   faila < failb, since failb-faila indexes raid6_gfexi —
 *	   TODO confirm against callers)
 * @ptrs:  per-block pointer table; the @faila/@failb entries are
 *	   overwritten in place with the recovered data
 *
 * NOTE(review): all data pointers must be 64-byte aligned — the loads
 * and stores use vmovdqa64, which faults on misaligned addresses.
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;	/* low-nibble mask for the table lookups */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* zmm7 = x0f[64] (note: original comment said zmm0; the asm
	 * clearly broadcasts into zmm7) */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	/*
	 * NOTE(review): throughout this loop, stores back to dq[]/dp[]
	 * go through operands declared as "m" *inputs*; correctness
	 * relies on asm volatile ordering rather than output
	 * constraints — flagging for review, but leaving as-is.
	 */
	while (bytes) {
#ifdef CONFIG_X86_64
		/* 64-bit: two 64-byte lanes (128 bytes) per iteration;
		 * the second lane lives in the high zmm8..zmm15 regs. */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		/* Replicate the two 16-byte qmul lookup tables across
		 * all four 128-bit lanes of zmm4/zmm5. */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/* GF(256) multiply of the Q delta by qmul: split each
		 * byte into low/high nibbles, look both up with
		 * vpshufb, and XOR the halves together. */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		/* Same nibble-lookup multiply for the P delta, using
		 * the pbmul table. */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1  = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		/* Store recovered B data, then derive A = px ^ B. */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		/* 32-bit: single 64-byte lane per iteration. */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q;  0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = dq ^ p >> 4
		 */
		/* Nibble-table GF multiply of the Q delta by qmul. */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/* Nibble-table GF multiply of the P delta by pbmul. */
		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		/* Recovered A = px ^ B, stored back over dp. */
		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
226 
/*
 * Recover one failed data block plus the P parity block, using the Q
 * parity block.
 *
 * @disks: total number of blocks, including P (ptrs[disks-2]) and
 *	   Q (ptrs[disks-1])
 * @bytes: bytes per block; consumed 128 bytes per iteration on x86-64
 *	   (64 bytes on 32-bit), so @bytes is assumed to be a multiple
 *	   of that — no remainder handling exists
 * @faila: index of the failed data block
 * @ptrs:  per-block pointer table; the @faila entry and the P block
 *	   are overwritten in place with recovered data
 *
 * NOTE(review): all data pointers must be 64-byte aligned — the loads
 * and stores use vmovdqa64, which faults on misaligned addresses.
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;	/* low-nibble mask for the table lookups */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* zmm7 = 0x0f repeated: low-nibble mask for vpshufb lookups */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	/*
	 * NOTE(review): stores back to dq[]/p[] below go through
	 * operands declared as "m" *inputs*; correctness relies on asm
	 * volatile ordering rather than output constraints — flagging
	 * for review, but leaving as-is.
	 */
	while (bytes) {
#ifdef CONFIG_X86_64
		/* 64-bit: two 64-byte lanes (128 bytes) per iteration. */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */
		/* Replicate the two 16-byte qmul tables across all
		 * lanes; keep copies in zmm13/zmm14 for the second
		 * 64-byte lane (vpshufb below destroys the table). */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/* GF(256) multiply of the Q delta by qmul via
		 * low/high-nibble vpshufb lookups XORed together. */
		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1  = qmul[q[0]  ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		/* Store recovered data over dq and recovered P over p. */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		/* 32-bit: single 64-byte lane per iteration. */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/* Nibble-table GF multiply of the Q delta by qmul. */
		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		/* Store recovered data over dq and recovered P over p. */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
368 
369 const struct raid6_recov_calls raid6_recov_avx512 = {
370         .data2 = raid6_2data_recov_avx512,
371         .datap = raid6_datap_recov_avx512,
372         .valid = raid6_has_avx512,
373 #ifdef CONFIG_X86_64
374         .name = "avx512x2",
375 #else
376         .name = "avx512x1",
377 #endif
378         .priority = 3,
379 };
380 
381 #else
382 #warning "your version of binutils lacks AVX512 support"
383 #endif
384 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php