~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/lib/raid6/recov_avx512.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /lib/raid6/recov_avx512.c (Version linux-6.11.5) and /lib/raid6/recov_avx512.c (Version linux-4.16.18)


  1 // SPDX-License-Identifier: GPL-2.0-only       << 
  2 /*                                                  1 /*
  3  * Copyright (C) 2016 Intel Corporation             2  * Copyright (C) 2016 Intel Corporation
  4  *                                                  3  *
  5  * Author: Gayatri Kammela <gayatri.kammela@in      4  * Author: Gayatri Kammela <gayatri.kammela@intel.com>
  6  * Author: Megha Dey <megha.dey@linux.intel.co      5  * Author: Megha Dey <megha.dey@linux.intel.com>
                                                   >>   6  *
                                                   >>   7  * This program is free software; you can redistribute it and/or
                                                   >>   8  * modify it under the terms of the GNU General Public License
                                                   >>   9  * as published by the Free Software Foundation; version 2
                                                   >>  10  * of the License.
                                                   >>  11  *
  7  */                                                12  */
  8                                                    13 
  9 #ifdef CONFIG_AS_AVX512                            14 #ifdef CONFIG_AS_AVX512
 10                                                    15 
 11 #include <linux/raid/pq.h>                         16 #include <linux/raid/pq.h>
 12 #include "x86.h"                                   17 #include "x86.h"
 13                                                    18 
 /*
  * Report whether the boot CPU advertises every CPU feature tested below.
  * The result is the logical AND of boot_cpu_has() for AVX2, AVX, AVX512F,
  * AVX512BW, AVX512VL and AVX512DQ, so it is nonzero only when all six are
  * present. The full feature names are visible in the right-hand column;
  * the left-hand column is cut off by the diff viewer.
  */
 14 static int raid6_has_avx512(void)                  19 static int raid6_has_avx512(void)
 15 {                                                  20 {
 16         return boot_cpu_has(X86_FEATURE_AVX2)      21         return boot_cpu_has(X86_FEATURE_AVX2) &&
 17                 boot_cpu_has(X86_FEATURE_AVX)      22                 boot_cpu_has(X86_FEATURE_AVX) &&
 18                 boot_cpu_has(X86_FEATURE_AVX51     23                 boot_cpu_has(X86_FEATURE_AVX512F) &&
 19                 boot_cpu_has(X86_FEATURE_AVX51     24                 boot_cpu_has(X86_FEATURE_AVX512BW) &&
 20                 boot_cpu_has(X86_FEATURE_AVX51     25                 boot_cpu_has(X86_FEATURE_AVX512VL) &&
 21                 boot_cpu_has(X86_FEATURE_AVX51     26                 boot_cpu_has(X86_FEATURE_AVX512DQ);
 22 }                                                  27 }
 23                                                    28 
 /*
  * Recover two failed data blocks, ptrs[faila] and ptrs[failb], using the
  * P block (ptrs[disks-2]) and the Q block (ptrs[disks-1]).
  *
  * Steps, as visible below:
  *  1. Temporarily point the failed slots at raid6_empty_zero_page and the
  *     P and Q slots at the failed buffers, then call
  *     raid6_call.gen_syndrome() so the failed buffers are used as temporary
  *     storage for delta p and delta q.
  *  2. Restore the pointer table and select the multiplier tables pbmul and
  *     qmul.
  *  3. Between kernel_fpu_begin() and kernel_fpu_end(), combine the buffers
  *     with nibble-wise vpshufb table lookups: 128 bytes per iteration under
  *     CONFIG_X86_64, 64 bytes otherwise. The value stored to dq is labelled
  *     db in the comments; dp receives that value XORed with (dp ^ p).
  *
  * NOTE(review): the loop is while (bytes) with a fixed decrement, so bytes
  * is presumably always a multiple of the step size - confirm with callers.
  * NOTE(review): vmovdqa64 is the aligned move form, so all four buffers are
  * presumably 64-byte aligned - confirm.
  * NOTE(review): the stores to dq and dp are listed as input memory operands
  * with no output operands or memory clobber - confirm this is intentional.
  */
 24 static void raid6_2data_recov_avx512(int disks     29 static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
 25                                      int failb     30                                      int failb, void **ptrs)
 26 {                                                  31 {
 27         u8 *p, *q, *dp, *dq;                       32         u8 *p, *q, *dp, *dq;
 28         const u8 *pbmul;        /* P multiplie     33         const u8 *pbmul;        /* P multiplier table for B data */
 29         const u8 *qmul;         /* Q multiplie     34         const u8 *qmul;         /* Q multiplier table (for both) */
 30         const u8 x0f = 0x0f;                       35         const u8 x0f = 0x0f;
 31                                                    36 
 32         p = (u8 *)ptrs[disks-2];                   37         p = (u8 *)ptrs[disks-2];
 33         q = (u8 *)ptrs[disks-1];                   38         q = (u8 *)ptrs[disks-1];
 34                                                    39 
 35         /*                                         40         /*
 36          * Compute syndrome with zero for the      41          * Compute syndrome with zero for the missing data pages
 37          * Use the dead data pages as temporar     42          * Use the dead data pages as temporary storage for
 38          * delta p and delta q                     43          * delta p and delta q
 39          */                                        44          */
 40                                                    45 
 41         dp = (u8 *)ptrs[faila];                    46         dp = (u8 *)ptrs[faila];
 42         ptrs[faila] = (void *)raid6_empty_zero     47         ptrs[faila] = (void *)raid6_empty_zero_page;
 43         ptrs[disks-2] = dp;                        48         ptrs[disks-2] = dp;
 44         dq = (u8 *)ptrs[failb];                    49         dq = (u8 *)ptrs[failb];
 45         ptrs[failb] = (void *)raid6_empty_zero     50         ptrs[failb] = (void *)raid6_empty_zero_page;
 46         ptrs[disks-1] = dq;                        51         ptrs[disks-1] = dq;
 47                                                    52 
 48         raid6_call.gen_syndrome(disks, bytes,      53         raid6_call.gen_syndrome(disks, bytes, ptrs);
 49                                                    54 
 50         /* Restore pointer table */                55         /* Restore pointer table */
 51         ptrs[faila]   = dp;                        56         ptrs[faila]   = dp;
 52         ptrs[failb]   = dq;                        57         ptrs[failb]   = dq;
 53         ptrs[disks-2] = p;                         58         ptrs[disks-2] = p;
 54         ptrs[disks-1] = q;                         59         ptrs[disks-1] = q;
 55                                                    60 
 56         /* Now, pick the proper data tables */     61         /* Now, pick the proper data tables */
 57         pbmul = raid6_vgfmul[raid6_gfexi[failb     62         pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
 58         qmul  = raid6_vgfmul[raid6_gfinv[raid6     63         qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
 59                 raid6_gfexp[failb]]];              64                 raid6_gfexp[failb]]];
 60                                                    65 
 61         kernel_fpu_begin();                        66         kernel_fpu_begin();
 62                                                    67 
 63         /* zmm7 = x0f[64] */                       68         /* zmm7 = x0f[64] */
 64         asm volatile("vpbroadcastb %0, %%zmm7"     69         asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
 65                                                    70 
 66         while (bytes) {                            71         while (bytes) {
 67 #ifdef CONFIG_X86_64                               72 #ifdef CONFIG_X86_64
 68                 asm volatile("vmovdqa64 %0, %%     73                 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
 69                              "vmovdqa64 %1, %%     74                              "vmovdqa64 %1, %%zmm9\n\t"
 70                              "vmovdqa64 %2, %%     75                              "vmovdqa64 %2, %%zmm0\n\t"
 71                              "vmovdqa64 %3, %%     76                              "vmovdqa64 %3, %%zmm8\n\t"
 72                              "vpxorq %4, %%zmm     77                              "vpxorq %4, %%zmm1, %%zmm1\n\t"
 73                              "vpxorq %5, %%zmm     78                              "vpxorq %5, %%zmm9, %%zmm9\n\t"
 74                              "vpxorq %6, %%zmm     79                              "vpxorq %6, %%zmm0, %%zmm0\n\t"
 75                              "vpxorq %7, %%zmm     80                              "vpxorq %7, %%zmm8, %%zmm8"
 76                              :                     81                              :
 77                              : "m" (q[0]), "m"     82                              : "m" (q[0]), "m" (q[64]), "m" (p[0]),
 78                                "m" (p[64]), "m     83                                "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
 79                                "m" (dp[0]), "m     84                                "m" (dp[0]), "m" (dp[64]));
 80                                                    85 
 81                 /*                                 86                 /*
 82                  * 1 = dq[0]  ^ q[0]               87                  * 1 = dq[0]  ^ q[0]
 83                  * 9 = dq[64] ^ q[64]              88                  * 9 = dq[64] ^ q[64]
 84                  * 0 = dp[0]  ^ p[0]               89                  * 0 = dp[0]  ^ p[0]
 85                  * 8 = dp[64] ^ p[64]              90                  * 8 = dp[64] ^ p[64]
 86                  */                                91                  */
 87                                                    92 
 88                 asm volatile("vbroadcasti64x2      93                 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
 89                              "vbroadcasti64x2      94                              "vbroadcasti64x2 %1, %%zmm5"
 90                              :                     95                              :
 91                              : "m" (qmul[0]),      96                              : "m" (qmul[0]), "m" (qmul[16]));
 92                                                    97 
 93                 asm volatile("vpsraw $4, %%zmm     98                 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
 94                              "vpsraw $4, %%zmm     99                              "vpsraw $4, %%zmm9, %%zmm12\n\t"
 95                              "vpandq %%zmm7, %    100                              "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
 96                              "vpandq %%zmm7, %    101                              "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
 97                              "vpandq %%zmm7, %    102                              "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
 98                              "vpandq %%zmm7, %    103                              "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
 99                              "vpshufb %%zmm9,     104                              "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
100                              "vpshufb %%zmm1,     105                              "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
101                              "vpshufb %%zmm12,    106                              "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
102                              "vpshufb %%zmm3,     107                              "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
103                              "vpxorq %%zmm14,     108                              "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
104                              "vpxorq %%zmm4, %    109                              "vpxorq %%zmm4, %%zmm5, %%zmm5"
105                              :                    110                              :
106                              : );                 111                              : );
107                                                   112 
108                 /*                                113                 /*
109                  * 5 = qx[0]                      114                  * 5 = qx[0]
110                  * 15 = qx[64]                    115                  * 15 = qx[64]
111                  */                               116                  */
112                                                   117 
113                 asm volatile("vbroadcasti64x2     118                 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
114                              "vbroadcasti64x2     119                              "vbroadcasti64x2 %1, %%zmm1\n\t"
115                              "vpsraw $4, %%zmm    120                              "vpsraw $4, %%zmm0, %%zmm2\n\t"
116                              "vpsraw $4, %%zmm    121                              "vpsraw $4, %%zmm8, %%zmm6\n\t"
117                              "vpandq %%zmm7, %    122                              "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
118                              "vpandq %%zmm7, %    123                              "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
119                              "vpandq %%zmm7, %    124                              "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
120                              "vpandq %%zmm7, %    125                              "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
121                              "vpshufb %%zmm14,    126                              "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
122                              "vpshufb %%zmm3,     127                              "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
123                              "vpshufb %%zmm6,     128                              "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
124                              "vpshufb %%zmm2,     129                              "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
125                              "vpxorq %%zmm4, %    130                              "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
126                              "vpxorq %%zmm12,     131                              "vpxorq %%zmm12, %%zmm13, %%zmm13"
127                              :                    132                              :
128                              : "m" (pbmul[0]),    133                              : "m" (pbmul[0]), "m" (pbmul[16]));
129                                                   134 
130                 /*                                135                 /*
131                  * 1  = pbmul[px[0]]              136                  * 1  = pbmul[px[0]]
132                  * 13 = pbmul[px[64]]             137                  * 13 = pbmul[px[64]]
133                  */                               138                  */
134                 asm volatile("vpxorq %%zmm5, %    139                 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
135                              "vpxorq %%zmm15,     140                              "vpxorq %%zmm15, %%zmm13, %%zmm13"
136                              :                    141                              :
137                              : );                 142                              : );
138                                                   143 
139                 /*                                144                 /*
140                  * 1 = db = DQ                    145                  * 1 = db = DQ
141                  * 13 = db[64] = DQ[64]           146                  * 13 = db[64] = DQ[64]
142                  */                               147                  */
143                 asm volatile("vmovdqa64 %%zmm1    148                 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
144                              "vmovdqa64 %%zmm1    149                              "vmovdqa64 %%zmm13,%1\n\t"
145                              "vpxorq %%zmm1, %    150                              "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
146                              "vpxorq %%zmm13,     151                              "vpxorq %%zmm13, %%zmm8, %%zmm8"
147                              :                    152                              :
148                              : "m" (dq[0]), "m    153                              : "m" (dq[0]), "m" (dq[64]));
149                                                   154 
150                 asm volatile("vmovdqa64 %%zmm0    155                 asm volatile("vmovdqa64 %%zmm0, %0\n\t"
151                              "vmovdqa64 %%zmm8    156                              "vmovdqa64 %%zmm8, %1"
152                              :                    157                              :
153                              : "m" (dp[0]), "m    158                              : "m" (dp[0]), "m" (dp[64]));
154                                                   159 
155                 bytes -= 128;                     160                 bytes -= 128;
156                 p += 128;                         161                 p += 128;
157                 q += 128;                         162                 q += 128;
158                 dp += 128;                        163                 dp += 128;
159                 dq += 128;                        164                 dq += 128;
160 #else                                             165 #else
161                 asm volatile("vmovdqa64 %0, %%    166                 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
162                              "vmovdqa64 %1, %%    167                              "vmovdqa64 %1, %%zmm0\n\t"
163                              "vpxorq %2, %%zmm    168                              "vpxorq %2, %%zmm1, %%zmm1\n\t"
164                              "vpxorq %3, %%zmm    169                              "vpxorq %3, %%zmm0, %%zmm0"
165                              :                    170                              :
166                              : "m" (*q), "m" (    171                              : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
167                                                   172 
168                 /* 1 = dq ^ q;  0 = dp ^ p */     173                 /* 1 = dq ^ q;  0 = dp ^ p */
169                                                   174 
170                 asm volatile("vbroadcasti64x2     175                 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
171                              "vbroadcasti64x2     176                              "vbroadcasti64x2 %1, %%zmm5"
172                              :                    177                              :
173                              : "m" (qmul[0]),     178                              : "m" (qmul[0]), "m" (qmul[16]));
174                                                   179 
175                 /*                                180                 /*
176                  * 1 = dq ^ q                     181                  * 1 = dq ^ q
177                  * 3 = (dq ^ q) >> 4              182                  * 3 = (dq ^ q) >> 4
178                  */                               183                  */
179                 asm volatile("vpsraw $4, %%zmm    184                 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
180                              "vpandq %%zmm7, %    185                              "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
181                              "vpandq %%zmm7, %    186                              "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
182                              "vpshufb %%zmm1,     187                              "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
183                              "vpshufb %%zmm3,     188                              "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
184                              "vpxorq %%zmm4, %    189                              "vpxorq %%zmm4, %%zmm5, %%zmm5"
185                              :                    190                              :
186                              : );                 191                              : );
187                                                   192 
188                 /* 5 = qx */                      193                 /* 5 = qx */
189                                                   194 
190                 asm volatile("vbroadcasti64x2     195                 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
191                              "vbroadcasti64x2     196                              "vbroadcasti64x2 %1, %%zmm1"
192                              :                    197                              :
193                              : "m" (pbmul[0]),    198                              : "m" (pbmul[0]), "m" (pbmul[16]));
194                                                   199 
195                 asm volatile("vpsraw $4, %%zmm    200                 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
196                              "vpandq %%zmm7, %    201                              "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
197                              "vpandq %%zmm7, %    202                              "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
198                              "vpshufb %%zmm3,     203                              "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
199                              "vpshufb %%zmm2,     204                              "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
200                              "vpxorq %%zmm4, %    205                              "vpxorq %%zmm4, %%zmm1, %%zmm1"
201                              :                    206                              :
202                              : );                 207                              : );
203                                                   208 
204                 /* 1 = pbmul[px] */               209                 /* 1 = pbmul[px] */
205                 asm volatile("vpxorq %%zmm5, %    210                 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
206                              /* 1 = db = DQ */    211                              /* 1 = db = DQ */
207                              "vmovdqa64 %%zmm1    212                              "vmovdqa64 %%zmm1, %0\n\t"
208                              :                    213                              :
209                              : "m" (dq[0]));      214                              : "m" (dq[0]));
210                                                   215 
211                 asm volatile("vpxorq %%zmm1, %    216                 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
212                              "vmovdqa64 %%zmm0    217                              "vmovdqa64 %%zmm0, %0"
213                              :                    218                              :
214                              : "m" (dp[0]));      219                              : "m" (dp[0]));
215                                                   220 
216                 bytes -= 64;                      221                 bytes -= 64;
217                 p += 64;                          222                 p += 64;
218                 q += 64;                          223                 q += 64;
219                 dp += 64;                         224                 dp += 64;
220                 dq += 64;                         225                 dq += 64;
221 #endif                                            226 #endif
222         }                                         227         }
223                                                   228 
224         kernel_fpu_end();                         229         kernel_fpu_end();
225 }                                                 230 }
226                                                   231 
227 static void raid6_datap_recov_avx512(int disks    232 static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
228                                      void **pt    233                                      void **ptrs)
229 {                                                 234 {
230         u8 *p, *q, *dq;                           235         u8 *p, *q, *dq;
231         const u8 *qmul;         /* Q multiplie    236         const u8 *qmul;         /* Q multiplier table */
232         const u8 x0f = 0x0f;                      237         const u8 x0f = 0x0f;
233                                                   238 
234         p = (u8 *)ptrs[disks-2];                  239         p = (u8 *)ptrs[disks-2];
235         q = (u8 *)ptrs[disks-1];                  240         q = (u8 *)ptrs[disks-1];
236                                                   241 
237         /*                                        242         /*
238          * Compute syndrome with zero for the     243          * Compute syndrome with zero for the missing data page
239          * Use the dead data page as temporary    244          * Use the dead data page as temporary storage for delta q
240          */                                       245          */
241                                                   246 
242         dq = (u8 *)ptrs[faila];                   247         dq = (u8 *)ptrs[faila];
243         ptrs[faila] = (void *)raid6_empty_zero    248         ptrs[faila] = (void *)raid6_empty_zero_page;
244         ptrs[disks-1] = dq;                       249         ptrs[disks-1] = dq;
245                                                   250 
246         raid6_call.gen_syndrome(disks, bytes,     251         raid6_call.gen_syndrome(disks, bytes, ptrs);
247                                                   252 
248         /* Restore pointer table */               253         /* Restore pointer table */
249         ptrs[faila]   = dq;                       254         ptrs[faila]   = dq;
250         ptrs[disks-1] = q;                        255         ptrs[disks-1] = q;
251                                                   256 
252         /* Now, pick the proper data tables */    257         /* Now, pick the proper data tables */
253         qmul  = raid6_vgfmul[raid6_gfinv[raid6    258         qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
254                                                   259 
255         kernel_fpu_begin();                       260         kernel_fpu_begin();
256                                                   261 
257         asm volatile("vpbroadcastb %0, %%zmm7"    262         asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
258                                                   263 
259         while (bytes) {                           264         while (bytes) {
260 #ifdef CONFIG_X86_64                              265 #ifdef CONFIG_X86_64
261                 asm volatile("vmovdqa64 %0, %%    266                 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
262                              "vmovdqa64 %1, %%    267                              "vmovdqa64 %1, %%zmm8\n\t"
263                              "vpxorq %2, %%zmm    268                              "vpxorq %2, %%zmm3, %%zmm3\n\t"
264                              "vpxorq %3, %%zmm    269                              "vpxorq %3, %%zmm8, %%zmm8"
265                              :                    270                              :
266                              : "m" (dq[0]), "m    271                              : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
267                                "m" (q[64]));      272                                "m" (q[64]));
268                                                   273 
269                 /*                                274                 /*
270                  * 3 = q[0] ^ dq[0]               275                  * 3 = q[0] ^ dq[0]
271                  * 8 = q[64] ^ dq[64]             276                  * 8 = q[64] ^ dq[64]
272                  */                               277                  */
273                 asm volatile("vbroadcasti64x2     278                 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
274                              "vmovapd %%zmm0,     279                              "vmovapd %%zmm0, %%zmm13\n\t"
275                              "vbroadcasti64x2     280                              "vbroadcasti64x2 %1, %%zmm1\n\t"
276                              "vmovapd %%zmm1,     281                              "vmovapd %%zmm1, %%zmm14"
277                              :                    282                              :
278                              : "m" (qmul[0]),     283                              : "m" (qmul[0]), "m" (qmul[16]));
279                                                   284 
280                 asm volatile("vpsraw $4, %%zmm    285                 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
281                              "vpsraw $4, %%zmm    286                              "vpsraw $4, %%zmm8, %%zmm12\n\t"
282                              "vpandq %%zmm7, %    287                              "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
283                              "vpandq %%zmm7, %    288                              "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
284                              "vpandq %%zmm7, %    289                              "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
285                              "vpandq %%zmm7, %    290                              "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
286                              "vpshufb %%zmm3,     291                              "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
287                              "vpshufb %%zmm8,     292                              "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
288                              "vpshufb %%zmm6,     293                              "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
289                              "vpshufb %%zmm12,    294                              "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
290                              "vpxorq %%zmm0, %    295                              "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
291                              "vpxorq %%zmm13,     296                              "vpxorq %%zmm13, %%zmm14, %%zmm14"
292                              :                    297                              :
293                              : );                 298                              : );
294                                                   299 
295                 /*                                300                 /*
296                  * 1  = qmul[q[0]  ^ dq[0]]       301                  * 1  = qmul[q[0]  ^ dq[0]]
297                  * 14 = qmul[q[64] ^ dq[64]]      302                  * 14 = qmul[q[64] ^ dq[64]]
298                  */                               303                  */
299                 asm volatile("vmovdqa64 %0, %%    304                 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
300                              "vmovdqa64 %1, %%    305                              "vmovdqa64 %1, %%zmm12\n\t"
301                              "vpxorq %%zmm1, %    306                              "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
302                              "vpxorq %%zmm14,     307                              "vpxorq %%zmm14, %%zmm12, %%zmm12"
303                              :                    308                              :
304                              : "m" (p[0]), "m"    309                              : "m" (p[0]), "m" (p[64]));
305                                                   310 
306                 /*                                311                 /*
307                  * 2  = p[0]  ^ qmul[q[0]  ^ d    312                  * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
308                  * 12 = p[64] ^ qmul[q[64] ^ d    313                  * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
309                  */                               314                  */
310                                                   315 
311                 asm volatile("vmovdqa64 %%zmm1    316                 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
312                              "vmovdqa64 %%zmm1    317                              "vmovdqa64 %%zmm14, %1\n\t"
313                              "vmovdqa64 %%zmm2    318                              "vmovdqa64 %%zmm2, %2\n\t"
314                              "vmovdqa64 %%zmm1    319                              "vmovdqa64 %%zmm12,%3"
315                              :                    320                              :
316                              : "m" (dq[0]), "m    321                              : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
317                                "m" (p[64]));      322                                "m" (p[64]));
318                                                   323 
319                 bytes -= 128;                     324                 bytes -= 128;
320                 p += 128;                         325                 p += 128;
321                 q += 128;                         326                 q += 128;
322                 dq += 128;                        327                 dq += 128;
323 #else                                             328 #else
324                 asm volatile("vmovdqa64 %0, %%    329                 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
325                              "vpxorq %1, %%zmm    330                              "vpxorq %1, %%zmm3, %%zmm3"
326                              :                    331                              :
327                              : "m" (dq[0]), "m    332                              : "m" (dq[0]), "m" (q[0]));
328                                                   333 
329                 /* 3 = q ^ dq */                  334                 /* 3 = q ^ dq */
330                                                   335 
331                 asm volatile("vbroadcasti64x2     336                 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
332                              "vbroadcasti64x2     337                              "vbroadcasti64x2 %1, %%zmm1"
333                              :                    338                              :
334                              : "m" (qmul[0]),     339                              : "m" (qmul[0]), "m" (qmul[16]));
335                                                   340 
336                 asm volatile("vpsraw $4, %%zmm    341                 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
337                              "vpandq %%zmm7, %    342                              "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
338                              "vpandq %%zmm7, %    343                              "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
339                              "vpshufb %%zmm3,     344                              "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
340                              "vpshufb %%zmm6,     345                              "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
341                              "vpxorq %%zmm0, %    346                              "vpxorq %%zmm0, %%zmm1, %%zmm1"
342                              :                    347                              :
343                              : );                 348                              : );
344                                                   349 
345                 /* 1 = qmul[q ^ dq] */            350                 /* 1 = qmul[q ^ dq] */
346                                                   351 
347                 asm volatile("vmovdqa64 %0, %%    352                 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
348                              "vpxorq %%zmm1, %    353                              "vpxorq %%zmm1, %%zmm2, %%zmm2"
349                              :                    354                              :
350                              : "m" (p[0]));       355                              : "m" (p[0]));
351                                                   356 
352                 /* 2 = p ^ qmul[q ^ dq] */        357                 /* 2 = p ^ qmul[q ^ dq] */
353                                                   358 
354                 asm volatile("vmovdqa64 %%zmm1    359                 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
355                              "vmovdqa64 %%zmm2    360                              "vmovdqa64 %%zmm2, %1"
356                              :                    361                              :
357                              : "m" (dq[0]), "m    362                              : "m" (dq[0]), "m" (p[0]));
358                                                   363 
359                 bytes -= 64;                      364                 bytes -= 64;
360                 p += 64;                          365                 p += 64;
361                 q += 64;                          366                 q += 64;
362                 dq += 64;                         367                 dq += 64;
363 #endif                                            368 #endif
364         }                                         369         }
365                                                   370 
366         kernel_fpu_end();                         371         kernel_fpu_end();
367 }                                                 372 }
368                                                   373 
369 const struct raid6_recov_calls raid6_recov_avx    374 const struct raid6_recov_calls raid6_recov_avx512 = {
370         .data2 = raid6_2data_recov_avx512,        375         .data2 = raid6_2data_recov_avx512,
371         .datap = raid6_datap_recov_avx512,        376         .datap = raid6_datap_recov_avx512,
372         .valid = raid6_has_avx512,                377         .valid = raid6_has_avx512,
373 #ifdef CONFIG_X86_64                              378 #ifdef CONFIG_X86_64
374         .name = "avx512x2",                       379         .name = "avx512x2",
375 #else                                             380 #else
376         .name = "avx512x1",                       381         .name = "avx512x1",
377 #endif                                            382 #endif
378         .priority = 3,                            383         .priority = 3,
379 };                                                384 };
380                                                   385 
381 #else                                             386 #else
382 #warning "your version of binutils lacks AVX51    387 #warning "your version of binutils lacks AVX512 support"
383 #endif                                            388 #endif
384                                                   389 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php