// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

/*
 * Report whether this CPU has every AVX-512 subset these recovery
 * routines use (F/BW/VL/DQ, plus AVX/AVX2 as a sanity baseline).
 */
static int raid6_has_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

/*
 * Recover two failed data blocks (faila, failb) from P and Q using
 * AVX-512 GF(2^8) arithmetic.  The dead data pages are reused as
 * scratch space for delta-P/delta-Q while the syndrome is recomputed.
 * On x86-64 the loop processes 128 bytes per iteration (two zmm lanes);
 * otherwise 64 bytes.
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* zmm0 = x0f[16] */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1  = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q;  0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = dq ^ p >> 4
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

/*
 * Recover one failed data block (faila) plus the P block from Q using
 * AVX-512.  The dead data page temporarily stands in for the Q slot
 * while the syndrome is recomputed; the recovered data is then
 * qmul[q ^ dq], and P is rebuilt by XOR-ing it back in.
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1  = qmul[q[0]  ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

/* AVX-512 recovery entry points registered with the raid6 core. */
const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",
#else
	.name = "avx512x1",
#endif
	.priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.