// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* zmm0 = x0f[16] */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5  = qx[0]
		 * 15 = qx[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1  = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1  = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q;  0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = dq ^ p >> 4
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0]  ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1  = qmul[q[0]  ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",
#else
	.name = "avx512x1",
#endif
	.priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif