// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- --------------------------------------------------------
 *
 *   Copyright (C) 2016 Intel Corporation
 *
 *   Author: Gayatri Kammela <gayatri.kammela@intel.com>
 *   Author: Megha Dey <megha.dey@linux.intel.com>
 *
 *   Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * -----------------------------------------------------------------------
 */

/*
 * AVX512 implementation of RAID-6 syndrome functions
 *
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_avx512_constants {
	u64 x1d[8];
} raid6_avx512_constants __aligned(512/8) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

static int raid6_have_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"     /* P[0] */
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
			     "vmovdqa64 %1,%%zmm6"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm6"
				     :
				     : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
			     "vpmovm2b %%k1,%%zmm5\n\t"
			     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm4,%1\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4"
			     :
			     : "m" (p[d]), "m" (q[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm2\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2"
			     :
			     : "m" (dptr[z0][d]), "m" (p[d]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4"
				     :
				     : "m" (dptr[z][d]));
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4"
				     :
				     : );
		}
		asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
		/* Don't use movntdq for r/w memory area < cache line */
			     "vmovdqa64 %%zmm4,%0\n\t"
			     "vmovdqa64 %%zmm2,%1"
			     :
			     : "m" (q[d]), "m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x1 = {
	raid6_avx5121_gen_syndrome,
	raid6_avx5121_xor_syndrome,
	raid6_have_avx512,
	"avx512x1",
	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
};
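
/*
 * For reference, a scalar sketch of what the gen_syndrome loops above
 * compute (modeled on the generic lib/raid6/int.uc code, not taken from
 * this file; gf2_mul2() is an illustrative helper name):
 *
 *	static inline u8 gf2_mul2(u8 v)
 *	{
 *		// multiply by x in GF(2^8): shift left, then fold in the
 *		// reduction constant 0x1d if the top bit was set
 *		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
 *	}
 *
 *	for (d = 0; d < bytes; d++) {
 *		u8 pv = 0, qv = 0;
 *		for (z = z0; z >= 0; z--) {
 *			qv = gf2_mul2(qv) ^ dptr[z][d];
 *			pv ^= dptr[z][d];
 *		}
 *		p[d] = pv;	// XOR parity
 *		q[d] = qv;	// RS syndrome: sum of 2^z * data_z
 *	}
 *
 * The vpcmpgtb/vpmovm2b/vpaddb/vpandq/vpxorq sequence is the vectorized
 * gf2_mul2(): the signed compare against zero collects the top bits into a
 * mask register, vpmovm2b expands the mask to bytes, vpaddb doubles each
 * byte, and the masked XOR with zmm0 (64 copies of 0x1d) applies the
 * reduction.
 */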

/*
 * Unrolled-by-2 AVX512 implementation
 */
static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for (d = 0; d < bytes; d += 128) {
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"      /* P[0] */
			     "vmovdqa64 %1,%%zmm3\n\t"      /* P[1] */
			     "vmovdqa64 %%zmm2,%%zmm4\n\t"  /* Q[0] */
			     "vmovdqa64 %%zmm3,%%zmm6"      /* Q[1] */
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm4,%2\n\t"
			     "vmovntdq %%zmm6,%3"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
			       "m" (q[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm6\n\t"
			     "vmovdqa64 %2,%%zmm2\n\t"
			     "vmovdqa64 %3,%%zmm3\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm3,%%zmm3"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
			       "m" (p[d]), "m" (p[d+64]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : );
		}
		asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
			     "vpxorq %1,%%zmm6,%%zmm6\n\t"
			     /* Don't use movntdq for r/w
			      * memory area < cache line
			      */
			     "vmovdqa64 %%zmm4,%0\n\t"
			     "vmovdqa64 %%zmm6,%1\n\t"
			     "vmovdqa64 %%zmm2,%2\n\t"
			     "vmovdqa64 %%zmm3,%3"
			     :
			     : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
			       "m" (p[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x2 = {
	raid6_avx5122_gen_syndrome,
	raid6_avx5122_xor_syndrome,
	raid6_have_avx512,
	"avx512x2",
	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX512 implementation
 */
static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"       /* Zero temp */
		     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"       /* P[0] */
		     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"       /* P[1] */
		     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"       /* Q[0] */
		     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"       /* Q[1] */
		     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"    /* P[2] */
		     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"    /* P[3] */
		     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"    /* Q[2] */
		     "vpxorq %%zmm14,%%zmm14,%%zmm14"        /* Q[3] */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 256) {
		for (z = z0; z >= 0; z--) {
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     "prefetchnta %2\n\t"
			     "prefetchnta %3\n\t"
			     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
			     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
			     "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
			     "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
			     "vpmovm2b %%k1,%%zmm5\n\t"
			     "vpmovm2b %%k2,%%zmm7\n\t"
			     "vpmovm2b %%k3,%%zmm13\n\t"
			     "vpmovm2b %%k4,%%zmm15\n\t"
			     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
			     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
			     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
			     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
			     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
			     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
			     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
			     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
			     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
			     "vmovdqa64 %0,%%zmm5\n\t"
			     "vmovdqa64 %1,%%zmm7\n\t"
			     "vmovdqa64 %2,%%zmm13\n\t"
			     "vmovdqa64 %3,%%zmm15\n\t"
			     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
			     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
			     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
			     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
			     "vpxorq %%zmm15,%%zmm14,%%zmm14"
			     :
			     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
			       "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
			     "vmovntdq %%zmm14,%7\n\t"
			     "vpxorq %%zmm14,%%zmm14,%%zmm14"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     :: "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 256) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm6\n\t"
			     "vmovdqa64 %2,%%zmm12\n\t"
			     "vmovdqa64 %3,%%zmm14\n\t"
			     "vmovdqa64 %4,%%zmm2\n\t"
			     "vmovdqa64 %5,%%zmm3\n\t"
			     "vmovdqa64 %6,%%zmm10\n\t"
			     "vmovdqa64 %7,%%zmm11\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
			     "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
			     "vpxorq %%zmm14,%%zmm11,%%zmm11"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
			       "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
			       "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
				     "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
				     "prefetchnta %0\n\t"
				     "prefetchnta %2\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vmovdqa64 %2,%%zmm13\n\t"
				     "vmovdqa64 %3,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
				     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
				       "m" (dptr[z][d+128]),
				       "m" (dptr[z][d+192]));
		}
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     :
			     : "m" (q[d]), "m" (q[d+128]));
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
				     "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : );
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %4,%%zmm4,%%zmm4\n\t"
			     "vpxorq %5,%%zmm6,%%zmm6\n\t"
			     "vpxorq %6,%%zmm12,%%zmm12\n\t"
			     "vpxorq %7,%%zmm14,%%zmm14\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vmovntdq %%zmm14,%7"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
const struct raid6_calls raid6_avx512x4 = {
	raid6_avx5124_gen_syndrome,
	raid6_avx5124_xor_syndrome,
	raid6_have_avx512,
	"avx512x4",
	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
};
#endif

#endif /* CONFIG_AS_AVX512 */
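
/*
 * Likewise, a scalar sketch of the xor_syndrome() variants above (derived
 * from their loops, not copied from elsewhere): for a partial stripe update
 * covering data disks start..stop, the existing P and Q pages are patched in
 * place.  Disks below 'start' contribute no new data, so the "left side"
 * loops only keep scaling the running Q value by 2 in GF(2^8):
 *
 *	for (d = 0; d < bytes; d++) {
 *		u8 pv = p[d] ^ dptr[z0][d];	// z0 == stop
 *		u8 qv = dptr[z0][d];
 *		for (z = z0-1; z >= start; z--) {	// P/Q data pages
 *			qv = gf2_mul2(qv) ^ dptr[z][d];
 *			pv ^= dptr[z][d];
 *		}
 *		for (z = start-1; z >= 0; z--)		// left side: scale only
 *			qv = gf2_mul2(qv);
 *		p[d] = pv;
 *		q[d] ^= qv;
 *	}
 *
 * (gf2_mul2() is the illustrative helper sketched after raid6_avx512x1.)
 */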