// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse2.c
 *
 * SSE-2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_sse_constants {
	u64 x1d[2];
} raid6_sse_constants  __attribute__((aligned(16))) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};

static int raid6_have_sse2(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		boot_cpu_has(X86_FEATURE_FXSR) &&
		boot_cpu_has(X86_FEATURE_XMM) &&
		boot_cpu_has(X86_FEATURE_XMM2);
}

/*
 * Plain SSE2 implementation
 */
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movdqa %xmm2,%xmm4");	/* Q[0] */
		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm6,%xmm2");
			asm volatile("pxor %xmm6,%xmm4");
			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
		}
		asm volatile("pcmpgtb %xmm4,%xmm5");
		asm volatile("paddb %xmm4,%xmm4");
		asm volatile("pand %xmm0,%xmm5");
		asm volatile("pxor %xmm5,%xmm4");
		asm volatile("pxor %xmm5,%xmm5");
		asm volatile("pxor %xmm6,%xmm2");
		asm volatile("pxor %xmm6,%xmm4");

		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}


static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("movdqa %0,%%xmm4" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("pxor %xmm4,%xmm2");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm5,%xmm4");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x1 = {
	raid6_sse21_gen_syndrome,
	raid6_sse21_xor_syndrome,
	raid6_have_sse2,
	"sse2x1",
	1			/* Has cache hints */
};
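/*
 * For reference, the per-byte math that the SSE2 loops above vectorize
 * 16 bytes at a time is sketched below in plain C.  The
 * pcmpgtb/paddb/pand/pxor sequence is a branch-free multiply-by-2 in
 * GF(2^8) using the 0x1d reduction constant, and P/Q are accumulated
 * Horner-style from the highest data disk down.  This helper is
 * illustrative only: the name is made up, it is not part of the SSE2
 * implementation, and it is not registered with the RAID-6 algorithm
 * selection code.
 */
static void __attribute__((unused))
raid6_sse2_ref_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p = dptr[disks-2];	/* XOR parity */
	u8 *q = dptr[disks-1];	/* RS syndrome */
	int z0 = disks - 3;	/* Highest data disk */
	int d, z;

	for (d = 0; d < bytes; d++) {
		u8 wp = dptr[z0][d];	/* running P (plain XOR) */
		u8 wq = wp;		/* running Q (weighted sum) */

		for (z = z0-1; z >= 0; z--) {
			/* wq *= 2 in GF(2^8): shift left, then xor in
			 * 0x1d wherever the top bit was set */
			u8 mask = (wq & 0x80) ? 0xff : 0x00;	/* pcmpgtb */
			wq = (u8)(wq << 1) ^ (mask & 0x1d);	/* paddb/pand/pxor */
			wp ^= dptr[z][d];
			wq ^= dptr[z][d];
		}
		p[d] = wp;
		q[d] = wq;
	}
}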
/*
 * Unrolled-by-2 SSE2 implementation
 */
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
		asm volatile("movdqa %xmm2,%xmm4");	/* Q[0] */
		asm volatile("movdqa %xmm3,%xmm6");	/* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("movdqa %0,%%xmm4" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x2 = {
	raid6_sse22_gen_syndrome,
	raid6_sse22_xor_syndrome,
	raid6_have_sse2,
	"sse2x2",
	1			/* Has cache hints */
};
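/*
 * For reference, a plain-C sketch of what the _xor_syndrome() routines
 * above compute per byte: P and Q are updated in place with the
 * contribution of data disks start..stop only.  Disks below 'start'
 * add nothing to P and only advance Q by one multiply-by-2 per
 * position, which is the "left side optimization".  Illustrative only:
 * the name is made up and the helper is not registered with the RAID-6
 * algorithm selection code.
 */
static void __attribute__((unused))
raid6_sse2_ref_xor_syndrome(int disks, int start, int stop,
			    size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p = dptr[disks-2];	/* XOR parity */
	u8 *q = dptr[disks-1];	/* RS syndrome */
	int z0 = stop;		/* P/Q right side optimization */
	int d, z;

	for (d = 0; d < bytes; d++) {
		u8 wp = dptr[z0][d];
		u8 wq = wp;

		/* P/Q data pages */
		for (z = z0-1; z >= start; z--) {
			u8 mask = (wq & 0x80) ? 0xff : 0x00;
			wq = (u8)(wq << 1) ^ (mask & 0x1d);
			wp ^= dptr[z][d];
			wq ^= dptr[z][d];
		}
		/* P/Q left side optimization: only shift Q along */
		for (z = start-1; z >= 0; z--) {
			u8 mask = (wq & 0x80) ? 0xff : 0x00;
			wq = (u8)(wq << 1) ^ (mask & 0x1d);
		}
		p[d] ^= wp;
		q[d] ^= wq;
	}
}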
#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 SSE2 implementation
 */
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
	asm volatile("pxor %xmm4,%xmm4");	/* Q[0] */
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	asm volatile("pxor %xmm6,%xmm6");	/* Q[1] */
	asm volatile("pxor %xmm7,%xmm7");	/* Zero temp */
	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
	asm volatile("pxor %xmm12,%xmm12");	/* Q[2] */
	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
	asm volatile("pxor %xmm14,%xmm14");	/* Q[3] */
	asm volatile("pxor %xmm15,%xmm15");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 64 ) {
		for ( z = z0 ; z >= 0 ; z-- ) {
			/* The second prefetch seems to improve performance... */
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("pxor %xmm3,%xmm3");
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("pxor %xmm10,%xmm10");
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %xmm11,%xmm11");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("pxor %xmm6,%xmm6");
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("pxor %xmm12,%xmm12");
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
		asm volatile("pxor %xmm14,%xmm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 64 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		asm volatile("pxor %xmm12,%xmm10");
		asm volatile("pxor %xmm14,%xmm11");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %0,%%xmm4" :: "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" :: "m" (q[d+16]));
		asm volatile("pxor %0,%%xmm12" :: "m" (q[d+32]));
		asm volatile("pxor %0,%%xmm14" :: "m" (q[d+48]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}


const struct raid6_calls raid6_sse2x4 = {
	raid6_sse24_gen_syndrome,
	raid6_sse24_xor_syndrome,
	raid6_have_sse2,
	"sse2x4",
	1			/* Has cache hints */
};

#endif /* CONFIG_X86_64 */