
TOMOYO Linux Cross Reference
Linux/arch/x86/include/asm/xor_avx.h


/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#include <linux/compiler.h>
#include <asm/fpu/api.h>

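/*
 * Each loop iteration in the routines below handles one 512-byte "line":
 * BLOCK16() expands to sixteen BLOCK() invocations covering byte offsets
 * 0..480 in 32-byte steps, cycling through registers %ymm0-%ymm3.
 */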
#define BLOCK4(i) \
                BLOCK(32 * i, 0) \
                BLOCK(32 * (i + 1), 1) \
                BLOCK(32 * (i + 2), 2) \
                BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
                BLOCK4(0) \
                BLOCK4(4) \
                BLOCK4(8) \
                BLOCK4(12)

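/*
 * xor_avx_N() XORs the N - 1 source buffers into the destination p0,
 * 512 bytes per iteration, with the ymm register state guarded by
 * kernel_fpu_begin()/kernel_fpu_end().
 */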
static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
                      const unsigned long * __restrict p1)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
                      const unsigned long * __restrict p1,
                      const unsigned long * __restrict p2)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
                      const unsigned long * __restrict p1,
                      const unsigned long * __restrict p2,
                      const unsigned long * __restrict p3)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16();

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
             const unsigned long * __restrict p1,
             const unsigned long * __restrict p2,
             const unsigned long * __restrict p3,
             const unsigned long * __restrict p4)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
                p4 = (unsigned long *)((uintptr_t)p4 + 512);
        }

        kernel_fpu_end();
}

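/*
 * Template describing the AVX routines to the kernel's generic xor
 * framework (struct xor_block_template from <linux/raid/xor.h>).
 */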
static struct xor_block_template xor_block_avx = {
        .name = "avx",
        .do_2 = xor_avx_2,
        .do_3 = xor_avx_3,
        .do_4 = xor_avx_4,
        .do_5 = xor_avx_5,
};

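/*
 * Only advertise/select the AVX routines when both AVX and OSXSAVE are
 * set: OSXSAVE indicates the OS has enabled XSAVE-managed extended
 * state, without which AVX instructions would fault.
 */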
#define AVX_XOR_SPEED \
do { \
        if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
                xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
        (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)

#endif
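For context, AVX_XOR_SPEED and AVX_SELECT are meant to be consumed by the architecture's <asm/xor.h>, which feeds candidate templates to the generic calibration code in crypto/xor.c. The sketch below is an assumed illustration of that wiring, not verbatim kernel source; xor_block_8regs and xor_block_32regs stand in for whichever other templates the architecture registers.

/* Assumed illustration only -- not verbatim kernel source. */
#include <asm/xor_avx.h>

#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
        AVX_XOR_SPEED; \
        xor_speed(&xor_block_8regs);  /* assumed generic template */ \
        xor_speed(&xor_block_32regs); /* assumed generic template */ \
} while (0)

/* Prefer the AVX template whenever AVX and OSXSAVE are available. */
#define XOR_SELECT_TEMPLATE(FASTEST) AVX_SELECT(FASTEST)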
