
TOMOYO Linux Cross Reference
Linux/lib/raid6/sse2.c


Diff markup

Differences between /lib/raid6/sse2.c (Version linux-6.12-rc7) and /lib/raid6/sse2.c (Version linux-4.14.336)

The two versions differ only in the license header (the GPL boilerplate paragraph was replaced by an SPDX tag) and in stray leading whitespace around the three *_xor_syndrome() definitions. Lines prefixed with - appear only in linux-4.14.336, lines prefixed with + appear only in linux-6.12-rc7, and unprefixed lines are common to both versions.


+// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Boston MA 02111-1307, USA; either version 2 of the License, or
- *   (at your option) any later version; incorporated herein by reference.
- *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse2.c
 *
 * SSE-2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_sse_constants {
	u64 x1d[2];
} raid6_sse_constants  __attribute__((aligned(16))) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};

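The 0x1d bytes replicated through x1d are the low eight bits of the RAID-6 generator polynomial x^8 + x^4 + x^3 + x^2 + 1 (0x11d). Every pcmpgtb/paddb/pand/pxor run below is a bytewise multiply-by-2 in GF(2^8) against this constant: SSE2 has no per-byte shift, so paddb stands in for the left shift, and pcmpgtb against an all-zero register builds the conditional-reduction mask. A minimal scalar sketch of one such round (the function name is illustrative, not from this file):

#include <stdint.h>

/* Scalar equivalent of one pcmpgtb/paddb/pand/pxor round: multiply a
 * GF(2^8) byte by 2, reducing by 0x11d.  pcmpgtb against zero yields
 * 0xff exactly where the (signed) byte is negative, i.e. where the top
 * bit that is about to shift out is set. */
static inline uint8_t gf2_mul2(uint8_t v)
{
	uint8_t mask = ((int8_t)v < 0) ? 0xff : 0x00;	/* pcmpgtb */
	return (uint8_t)(v + v) ^ (mask & 0x1d);	/* paddb; pand; pxor */
}
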
static int raid6_have_sse2(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		boot_cpu_has(X86_FEATURE_FXSR) &&
		boot_cpu_has(X86_FEATURE_XMM) &&
		boot_cpu_has(X86_FEATURE_XMM2);
}

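raid6_have_sse2() gates algorithm selection on the CPU feature bits recorded at boot (MMX, FXSR, XMM, XMM2); as the comment notes, the check is conceptually "all CPUs" even though boot_cpu_has() is used. Outside the kernel, roughly the same gate can be expressed with a compiler builtin; this is an illustration, not kernel code:

#include <stdio.h>

/* Userspace analogue of raid6_have_sse2(): ask the compiler runtime
 * whether the executing CPU reports SSE2 in CPUID (GCC/clang, x86). */
static int have_sse2(void)
{
	return __builtin_cpu_supports("sse2");
}

int main(void)
{
	printf("sse2 supported: %s\n", have_sse2() ? "yes" : "no");
	return 0;
}
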
/*
 * Plain SSE2 implementation
 */
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm6,%xmm2");
			asm volatile("pxor %xmm6,%xmm4");
			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
		}
		asm volatile("pcmpgtb %xmm4,%xmm5");
		asm volatile("paddb %xmm4,%xmm4");
		asm volatile("pand %xmm0,%xmm5");
		asm volatile("pxor %xmm5,%xmm4");
		asm volatile("pxor %xmm5,%xmm5");
		asm volatile("pxor %xmm6,%xmm2");
		asm volatile("pxor %xmm6,%xmm4");

		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

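For reference, this is what the loop above computes for every byte of each 16-byte vector, written as portable C in the spirit of the kernel's generic int.uc implementation (gen_syndrome_ref is an illustrative name; gf2_mul2() is the sketch from earlier). P is the plain XOR of all data blocks, and Q is built Horner-style from the highest data disk down, multiplying the accumulator by 2 before folding in each lower disk:

#include <stddef.h>
#include <stdint.h>

/* Scalar reference for the syndrome loop, assuming gf2_mul2() from the
 * sketch above.  ptrs[] holds disks-2 data blocks followed by P and Q. */
static void gen_syndrome_ref(int disks, size_t bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p = dptr[disks - 2];	/* XOR parity */
	uint8_t *q = dptr[disks - 1];	/* RS syndrome */
	int z0 = disks - 3;		/* highest data disk */

	for (size_t d = 0; d < bytes; d++) {
		uint8_t wp = dptr[z0][d];	/* running P */
		uint8_t wq = wp;		/* running Q, Horner form */

		for (int z = z0 - 1; z >= 0; z--) {
			wp ^= dptr[z][d];
			wq = gf2_mul2(wq) ^ dptr[z][d];
		}
		p[d] = wp;
		q[d] = wq;
	}
}
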
static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
- {
+{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("pxor %xmm4,%xmm2");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm5,%xmm4");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

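The start/stop arguments make this the read-modify-write path: only the data disks in [start, stop] are folded into the existing P and Q. Disks above stop contribute nothing (the "right side" optimization starts the Horner accumulator at stop), and disks below start contribute only powers of 2 to Q, so the "left side" loop multiplies without loading any data. A scalar sketch of that contract, as read from the routine above (illustrative names; assumes gf2_mul2() and the ptrs[] layout from gen_syndrome_ref()):

/* Scalar sketch of the xor-syndrome contract. */
static void xor_syndrome_ref(int disks, int start, int stop,
			     size_t bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p = dptr[disks - 2];
	uint8_t *q = dptr[disks - 1];

	for (size_t d = 0; d < bytes; d++) {
		uint8_t wq = dptr[stop][d];	/* right side: start at stop */
		uint8_t wp = wq ^ p[d];		/* fold into existing P */

		for (int z = stop - 1; z >= start; z--) {	/* data pages */
			wq = gf2_mul2(wq) ^ dptr[z][d];
			wp ^= dptr[z][d];
		}
		for (int z = start - 1; z >= 0; z--)	/* left side: scale only */
			wq = gf2_mul2(wq);

		p[d] = wp;
		q[d] = wq ^ q[d];	/* fold into existing Q */
	}
}
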
const struct raid6_calls raid6_sse2x1 = {
	raid6_sse21_gen_syndrome,
	raid6_sse21_xor_syndrome,
	raid6_have_sse2,
	"sse2x1",
	1			/* Has cache hints */
};

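Each raid6_calls table registers one routine set with the RAID-6 core. The positional initializers line up with struct raid6_calls from include/linux/raid/pq.h, which looks roughly like the sketch below (paraphrased from memory of pq.h; consult the header for the authoritative definition):

struct raid6_calls {
	void (*gen_syndrome)(int, size_t, void **);
	void (*xor_syndrome)(int, int, int, size_t, void **);
	int  (*valid)(void);	/* returns 1 if this routine set is usable */
	const char *name;	/* name reported in boot-time benchmarks */
	int prefer;		/* nonzero for routines with cache hints */
};

so the trailing 1 /* Has cache hints */ fills the preference field.
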
/*
 * Unrolled-by-2 SSE2 implementation
 */
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

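The two-way unroll processes 32 bytes per pass with two independent register sets (P in xmm2/xmm3, Q in xmm4/xmm6), so the two multiply-by-2 dependency chains can overlap in the pipeline. The same idea in scalar form, assuming gf2_mul2() from above and bytes a multiple of 2 (illustrative name again):

/* Two-lane variant of gen_syndrome_ref(): the (wp0, wq0) and (wp1, wq1)
 * chains are independent, mirroring the xmm2/xmm4 vs. xmm3/xmm6 split. */
static void gen_syndrome_ref2(int disks, size_t bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p = dptr[disks - 2];
	uint8_t *q = dptr[disks - 1];
	int z0 = disks - 3;

	for (size_t d = 0; d < bytes; d += 2) {
		uint8_t wp0 = dptr[z0][d],     wq0 = wp0;
		uint8_t wp1 = dptr[z0][d + 1], wq1 = wp1;

		for (int z = z0 - 1; z >= 0; z--) {
			wp0 ^= dptr[z][d];
			wq0 = gf2_mul2(wq0) ^ dptr[z][d];
			wp1 ^= dptr[z][d + 1];
			wq1 = gf2_mul2(wq1) ^ dptr[z][d + 1];
		}
		p[d] = wp0;	q[d] = wq0;
		p[d + 1] = wp1;	q[d + 1] = wq1;
	}
}
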
- static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
- {
+{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
- }
+}

const struct raid6_calls raid6_sse2x2 = {
	raid6_sse22_gen_syndrome,
	raid6_sse22_xor_syndrome,
	raid6_have_sse2,
	"sse2x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64

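The unrolled-by-4 routines are compiled only for x86-64: they keep four P accumulators, four Q accumulators, and four zero temporaries live at once (xmm2-xmm7 plus xmm10-xmm15), and the xmm8-xmm15 registers exist only in 64-bit mode, so a 32-bit build has just xmm0-xmm7 to work with.
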
/*
 * Unrolled-by-4 SSE2 implementation
 */
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
	asm volatile("pxor %xmm4,%xmm4");	/* Q[0] */
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	asm volatile("pxor %xmm6,%xmm6");	/* Q[1] */
	asm volatile("pxor %xmm7,%xmm7");	/* Zero temp */
	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
	asm volatile("pxor %xmm12,%xmm12");	/* Q[2] */
	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
	asm volatile("pxor %xmm14,%xmm14");	/* Q[3] */
	asm volatile("pxor %xmm15,%xmm15");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 64 ) {
		for ( z = z0 ; z >= 0 ; z-- ) {
			/* The second prefetch seems to improve performance... */
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("pxor %xmm3,%xmm3");
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("pxor %xmm10,%xmm10");
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %xmm11,%xmm11");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("pxor %xmm6,%xmm6");
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("pxor %xmm12,%xmm12");
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
		asm volatile("pxor %xmm14,%xmm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

- static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
- {
+{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 64 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		asm volatile("pxor %xmm12,%xmm10");
		asm volatile("pxor %xmm14,%xmm11");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
- }
+}

const struct raid6_calls raid6_sse2x4 = {
	raid6_sse24_gen_syndrome,
	raid6_sse24_xor_syndrome,
	raid6_have_sse2,
	"sse2x4",
	1			/* Has cache hints */
};

#endif /* CONFIG_X86_64 */

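The raid6_sse2x1, raid6_sse2x2, and raid6_sse2x4 tables are candidates that lib/raid6/algos.c benchmarks during initialization before settling on the fastest valid routine. A throwaway userspace harness for sanity-checking the scalar sketches above against any other implementation (all names here are illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NDISKS	6	/* 4 data disks + P + Q */
#define BLKSZ	4096

int main(void)
{
	void *ptrs[NDISKS];

	for (int i = 0; i < NDISKS; i++) {
		/* movdqa in the real routines requires 16-byte alignment */
		ptrs[i] = aligned_alloc(16, BLKSZ);
		memset(ptrs[i], i < NDISKS - 2 ? i + 1 : 0, BLKSZ);
	}

	gen_syndrome_ref(NDISKS, BLKSZ, ptrs);	/* sketch from earlier */
	printf("P[0]=%02x Q[0]=%02x\n",
	       ((uint8_t *)ptrs[NDISKS - 2])[0],
	       ((uint8_t *)ptrs[NDISKS - 1])[0]);

	for (int i = 0; i < NDISKS; i++)
		free(ptrs[i]);
	return 0;
}
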
