
TOMOYO Linux Cross Reference
Linux/lib/raid6/recov_loongarch_simd.c


Diff markup

Differences between /lib/raid6/recov_loongarch_simd.c (Architecture alpha) and /lib/raid6/recov_loongarch_simd.c (Architecture mips): the two copies are byte-for-byte identical, so the file is listed once below.


// SPDX-License-Identifier: GPL-2.0-only
/*
 * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Originally based on recov_avx2.c and recov_ssse3.c:
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "loongarch.h"

/*
 * Unlike with the syndrome calculation algorithms, there's no boot-time
 * selection of recovery algorithms by benchmarking, so we have to specify
 * the priorities and hope the future cores will all have decent vector
 * support (i.e. no LASX slower than LSX, or even scalar code).
 */

#ifdef CONFIG_CPU_HAS_LSX
static int raid6_has_lsx(void)
{
        return cpu_has_lsx;
}

static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
                                  int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */

        p = (u8 *)ptrs[disks - 2];
        q = (u8 *)ptrs[disks - 1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */
        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks - 2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks - 1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dp;
        ptrs[failb] = dq;
        ptrs[disks - 2] = p;
        ptrs[disks - 1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];

        kernel_fpu_begin();

        /*
         * vr20, vr21: qmul
         * vr22, vr23: pbmul
         */
        asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
        asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
        asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
        asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));

        while (bytes) {
                /* vr4 - vr7: Q */
                asm volatile("vld $vr4, %0" : : "m" (q[0]));
                asm volatile("vld $vr5, %0" : : "m" (q[16]));
                asm volatile("vld $vr6, %0" : : "m" (q[32]));
                asm volatile("vld $vr7, %0" : : "m" (q[48]));
                /*  vr4 - vr7: Q + Qxy */
                asm volatile("vld $vr8, %0" : : "m" (dq[0]));
                asm volatile("vld $vr9, %0" : : "m" (dq[16]));
                asm volatile("vld $vr10, %0" : : "m" (dq[32]));
                asm volatile("vld $vr11, %0" : : "m" (dq[48]));
                asm volatile("vxor.v $vr4, $vr4, $vr8");
                asm volatile("vxor.v $vr5, $vr5, $vr9");
                asm volatile("vxor.v $vr6, $vr6, $vr10");
                asm volatile("vxor.v $vr7, $vr7, $vr11");
                /* vr0 - vr3: P */
                asm volatile("vld $vr0, %0" : : "m" (p[0]));
                asm volatile("vld $vr1, %0" : : "m" (p[16]));
                asm volatile("vld $vr2, %0" : : "m" (p[32]));
                asm volatile("vld $vr3, %0" : : "m" (p[48]));
                /* vr0 - vr3: P + Pxy */
                asm volatile("vld $vr8, %0" : : "m" (dp[0]));
                asm volatile("vld $vr9, %0" : : "m" (dp[16]));
                asm volatile("vld $vr10, %0" : : "m" (dp[32]));
                asm volatile("vld $vr11, %0" : : "m" (dp[48]));
                asm volatile("vxor.v $vr0, $vr0, $vr8");
                asm volatile("vxor.v $vr1, $vr1, $vr9");
                asm volatile("vxor.v $vr2, $vr2, $vr10");
                asm volatile("vxor.v $vr3, $vr3, $vr11");

                /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
                asm volatile("vsrli.b $vr8, $vr4, 4");
                asm volatile("vsrli.b $vr9, $vr5, 4");
                asm volatile("vsrli.b $vr10, $vr6, 4");
                asm volatile("vsrli.b $vr11, $vr7, 4");
                /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
                asm volatile("vandi.b $vr4, $vr4, 0x0f");
                asm volatile("vandi.b $vr5, $vr5, 0x0f");
                asm volatile("vandi.b $vr6, $vr6, 0x0f");
                asm volatile("vandi.b $vr7, $vr7, 0x0f");
                /* lookup from qmul[0] */
                asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
                asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
                asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
                asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
                /* lookup from qmul[16] */
                asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
                asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
                asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
                asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
                /* vr16 - vr19: B(Q + Qxy) */
                asm volatile("vxor.v $vr16, $vr8, $vr4");
                asm volatile("vxor.v $vr17, $vr9, $vr5");
                asm volatile("vxor.v $vr18, $vr10, $vr6");
                asm volatile("vxor.v $vr19, $vr11, $vr7");

                /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
                asm volatile("vsrli.b $vr4, $vr0, 4");
                asm volatile("vsrli.b $vr5, $vr1, 4");
                asm volatile("vsrli.b $vr6, $vr2, 4");
                asm volatile("vsrli.b $vr7, $vr3, 4");
                /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
                asm volatile("vandi.b $vr12, $vr0, 0x0f");
                asm volatile("vandi.b $vr13, $vr1, 0x0f");
                asm volatile("vandi.b $vr14, $vr2, 0x0f");
                asm volatile("vandi.b $vr15, $vr3, 0x0f");
                /* lookup from pbmul[0] */
                asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
                asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
                asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
                asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
                /* lookup from pbmul[16] */
                asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
                asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
                asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
                asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
                /* vr4 - vr7: A(P + Pxy) */
                asm volatile("vxor.v $vr4, $vr4, $vr12");
                asm volatile("vxor.v $vr5, $vr5, $vr13");
                asm volatile("vxor.v $vr6, $vr6, $vr14");
                asm volatile("vxor.v $vr7, $vr7, $vr15");

                /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
                asm volatile("vxor.v $vr4, $vr4, $vr16");
                asm volatile("vxor.v $vr5, $vr5, $vr17");
                asm volatile("vxor.v $vr6, $vr6, $vr18");
                asm volatile("vxor.v $vr7, $vr7, $vr19");
                asm volatile("vst $vr4, %0" : "=m" (dq[0]));
                asm volatile("vst $vr5, %0" : "=m" (dq[16]));
                asm volatile("vst $vr6, %0" : "=m" (dq[32]));
                asm volatile("vst $vr7, %0" : "=m" (dq[48]));

                /* vr0 - vr3: P + Pxy + Dx = Dy */
                asm volatile("vxor.v $vr0, $vr0, $vr4");
                asm volatile("vxor.v $vr1, $vr1, $vr5");
                asm volatile("vxor.v $vr2, $vr2, $vr6");
                asm volatile("vxor.v $vr3, $vr3, $vr7");
                asm volatile("vst $vr0, %0" : "=m" (dp[0]));
                asm volatile("vst $vr1, %0" : "=m" (dp[16]));
                asm volatile("vst $vr2, %0" : "=m" (dp[32]));
                asm volatile("vst $vr3, %0" : "=m" (dp[48]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
        }

        kernel_fpu_end();
}
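
The loop above is the standard RAID-6 two-failure reconstruction, vectorized 64 bytes per iteration: with Pxy and Qxy denoting the syndromes recomputed with the two failed blocks zeroed, the missing blocks are Dx = A(P + Pxy) + B(Q + Qxy) and Dy = (P + Pxy) + Dx, where A and B are the GF(256) constants encoded in the pbmul and qmul tables. For reference, a byte-at-a-time sketch of the same computation follows; it assumes pbmul and qmul are full 256-entry GF(256) multiplication tables (as in the generic C recovery path) rather than the split 16-byte nibble tables used here, and recov_2data_scalar is a hypothetical name, not kernel code.

/* Sketch only: byte-wise equivalent of the vector loop above. */
static void recov_2data_scalar(size_t bytes, u8 *p, u8 *q, u8 *dp, u8 *dq,
                               const u8 *pbmul, const u8 *qmul)
{
        while (bytes--) {
                u8 px = *p ^ *dp;       /* P + Pxy */
                u8 qx = qmul[*q ^ *dq]; /* B(Q + Qxy) */

                *dq = pbmul[px] ^ qx;   /* Dx = A(P + Pxy) + B(Q + Qxy) */
                *dp = *dq ^ px;         /* Dy = (P + Pxy) + Dx */
                p++; q++; dp++; dq++;
        }
}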

static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
                                  void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */

        p = (u8 *)ptrs[disks - 2];
        q = (u8 *)ptrs[disks - 1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */
        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks - 1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dq;
        ptrs[disks - 1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

        kernel_fpu_begin();

        /* vr22, vr23: qmul */
        asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
        asm volatile("vld $vr23, %0" : : "m" (qmul[16]));

        while (bytes) {
                /* vr0 - vr3: P + Dx */
                asm volatile("vld $vr0, %0" : : "m" (p[0]));
                asm volatile("vld $vr1, %0" : : "m" (p[16]));
                asm volatile("vld $vr2, %0" : : "m" (p[32]));
                asm volatile("vld $vr3, %0" : : "m" (p[48]));
                /* vr4 - vr7: Qx */
                asm volatile("vld $vr4, %0" : : "m" (dq[0]));
                asm volatile("vld $vr5, %0" : : "m" (dq[16]));
                asm volatile("vld $vr6, %0" : : "m" (dq[32]));
                asm volatile("vld $vr7, %0" : : "m" (dq[48]));
                /* vr4 - vr7: Q + Qx */
                asm volatile("vld $vr8, %0" : : "m" (q[0]));
                asm volatile("vld $vr9, %0" : : "m" (q[16]));
                asm volatile("vld $vr10, %0" : : "m" (q[32]));
                asm volatile("vld $vr11, %0" : : "m" (q[48]));
                asm volatile("vxor.v $vr4, $vr4, $vr8");
                asm volatile("vxor.v $vr5, $vr5, $vr9");
                asm volatile("vxor.v $vr6, $vr6, $vr10");
                asm volatile("vxor.v $vr7, $vr7, $vr11");

                /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
                asm volatile("vsrli.b $vr8, $vr4, 4");
                asm volatile("vsrli.b $vr9, $vr5, 4");
                asm volatile("vsrli.b $vr10, $vr6, 4");
                asm volatile("vsrli.b $vr11, $vr7, 4");
                /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
                asm volatile("vandi.b $vr4, $vr4, 0x0f");
                asm volatile("vandi.b $vr5, $vr5, 0x0f");
                asm volatile("vandi.b $vr6, $vr6, 0x0f");
                asm volatile("vandi.b $vr7, $vr7, 0x0f");
                /* lookup from qmul[0] */
                asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
                asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
                asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
                asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
                /* lookup from qmul[16] */
                asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
                asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
                asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
                asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
                /* vr4 - vr7: qmul(Q + Qx) = Dx */
                asm volatile("vxor.v $vr4, $vr4, $vr8");
                asm volatile("vxor.v $vr5, $vr5, $vr9");
                asm volatile("vxor.v $vr6, $vr6, $vr10");
                asm volatile("vxor.v $vr7, $vr7, $vr11");
                asm volatile("vst $vr4, %0" : "=m" (dq[0]));
                asm volatile("vst $vr5, %0" : "=m" (dq[16]));
                asm volatile("vst $vr6, %0" : "=m" (dq[32]));
                asm volatile("vst $vr7, %0" : "=m" (dq[48]));

                /* vr0 - vr3: P + Dx + Dx = P */
                asm volatile("vxor.v $vr0, $vr0, $vr4");
                asm volatile("vxor.v $vr1, $vr1, $vr5");
                asm volatile("vxor.v $vr2, $vr2, $vr6");
                asm volatile("vxor.v $vr3, $vr3, $vr7");
                asm volatile("vst $vr0, %0" : "=m" (p[0]));
                asm volatile("vst $vr1, %0" : "=m" (p[16]));
                asm volatile("vst $vr2, %0" : "=m" (p[32]));
                asm volatile("vst $vr3, %0" : "=m" (p[48]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
        }

        kernel_fpu_end();
}
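
The data-plus-P recovery above computes the missing block as Dx = qmul(Q + Qx), where Qx is the Q syndrome recomputed with the failed block zeroed and qmul multiplies by the inverse of raid6_gfexp[faila]; the p buffer, which after gen_syndrome() holds the parity of the surviving blocks (i.e. P + Dx), is then repaired by XORing Dx back in. A byte-wise sketch under the same assumptions as the earlier one (full 256-entry qmul table, hypothetical function name):

/* Sketch only: byte-wise equivalent of the data+P recovery loop above. */
static void recov_datap_scalar(size_t bytes, u8 *p, u8 *q, u8 *dq,
                               const u8 *qmul)
{
        while (bytes--) {
                *dq = qmul[*q ^ *dq];   /* Dx = qmul(Q + Qx) */
                *p ^= *dq;              /* (P + Dx) + Dx = P */
                p++; q++; dq++;
        }
}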

const struct raid6_recov_calls raid6_recov_lsx = {
        .data2 = raid6_2data_recov_lsx,
        .datap = raid6_datap_recov_lsx,
        .valid = raid6_has_lsx,
        .name = "lsx",
        .priority = 1,
};
#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX
static int raid6_has_lasx(void)
{
        return cpu_has_lasx;
}

static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
                                   int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */

        p = (u8 *)ptrs[disks - 2];
        q = (u8 *)ptrs[disks - 1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */
        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks - 2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks - 1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dp;
        ptrs[failb] = dq;
        ptrs[disks - 2] = p;
        ptrs[disks - 1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];

        kernel_fpu_begin();

        /*
         * xr20, xr21: qmul
         * xr22, xr23: pbmul
         */
        asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
        asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
        asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
        asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
        asm volatile("xvreplve0.q $xr20, $xr20");
        asm volatile("xvreplve0.q $xr21, $xr21");
        asm volatile("xvreplve0.q $xr22, $xr22");
        asm volatile("xvreplve0.q $xr23, $xr23");

        while (bytes) {
                /* xr0, xr1: Q */
                asm volatile("xvld $xr0, %0" : : "m" (q[0]));
                asm volatile("xvld $xr1, %0" : : "m" (q[32]));
                /* xr0, xr1: Q + Qxy */
                asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
                asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
                asm volatile("xvxor.v $xr0, $xr0, $xr4");
                asm volatile("xvxor.v $xr1, $xr1, $xr5");
                /* xr2, xr3: P */
                asm volatile("xvld $xr2, %0" : : "m" (p[0]));
                asm volatile("xvld $xr3, %0" : : "m" (p[32]));
                /* xr2, xr3: P + Pxy */
                asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
                asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
                asm volatile("xvxor.v $xr2, $xr2, $xr4");
                asm volatile("xvxor.v $xr3, $xr3, $xr5");

                /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
                asm volatile("xvsrli.b $xr4, $xr0, 4");
                asm volatile("xvsrli.b $xr5, $xr1, 4");
                /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
                asm volatile("xvandi.b $xr0, $xr0, 0x0f");
                asm volatile("xvandi.b $xr1, $xr1, 0x0f");
                /* lookup from qmul[0] */
                asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
                asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
                /* lookup from qmul[16] */
                asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
                asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
                /* xr6, xr7: B(Q + Qxy) */
                asm volatile("xvxor.v $xr6, $xr4, $xr0");
                asm volatile("xvxor.v $xr7, $xr5, $xr1");

                /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
                asm volatile("xvsrli.b $xr4, $xr2, 4");
                asm volatile("xvsrli.b $xr5, $xr3, 4");
                /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
                asm volatile("xvandi.b $xr0, $xr2, 0x0f");
                asm volatile("xvandi.b $xr1, $xr3, 0x0f");
                /* lookup from pbmul[0] */
                asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
                asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
                /* lookup from pbmul[16] */
                asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
                asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
                /* xr0, xr1: A(P + Pxy) */
                asm volatile("xvxor.v $xr0, $xr0, $xr4");
                asm volatile("xvxor.v $xr1, $xr1, $xr5");

                /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
                asm volatile("xvxor.v $xr0, $xr0, $xr6");
                asm volatile("xvxor.v $xr1, $xr1, $xr7");

                /* xr2, xr3: P + Pxy + Dx = Dy */
                asm volatile("xvxor.v $xr2, $xr2, $xr0");
                asm volatile("xvxor.v $xr3, $xr3, $xr1");

                asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
                asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
                asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
                asm volatile("xvst $xr3, %0" : "=m" (dp[32]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
        }

        kernel_fpu_end();
}

static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
                                   void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */

        p = (u8 *)ptrs[disks - 2];
        q = (u8 *)ptrs[disks - 1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */
        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks - 1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dq;
        ptrs[disks - 1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

        kernel_fpu_begin();

        /* xr22, xr23: qmul */
        asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
        asm volatile("xvreplve0.q $xr22, $xr22");
        asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
        asm volatile("xvreplve0.q $xr23, $xr23");

        while (bytes) {
                /* xr0, xr1: P + Dx */
                asm volatile("xvld $xr0, %0" : : "m" (p[0]));
                asm volatile("xvld $xr1, %0" : : "m" (p[32]));
                /* xr2, xr3: Qx */
                asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
                asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
                /* xr2, xr3: Q + Qx */
                asm volatile("xvld $xr4, %0" : : "m" (q[0]));
                asm volatile("xvld $xr5, %0" : : "m" (q[32]));
                asm volatile("xvxor.v $xr2, $xr2, $xr4");
                asm volatile("xvxor.v $xr3, $xr3, $xr5");

                /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
                asm volatile("xvsrli.b $xr4, $xr2, 4");
                asm volatile("xvsrli.b $xr5, $xr3, 4");
                /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
                asm volatile("xvandi.b $xr2, $xr2, 0x0f");
                asm volatile("xvandi.b $xr3, $xr3, 0x0f");
                /* lookup from qmul[0] */
                asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
                asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
                /* lookup from qmul[16] */
                asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
                asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
                /* xr2, xr3: qmul(Q + Qx) = Dx */
                asm volatile("xvxor.v $xr2, $xr2, $xr4");
                asm volatile("xvxor.v $xr3, $xr3, $xr5");

                /* xr0, xr1: P + Dx + Dx = P */
                asm volatile("xvxor.v $xr0, $xr0, $xr2");
                asm volatile("xvxor.v $xr1, $xr1, $xr3");

                asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
                asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
                asm volatile("xvst $xr0, %0" : "=m" (p[0]));
                asm volatile("xvst $xr1, %0" : "=m" (p[32]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
        }

        kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_lasx = {
        .data2 = raid6_2data_recov_lasx,
        .datap = raid6_datap_recov_lasx,
        .valid = raid6_has_lasx,
        .name = "lasx",
        .priority = 2,
};
#endif /* CONFIG_CPU_HAS_LASX */
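
Both the LSX and LASX variants perform the GF(256) constant multiplications with the nibble-split table lookup, the LoongArch counterpart of the PSHUFB trick in recov_ssse3.c and recov_avx2.c on which this file is based. Judging from the qmul[0]/qmul[16] style lookups above, each 32-byte raid6_vgfmul table stores the products of its constant with the low-nibble values 0x00..0x0f in the first half and with the high-nibble values 0x00, 0x10, ..., 0xf0 in the second half; the LASX variant additionally broadcasts each 16-byte half to both 128-bit lanes with xvreplve0.q so that the lane-wise xvshuf.b sees the same table in every lane. The per-byte effect of each vsrli.b/vandi.b, vshuf.b and vxor.v group is sketched below; gf256_mul_by_table is a hypothetical helper, not kernel code, and the identity holds because multiplication distributes over XOR in GF(256).

/*
 * Sketch only: per-byte form of the nibble-split multiply done by the
 * shuffle-based lookups above.  tbl[0..15] hold c * 0x00 .. c * 0x0f,
 * tbl[16..31] hold c * 0x00, c * 0x10, ..., c * 0xf0 for some constant c.
 */
static inline u8 gf256_mul_by_table(const u8 *tbl, u8 x)
{
        /* c * x = c * (x & 0x0f) ^ c * (x & 0xf0) */
        return tbl[x & 0x0f] ^ tbl[16 + (x >> 4)];
}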
