TOMOYO Linux Cross Reference
Linux/lib/raid6/recov_loongarch_simd.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Originally based on recov_avx2.c and recov_ssse3.c:
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "loongarch.h"

/*
 * Unlike with the syndrome calculation algorithms, there is no boot-time
 * selection of recovery algorithms by benchmarking, so we have to specify
 * the priorities and hope that future cores will all have decent vector
 * support (i.e. no LASX implementation slower than LSX, or even than the
 * scalar code).
 */
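
/*
 * For reference, the selection in lib/raid6/algos.c (raid6_choose_recov())
 * roughly amounts to picking the valid entry with the highest .priority,
 * along the lines of:
 *
 *      for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
 *              if (!best || (*algo)->priority > best->priority)
 *                      if (!(*algo)->valid || (*algo)->valid())
 *                              best = *algo;
 *
 * so the LASX variant below (priority 2) is preferred over LSX (priority 1)
 * whenever the CPU supports it, and either one beats the generic recovery
 * code.
 */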

#ifdef CONFIG_CPU_HAS_LSX
static int raid6_has_lsx(void)
{
        return cpu_has_lsx;
}

static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
                                  int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */

        p = (u8 *)ptrs[disks - 2];
        q = (u8 *)ptrs[disks - 1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */
        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks - 2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks - 1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dp;
        ptrs[failb] = dq;
        ptrs[disks - 2] = p;
        ptrs[disks - 1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
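
        /*
         * Same math as the generic lib/raid6/recov.c: with the pages at
         * faila/failb zeroed above, P + Pxy and Q + Qxy give two GF(256)
         * equations in the two missing blocks, solved by the pbmul/qmul
         * constants just chosen:
         *
         *      Dx = pbmul(P + Pxy) ^ qmul(Q + Qxy)   (written to dq, failb's page)
         *      Dy = (P + Pxy) ^ Dx                   (written to dp, faila's page)
         *
         * A rough scalar sketch of the vector loop below (the generic code
         * indexes full 256-byte raid6_gfmul rows instead of the 32-byte
         * raid6_vgfmul nibble tables used here):
         *
         *      px = *p ^ *dp;
         *      qx = qmul[*q ^ *dq];
         *      *dq++ = db = pbmul[px] ^ qx;          (reconstructed failb data)
         *      *dp++ = db ^ px;                      (reconstructed faila data)
         *      p++; q++;
         */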

        kernel_fpu_begin();
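
        /*
         * The kernel does not preserve FP/SIMD state for kernel code by
         * default, so all use of the vector registers below is bracketed
         * by kernel_fpu_begin()/kernel_fpu_end() and must not sleep.
         */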

        /*
         * vr20, vr21: qmul
         * vr22, vr23: pbmul
         */
        asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
        asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
        asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
        asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
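
        /*
         * Each raid6_vgfmul[c] row is a pair of 16-byte lookup tables:
         * bytes 0..15 hold c * n for every low nibble n, bytes 16..31 hold
         * c * (n << 4) for every high nibble.  GF(256) multiplication
         * distributes over XOR, so for a byte b = (hi << 4) | lo the
         * product c * b is simply table[lo] ^ table[16 + hi]; the vshuf.b
         * sequences below (per-byte table lookups, akin to x86 pshufb)
         * compute exactly that, 16 bytes at a time.
         */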

        while (bytes) {
                /* vr4 - vr7: Q */
                asm volatile("vld $vr4, %0" : : "m" (q[0]));
                asm volatile("vld $vr5, %0" : : "m" (q[16]));
                asm volatile("vld $vr6, %0" : : "m" (q[32]));
                asm volatile("vld $vr7, %0" : : "m" (q[48]));
                /* vr4 - vr7: Q + Qxy */
                asm volatile("vld $vr8, %0" : : "m" (dq[0]));
                asm volatile("vld $vr9, %0" : : "m" (dq[16]));
                asm volatile("vld $vr10, %0" : : "m" (dq[32]));
                asm volatile("vld $vr11, %0" : : "m" (dq[48]));
                asm volatile("vxor.v $vr4, $vr4, $vr8");
                asm volatile("vxor.v $vr5, $vr5, $vr9");
                asm volatile("vxor.v $vr6, $vr6, $vr10");
                asm volatile("vxor.v $vr7, $vr7, $vr11");
                /* vr0 - vr3: P */
                asm volatile("vld $vr0, %0" : : "m" (p[0]));
                asm volatile("vld $vr1, %0" : : "m" (p[16]));
                asm volatile("vld $vr2, %0" : : "m" (p[32]));
                asm volatile("vld $vr3, %0" : : "m" (p[48]));
                /* vr0 - vr3: P + Pxy */
                asm volatile("vld $vr8, %0" : : "m" (dp[0]));
                asm volatile("vld $vr9, %0" : : "m" (dp[16]));
                asm volatile("vld $vr10, %0" : : "m" (dp[32]));
                asm volatile("vld $vr11, %0" : : "m" (dp[48]));
                asm volatile("vxor.v $vr0, $vr0, $vr8");
                asm volatile("vxor.v $vr1, $vr1, $vr9");
                asm volatile("vxor.v $vr2, $vr2, $vr10");
                asm volatile("vxor.v $vr3, $vr3, $vr11");

                /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
                asm volatile("vsrli.b $vr8, $vr4, 4");
                asm volatile("vsrli.b $vr9, $vr5, 4");
                asm volatile("vsrli.b $vr10, $vr6, 4");
                asm volatile("vsrli.b $vr11, $vr7, 4");
                /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
                asm volatile("vandi.b $vr4, $vr4, 0x0f");
                asm volatile("vandi.b $vr5, $vr5, 0x0f");
                asm volatile("vandi.b $vr6, $vr6, 0x0f");
                asm volatile("vandi.b $vr7, $vr7, 0x0f");
                /* lookup from qmul[0] */
                asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
                asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
                asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
                asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
                /* lookup from qmul[16] */
                asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
                asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
                asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
                asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
                /* vr16 - vr19: B(Q + Qxy) */
                asm volatile("vxor.v $vr16, $vr8, $vr4");
                asm volatile("vxor.v $vr17, $vr9, $vr5");
                asm volatile("vxor.v $vr18, $vr10, $vr6");
                asm volatile("vxor.v $vr19, $vr11, $vr7");

                /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
                asm volatile("vsrli.b $vr4, $vr0, 4");
                asm volatile("vsrli.b $vr5, $vr1, 4");
                asm volatile("vsrli.b $vr6, $vr2, 4");
                asm volatile("vsrli.b $vr7, $vr3, 4");
                /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
                asm volatile("vandi.b $vr12, $vr0, 0x0f");
                asm volatile("vandi.b $vr13, $vr1, 0x0f");
                asm volatile("vandi.b $vr14, $vr2, 0x0f");
                asm volatile("vandi.b $vr15, $vr3, 0x0f");
                /* lookup from pbmul[0] */
                asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
                asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
                asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
                asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
                /* lookup from pbmul[16] */
                asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
                asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
                asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
                asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
                /* vr4 - vr7: A(P + Pxy) */
                asm volatile("vxor.v $vr4, $vr4, $vr12");
                asm volatile("vxor.v $vr5, $vr5, $vr13");
                asm volatile("vxor.v $vr6, $vr6, $vr14");
                asm volatile("vxor.v $vr7, $vr7, $vr15");

                /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
                asm volatile("vxor.v $vr4, $vr4, $vr16");
                asm volatile("vxor.v $vr5, $vr5, $vr17");
                asm volatile("vxor.v $vr6, $vr6, $vr18");
                asm volatile("vxor.v $vr7, $vr7, $vr19");
                asm volatile("vst $vr4, %0" : "=m" (dq[0]));
                asm volatile("vst $vr5, %0" : "=m" (dq[16]));
                asm volatile("vst $vr6, %0" : "=m" (dq[32]));
                asm volatile("vst $vr7, %0" : "=m" (dq[48]));

                /* vr0 - vr3: P + Pxy + Dx = Dy */
                asm volatile("vxor.v $vr0, $vr0, $vr4");
                asm volatile("vxor.v $vr1, $vr1, $vr5");
                asm volatile("vxor.v $vr2, $vr2, $vr6");
                asm volatile("vxor.v $vr3, $vr3, $vr7");
                asm volatile("vst $vr0, %0" : "=m" (dp[0]));
                asm volatile("vst $vr1, %0" : "=m" (dp[16]));
                asm volatile("vst $vr2, %0" : "=m" (dp[32]));
                asm volatile("vst $vr3, %0" : "=m" (dp[48]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
        }

        kernel_fpu_end();
}

static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
                                  void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */

        p = (u8 *)ptrs[disks - 2];
        q = (u8 *)ptrs[disks - 1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */
        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks - 1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dq;
        ptrs[disks - 1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
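
        /*
         * gen_syndrome() above ran with the missing Dx zeroed, so p now
         * holds P + Dx and dq holds Qx = Q + g^faila * Dx.  Therefore
         * Dx = (Q + Qx) * g^(-faila), which is the qmul table chosen just
         * above, and P is then repaired as (P + Dx) + Dx.  The scalar
         * equivalent (cf. lib/raid6/recov.c, with a full gfmul row) is
         * roughly:
         *
         *      *p++ ^= *dq = qmul[*q ^ *dq];
         *      q++; dq++;
         */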

        kernel_fpu_begin();

        /* vr22, vr23: qmul */
        asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
        asm volatile("vld $vr23, %0" : : "m" (qmul[16]));

        while (bytes) {
                /* vr0 - vr3: P + Dx */
                asm volatile("vld $vr0, %0" : : "m" (p[0]));
                asm volatile("vld $vr1, %0" : : "m" (p[16]));
                asm volatile("vld $vr2, %0" : : "m" (p[32]));
                asm volatile("vld $vr3, %0" : : "m" (p[48]));
                /* vr4 - vr7: Qx */
                asm volatile("vld $vr4, %0" : : "m" (dq[0]));
                asm volatile("vld $vr5, %0" : : "m" (dq[16]));
                asm volatile("vld $vr6, %0" : : "m" (dq[32]));
                asm volatile("vld $vr7, %0" : : "m" (dq[48]));
                /* vr4 - vr7: Q + Qx */
                asm volatile("vld $vr8, %0" : : "m" (q[0]));
                asm volatile("vld $vr9, %0" : : "m" (q[16]));
                asm volatile("vld $vr10, %0" : : "m" (q[32]));
                asm volatile("vld $vr11, %0" : : "m" (q[48]));
                asm volatile("vxor.v $vr4, $vr4, $vr8");
                asm volatile("vxor.v $vr5, $vr5, $vr9");
                asm volatile("vxor.v $vr6, $vr6, $vr10");
                asm volatile("vxor.v $vr7, $vr7, $vr11");

                /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
                asm volatile("vsrli.b $vr8, $vr4, 4");
                asm volatile("vsrli.b $vr9, $vr5, 4");
                asm volatile("vsrli.b $vr10, $vr6, 4");
                asm volatile("vsrli.b $vr11, $vr7, 4");
                /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
                asm volatile("vandi.b $vr4, $vr4, 0x0f");
                asm volatile("vandi.b $vr5, $vr5, 0x0f");
                asm volatile("vandi.b $vr6, $vr6, 0x0f");
                asm volatile("vandi.b $vr7, $vr7, 0x0f");
                /* lookup from qmul[0] */
                asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
                asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
                asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
                asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
                /* lookup from qmul[16] */
                asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
                asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
                asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
                asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
                /* vr4 - vr7: qmul(Q + Qx) = Dx */
                asm volatile("vxor.v $vr4, $vr4, $vr8");
                asm volatile("vxor.v $vr5, $vr5, $vr9");
                asm volatile("vxor.v $vr6, $vr6, $vr10");
                asm volatile("vxor.v $vr7, $vr7, $vr11");
                asm volatile("vst $vr4, %0" : "=m" (dq[0]));
                asm volatile("vst $vr5, %0" : "=m" (dq[16]));
                asm volatile("vst $vr6, %0" : "=m" (dq[32]));
                asm volatile("vst $vr7, %0" : "=m" (dq[48]));

                /* vr0 - vr3: P + Dx + Dx = P */
                asm volatile("vxor.v $vr0, $vr0, $vr4");
                asm volatile("vxor.v $vr1, $vr1, $vr5");
                asm volatile("vxor.v $vr2, $vr2, $vr6");
                asm volatile("vxor.v $vr3, $vr3, $vr7");
                asm volatile("vst $vr0, %0" : "=m" (p[0]));
                asm volatile("vst $vr1, %0" : "=m" (p[16]));
                asm volatile("vst $vr2, %0" : "=m" (p[32]));
                asm volatile("vst $vr3, %0" : "=m" (p[48]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
        }

        kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_lsx = {
        .data2 = raid6_2data_recov_lsx,
        .datap = raid6_datap_recov_lsx,
        .valid = raid6_has_lsx,
        .name = "lsx",
        .priority = 1,
};
#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX
static int raid6_has_lasx(void)
{
        return cpu_has_lasx;
}

static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
                                   int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */

        p = (u8 *)ptrs[disks - 2];
        q = (u8 *)ptrs[disks - 1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */
        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks - 2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks - 1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dp;
        ptrs[failb] = dq;
        ptrs[disks - 2] = p;
        ptrs[disks - 1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];

        kernel_fpu_begin();

        /*
         * xr20, xr21: qmul
         * xr22, xr23: pbmul
         */
        asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
        asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
        asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
        asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
        asm volatile("xvreplve0.q $xr20, $xr20");
        asm volatile("xvreplve0.q $xr21, $xr21");
        asm volatile("xvreplve0.q $xr22, $xr22");
        asm volatile("xvreplve0.q $xr23, $xr23");
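
        /*
         * The multiplier tables are only 16 bytes each, so they are loaded
         * with 128-bit vld and then xvreplve0.q copies the low 128-bit
         * lane into both halves of each 256-bit register: xvshuf.b does
         * its byte lookups within each 128-bit half independently, so both
         * halves need their own copy of the table (the same idea as the
         * vbroadcasti128 loads in recov_avx2.c).
         */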

        while (bytes) {
                /* xr0, xr1: Q */
                asm volatile("xvld $xr0, %0" : : "m" (q[0]));
                asm volatile("xvld $xr1, %0" : : "m" (q[32]));
                /* xr0, xr1: Q + Qxy */
                asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
                asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
                asm volatile("xvxor.v $xr0, $xr0, $xr4");
                asm volatile("xvxor.v $xr1, $xr1, $xr5");
                /* xr2, xr3: P */
                asm volatile("xvld $xr2, %0" : : "m" (p[0]));
                asm volatile("xvld $xr3, %0" : : "m" (p[32]));
                /* xr2, xr3: P + Pxy */
                asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
                asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
                asm volatile("xvxor.v $xr2, $xr2, $xr4");
                asm volatile("xvxor.v $xr3, $xr3, $xr5");

                /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
                asm volatile("xvsrli.b $xr4, $xr0, 4");
                asm volatile("xvsrli.b $xr5, $xr1, 4");
                /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
                asm volatile("xvandi.b $xr0, $xr0, 0x0f");
                asm volatile("xvandi.b $xr1, $xr1, 0x0f");
                /* lookup from qmul[0] */
                asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
                asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
                /* lookup from qmul[16] */
                asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
                asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
                /* xr6, xr7: B(Q + Qxy) */
                asm volatile("xvxor.v $xr6, $xr4, $xr0");
                asm volatile("xvxor.v $xr7, $xr5, $xr1");

                /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
                asm volatile("xvsrli.b $xr4, $xr2, 4");
                asm volatile("xvsrli.b $xr5, $xr3, 4");
                /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
                asm volatile("xvandi.b $xr0, $xr2, 0x0f");
                asm volatile("xvandi.b $xr1, $xr3, 0x0f");
                /* lookup from pbmul[0] */
                asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
                asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
                /* lookup from pbmul[16] */
                asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
                asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
                /* xr0, xr1: A(P + Pxy) */
                asm volatile("xvxor.v $xr0, $xr0, $xr4");
                asm volatile("xvxor.v $xr1, $xr1, $xr5");

                /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
                asm volatile("xvxor.v $xr0, $xr0, $xr6");
                asm volatile("xvxor.v $xr1, $xr1, $xr7");

                /* xr2, xr3: P + Pxy + Dx = Dy */
                asm volatile("xvxor.v $xr2, $xr2, $xr0");
                asm volatile("xvxor.v $xr3, $xr3, $xr1");

                asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
                asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
                asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
                asm volatile("xvst $xr3, %0" : "=m" (dp[32]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
        }

        kernel_fpu_end();
}

static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
                                   void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */

        p = (u8 *)ptrs[disks - 2];
        q = (u8 *)ptrs[disks - 1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */
        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks - 1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dq;
        ptrs[disks - 1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

        kernel_fpu_begin();

        /* xr22, xr23: qmul */
        asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
        asm volatile("xvreplve0.q $xr22, $xr22");
        asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
        asm volatile("xvreplve0.q $xr23, $xr23");

        while (bytes) {
                /* xr0, xr1: P + Dx */
                asm volatile("xvld $xr0, %0" : : "m" (p[0]));
                asm volatile("xvld $xr1, %0" : : "m" (p[32]));
                /* xr2, xr3: Qx */
                asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
                asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
                /* xr2, xr3: Q + Qx */
                asm volatile("xvld $xr4, %0" : : "m" (q[0]));
                asm volatile("xvld $xr5, %0" : : "m" (q[32]));
                asm volatile("xvxor.v $xr2, $xr2, $xr4");
                asm volatile("xvxor.v $xr3, $xr3, $xr5");

                /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
                asm volatile("xvsrli.b $xr4, $xr2, 4");
                asm volatile("xvsrli.b $xr5, $xr3, 4");
                /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
                asm volatile("xvandi.b $xr2, $xr2, 0x0f");
                asm volatile("xvandi.b $xr3, $xr3, 0x0f");
                /* lookup from qmul[0] */
                asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
                asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
                /* lookup from qmul[16] */
                asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
                asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
                /* xr2, xr3: qmul(Q + Qx) = Dx */
                asm volatile("xvxor.v $xr2, $xr2, $xr4");
                asm volatile("xvxor.v $xr3, $xr3, $xr5");

                /* xr0, xr1: P + Dx + Dx = P */
                asm volatile("xvxor.v $xr0, $xr0, $xr2");
                asm volatile("xvxor.v $xr1, $xr1, $xr3");

                asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
                asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
                asm volatile("xvst $xr0, %0" : "=m" (p[0]));
                asm volatile("xvst $xr1, %0" : "=m" (p[32]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
        }

        kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_lasx = {
        .data2 = raid6_2data_recov_lasx,
        .datap = raid6_datap_recov_lasx,
        .valid = raid6_has_lasx,
        .name = "lasx",
        .priority = 2,
};
#endif /* CONFIG_CPU_HAS_LASX */