Linux/arch/powerpc/crypto/crc32-vpmsum_core.S


/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Core of the accelerated CRC algorithm.
 * In your file, define the constants and CRC_FUNCTION_NAME,
 * then include this file.
 *
 * Calculate the checksum of data that is 16 byte aligned and a multiple of
 * 16 bytes.
 *
 * The first step is to reduce it to 1024 bits. We do this in 8 parallel
 * chunks in order to mask the latency of the vpmsum instructions. If we
 * have more than 32 kB of data to checksum we repeat this step multiple
 * times, passing in the previous 1024 bits.
 *
 * The next step is to reduce the 1024 bits to 64 bits. This step adds
 * 32 bits of 0s to the end - this matches what a CRC does. We just
 * calculate constants that land the data in this 32 bits.
 *
 * We then use fixed point Barrett reduction to compute a mod n over GF(2)
 * for n = CRC using POWER8 instructions. We use x = 32.
 *
 * https://en.wikipedia.org/wiki/Barrett_reduction
 *
 * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
 */
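/*
 * Usage sketch (illustrative only; the wrapper file and function names
 * below are placeholders, not quotations of an in-tree file). A wrapper
 * source provides the constant tables this core refers to
 * (.byteswap_constant, .constants, .short_constants, .barrett_constants),
 * optionally defines REFLECT for a bit-reflected CRC, names the function
 * and then includes this file:
 *
 *      #define REFLECT
 *      #include "my-crc-vpmsum_constants.h"
 *      #define CRC_FUNCTION_NAME __my_crc_vpmsum
 *      #include "crc32-vpmsum_core.S"
 */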

#include <asm/ppc_asm.h>
#include <asm/ppc-opcode.h>

#define MAX_SIZE        32768

        .text

#if defined(__BIG_ENDIAN__) && defined(REFLECT)
#define BYTESWAP_DATA
#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
#define BYTESWAP_DATA
#else
#undef BYTESWAP_DATA
#endif
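/*
 * REFLECT selects a bit-reflected CRC, which consumes its input in
 * little-endian byte order; the non-reflected variant consumes big-endian
 * data. BYTESWAP_DATA is therefore defined whenever the host byte order
 * does not match the order the algorithm expects, in which case every
 * 16 byte load is followed by a VPERM byteswap.
 */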

#define off16           r25
#define off32           r26
#define off48           r27
#define off64           r28
#define off80           r29
#define off96           r30
#define off112          r31

#define const1          v24
#define const2          v25

#define byteswap        v26
#define mask_32bit      v27
#define mask_64bit      v28
#define zeroes          v29

#ifdef BYTESWAP_DATA
#define VPERM(A, B, C, D) vperm A, B, C, D
#else
#define VPERM(A, B, C, D)
#endif

/* unsigned int CRC_FUNCTION_NAME(unsigned int crc, void *p, unsigned long len) */
FUNC_START(CRC_FUNCTION_NAME)
        std     r31,-8(r1)
        std     r30,-16(r1)
        std     r29,-24(r1)
        std     r28,-32(r1)
        std     r27,-40(r1)
        std     r26,-48(r1)
        std     r25,-56(r1)

        li      off16,16
        li      off32,32
        li      off48,48
        li      off64,64
        li      off80,80
        li      off96,96
        li      off112,112
        li      r0,0

        /* Enough room for saving 10 non-volatile VMX registers */
        subi    r6,r1,56+10*16
        subi    r7,r1,56+2*16
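        /*
         * The non-volatile GPRs above and the VMX registers below live in
         * the protected area (redzone) below the stack pointer; this is a
         * leaf function, so no stack frame is needed.
         */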

        stvx    v20,0,r6
        stvx    v21,off16,r6
        stvx    v22,off32,r6
        stvx    v23,off48,r6
        stvx    v24,off64,r6
        stvx    v25,off80,r6
        stvx    v26,off96,r6
        stvx    v27,off112,r6
        stvx    v28,0,r7
        stvx    v29,off16,r7

        mr      r10,r3

        vxor    zeroes,zeroes,zeroes
        vspltisw v0,-1

        vsldoi  mask_32bit,zeroes,v0,4
        vsldoi  mask_64bit,zeroes,v0,8
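        /* mask_32bit = ones in the bottom 32 bits, mask_64bit = ones in the bottom 64 bits */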

        /* Get the initial value into v8 */
        vxor    v8,v8,v8
        MTVRD(v8, R3)
#ifdef REFLECT
        vsldoi  v8,zeroes,v8,8  /* shift into bottom 32 bits */
#else
        vsldoi  v8,v8,zeroes,4  /* shift into top 32 bits */
#endif

#ifdef BYTESWAP_DATA
        LOAD_REG_ADDR(r3, .byteswap_constant)
        lvx     byteswap,0,r3
        addi    r3,r3,16
#endif

        cmpdi   r5,256
        blt     .Lshort

        rldicr  r6,r5,0,56

        /* Checksum in blocks of MAX_SIZE */
1:      lis     r7,MAX_SIZE@h
        ori     r7,r7,MAX_SIZE@l
        mr      r9,r7
        cmpd    r6,r7
        bgt     2f
        mr      r7,r6
2:      subf    r6,r7,r6

        /* our main loop does 128 bytes at a time */
        srdi    r7,r7,7

        /*
         * Work out the offset into the constants table to start at. Each
         * constant is 16 bytes and is used against 128 bytes of input data,
         * so a full MAX_SIZE chunk uses MAX_SIZE / 8 bytes of constants
         * (128 / 16 = 8). We skip the entries for the blocks we will not
         * be processing.
         */
        sldi    r8,r7,4
        srdi    r9,r9,3
        subf    r8,r8,r9
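        /* i.e. r8 = (MAX_SIZE / 128 - number_of_128_byte_blocks) * 16 */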

        /* We reduce our final 128 bytes in a separate step */
        addi    r7,r7,-1
        mtctr   r7

        LOAD_REG_ADDR(r3, .constants)

        /* Find the start of our constants */
        add     r3,r3,r8

        /* zero v0-v7 which will contain our checksums */
        vxor    v0,v0,v0
        vxor    v1,v1,v1
        vxor    v2,v2,v2
        vxor    v3,v3,v3
        vxor    v4,v4,v4
        vxor    v5,v5,v5
        vxor    v6,v6,v6
        vxor    v7,v7,v7

        lvx     const1,0,r3

        /*
         * If we are looping back to consume more data we use the values
         * already in v16-v23.
         */
        cmpdi   r0,1
        beq     2f

        /* First warm up pass */
        lvx     v16,0,r4
        lvx     v17,off16,r4
        VPERM(v16,v16,v16,byteswap)
        VPERM(v17,v17,v17,byteswap)
        lvx     v18,off32,r4
        lvx     v19,off48,r4
        VPERM(v18,v18,v18,byteswap)
        VPERM(v19,v19,v19,byteswap)
        lvx     v20,off64,r4
        lvx     v21,off80,r4
        VPERM(v20,v20,v20,byteswap)
        VPERM(v21,v21,v21,byteswap)
        lvx     v22,off96,r4
        lvx     v23,off112,r4
        VPERM(v22,v22,v22,byteswap)
        VPERM(v23,v23,v23,byteswap)
        addi    r4,r4,8*16

        /* xor in initial value */
        vxor    v16,v16,v8

2:      bdz     .Lfirst_warm_up_done

        addi    r3,r3,16
        lvx     const2,0,r3

        /* Second warm up pass */
        VPMSUMD(v8,v16,const1)
        lvx     v16,0,r4
        VPERM(v16,v16,v16,byteswap)
        ori     r2,r2,0
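        /*
         * The "ori r2,r2,0" instructions sprinkled through this code are
         * no-ops; they appear to be placed to pad instruction dispatch
         * groups so the loads, vpmsum and xor operations are spread across
         * groups on POWER8.
         */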

        VPMSUMD(v9,v17,const1)
        lvx     v17,off16,r4
        VPERM(v17,v17,v17,byteswap)
        ori     r2,r2,0

        VPMSUMD(v10,v18,const1)
        lvx     v18,off32,r4
        VPERM(v18,v18,v18,byteswap)
        ori     r2,r2,0

        VPMSUMD(v11,v19,const1)
        lvx     v19,off48,r4
        VPERM(v19,v19,v19,byteswap)
        ori     r2,r2,0

        VPMSUMD(v12,v20,const1)
        lvx     v20,off64,r4
        VPERM(v20,v20,v20,byteswap)
        ori     r2,r2,0

        VPMSUMD(v13,v21,const1)
        lvx     v21,off80,r4
        VPERM(v21,v21,v21,byteswap)
        ori     r2,r2,0

        VPMSUMD(v14,v22,const1)
        lvx     v22,off96,r4
        VPERM(v22,v22,v22,byteswap)
        ori     r2,r2,0

        VPMSUMD(v15,v23,const1)
        lvx     v23,off112,r4
        VPERM(v23,v23,v23,byteswap)

        addi    r4,r4,8*16

        bdz     .Lfirst_cool_down

        /*
         * main loop. We modulo schedule it such that it takes three iterations
         * to complete - first iteration load, second iteration vpmsum, third
         * iteration xor.
         */
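        /*
         * Roughly, for each of the 8 lanes the loop below behaves like the
         * following pseudocode (names invented for illustration only):
         *
         *      acc[i]  ^= prod[i];                     // result from two iterations ago
         *      prod[i]  = vpmsumd(data[i], constant);  // data loaded last iteration
         *      data[i]  = load(p + i * 16);            // consumed next iteration
         */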
        .balign 16
4:      lvx     const1,0,r3
        addi    r3,r3,16
        ori     r2,r2,0

        vxor    v0,v0,v8
        VPMSUMD(v8,v16,const2)
        lvx     v16,0,r4
        VPERM(v16,v16,v16,byteswap)
        ori     r2,r2,0

        vxor    v1,v1,v9
        VPMSUMD(v9,v17,const2)
        lvx     v17,off16,r4
        VPERM(v17,v17,v17,byteswap)
        ori     r2,r2,0

        vxor    v2,v2,v10
        VPMSUMD(v10,v18,const2)
        lvx     v18,off32,r4
        VPERM(v18,v18,v18,byteswap)
        ori     r2,r2,0

        vxor    v3,v3,v11
        VPMSUMD(v11,v19,const2)
        lvx     v19,off48,r4
        VPERM(v19,v19,v19,byteswap)
        lvx     const2,0,r3
        ori     r2,r2,0

        vxor    v4,v4,v12
        VPMSUMD(v12,v20,const1)
        lvx     v20,off64,r4
        VPERM(v20,v20,v20,byteswap)
        ori     r2,r2,0

        vxor    v5,v5,v13
        VPMSUMD(v13,v21,const1)
        lvx     v21,off80,r4
        VPERM(v21,v21,v21,byteswap)
        ori     r2,r2,0

        vxor    v6,v6,v14
        VPMSUMD(v14,v22,const1)
        lvx     v22,off96,r4
        VPERM(v22,v22,v22,byteswap)
        ori     r2,r2,0

        vxor    v7,v7,v15
        VPMSUMD(v15,v23,const1)
        lvx     v23,off112,r4
        VPERM(v23,v23,v23,byteswap)

        addi    r4,r4,8*16

        bdnz    4b

.Lfirst_cool_down:
        /* First cool down pass */
        lvx     const1,0,r3
        addi    r3,r3,16

        vxor    v0,v0,v8
        VPMSUMD(v8,v16,const1)
        ori     r2,r2,0

        vxor    v1,v1,v9
        VPMSUMD(v9,v17,const1)
        ori     r2,r2,0

        vxor    v2,v2,v10
        VPMSUMD(v10,v18,const1)
        ori     r2,r2,0

        vxor    v3,v3,v11
        VPMSUMD(v11,v19,const1)
        ori     r2,r2,0

        vxor    v4,v4,v12
        VPMSUMD(v12,v20,const1)
        ori     r2,r2,0

        vxor    v5,v5,v13
        VPMSUMD(v13,v21,const1)
        ori     r2,r2,0

        vxor    v6,v6,v14
        VPMSUMD(v14,v22,const1)
        ori     r2,r2,0

        vxor    v7,v7,v15
        VPMSUMD(v15,v23,const1)
        ori     r2,r2,0

.Lsecond_cool_down:
        /* Second cool down pass */
        vxor    v0,v0,v8
        vxor    v1,v1,v9
        vxor    v2,v2,v10
        vxor    v3,v3,v11
        vxor    v4,v4,v12
        vxor    v5,v5,v13
        vxor    v6,v6,v14
        vxor    v7,v7,v15

#ifdef REFLECT
        /*
         * vpmsumd produces a 96 bit result in the least significant bits
         * of the register. Since we are bit reflected we have to shift it
         * left 32 bits so it occupies the least significant bits in the
         * bit reflected domain.
         */
        vsldoi  v0,v0,zeroes,4
        vsldoi  v1,v1,zeroes,4
        vsldoi  v2,v2,zeroes,4
        vsldoi  v3,v3,zeroes,4
        vsldoi  v4,v4,zeroes,4
        vsldoi  v5,v5,zeroes,4
        vsldoi  v6,v6,zeroes,4
        vsldoi  v7,v7,zeroes,4
#endif

        /* xor with last 1024 bits */
        lvx     v8,0,r4
        lvx     v9,off16,r4
        VPERM(v8,v8,v8,byteswap)
        VPERM(v9,v9,v9,byteswap)
        lvx     v10,off32,r4
        lvx     v11,off48,r4
        VPERM(v10,v10,v10,byteswap)
        VPERM(v11,v11,v11,byteswap)
        lvx     v12,off64,r4
        lvx     v13,off80,r4
        VPERM(v12,v12,v12,byteswap)
        VPERM(v13,v13,v13,byteswap)
        lvx     v14,off96,r4
        lvx     v15,off112,r4
        VPERM(v14,v14,v14,byteswap)
        VPERM(v15,v15,v15,byteswap)

        addi    r4,r4,8*16

        vxor    v16,v0,v8
        vxor    v17,v1,v9
        vxor    v18,v2,v10
        vxor    v19,v3,v11
        vxor    v20,v4,v12
        vxor    v21,v5,v13
        vxor    v22,v6,v14
        vxor    v23,v7,v15

        li      r0,1
        cmpdi   r6,0
        addi    r6,r6,128
        bne     1b

        /* Work out how many bytes we have left */
        andi.   r5,r5,127

        /* Calculate where in the constant table we need to start */
        subfic  r6,r5,128
        add     r3,r3,r6

        /* How many 16 byte chunks are in the tail */
        srdi    r7,r5,4
        mtctr   r7

        /*
         * Reduce the previously calculated 1024 bits to 64 bits, shifting
         * 32 bits to include the trailing 32 bits of zeros
         */
        lvx     v0,0,r3
        lvx     v1,off16,r3
        lvx     v2,off32,r3
        lvx     v3,off48,r3
        lvx     v4,off64,r3
        lvx     v5,off80,r3
        lvx     v6,off96,r3
        lvx     v7,off112,r3
        addi    r3,r3,8*16

        VPMSUMW(v0,v16,v0)
        VPMSUMW(v1,v17,v1)
        VPMSUMW(v2,v18,v2)
        VPMSUMW(v3,v19,v3)
        VPMSUMW(v4,v20,v4)
        VPMSUMW(v5,v21,v5)
        VPMSUMW(v6,v22,v6)
        VPMSUMW(v7,v23,v7)

        /* Now reduce the tail (0 - 112 bytes) */
        cmpdi   r7,0
        beq     1f

        lvx     v16,0,r4
        lvx     v17,0,r3
        VPERM(v16,v16,v16,byteswap)
        VPMSUMW(v16,v16,v17)
        vxor    v0,v0,v16
        bdz     1f

        lvx     v16,off16,r4
        lvx     v17,off16,r3
        VPERM(v16,v16,v16,byteswap)
        VPMSUMW(v16,v16,v17)
        vxor    v0,v0,v16
        bdz     1f

        lvx     v16,off32,r4
        lvx     v17,off32,r3
        VPERM(v16,v16,v16,byteswap)
        VPMSUMW(v16,v16,v17)
        vxor    v0,v0,v16
        bdz     1f

        lvx     v16,off48,r4
        lvx     v17,off48,r3
        VPERM(v16,v16,v16,byteswap)
        VPMSUMW(v16,v16,v17)
        vxor    v0,v0,v16
        bdz     1f

        lvx     v16,off64,r4
        lvx     v17,off64,r3
        VPERM(v16,v16,v16,byteswap)
        VPMSUMW(v16,v16,v17)
        vxor    v0,v0,v16
        bdz     1f

        lvx     v16,off80,r4
        lvx     v17,off80,r3
        VPERM(v16,v16,v16,byteswap)
        VPMSUMW(v16,v16,v17)
        vxor    v0,v0,v16
        bdz     1f

        lvx     v16,off96,r4
        lvx     v17,off96,r3
        VPERM(v16,v16,v16,byteswap)
        VPMSUMW(v16,v16,v17)
        vxor    v0,v0,v16

        /* Now xor all the parallel chunks together */
1:      vxor    v0,v0,v1
        vxor    v2,v2,v3
        vxor    v4,v4,v5
        vxor    v6,v6,v7

        vxor    v0,v0,v2
        vxor    v4,v4,v6

        vxor    v0,v0,v4

.Lbarrett_reduction:
        /* Barrett constants */
        LOAD_REG_ADDR(r3, .barrett_constants)

        lvx     const1,0,r3
        lvx     const2,off16,r3

        vsldoi  v1,v0,v0,8
        vxor    v0,v0,v1                /* xor two 64 bit results together */

#ifdef REFLECT
        /* shift left one bit */
        vspltisb v1,1
        vsl     v0,v0,v1
#endif

        vand    v0,v0,mask_64bit
#ifndef REFLECT
        /*
         * Now for the Barrett reduction algorithm. The idea is to calculate q,
         * the multiple of our polynomial that we need to subtract. By
         * doing the computation 2x bits higher (ie 64 bits) and shifting the
         * result back down 2x bits, we round down to the nearest multiple.
         */
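        /*
         * As a scalar pseudocode sketch (illustrative only: clmul() stands
         * in for a carry-less multiply and is not a real helper; it assumes
         * const1 holds floor(x^64 / n) and const2 holds n):
         *
         *      q = high_64_bits(clmul(a, const1));   // nearest multiple, rounded down
         *      r = a ^ clmul(q, const2);             // a - q*n; subtraction is xor in GF(2)
         */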
        VPMSUMD(v1,v0,const1)   /* ma */
        vsldoi  v1,zeroes,v1,8  /* q = floor(ma/(2^64)) */
        VPMSUMD(v1,v1,const2)   /* qn */
        vxor    v0,v0,v1        /* a - qn, subtraction is xor in GF(2) */

        /*
         * Get the result into r3. We need to shift it left 8 bytes:
         * V0 [ 0 1 2 X ]
         * V0 [ 0 X 2 3 ]
         */
        vsldoi  v0,v0,zeroes,8  /* shift result into top 64 bits */
#else
        /*
         * The reflected version of Barrett reduction. Instead of bit
         * reflecting our data (which is expensive to do), we bit reflect our
         * constants and our algorithm, which means the intermediate data in
         * our vector registers goes from 0-63 instead of 63-0. We can reflect
         * the algorithm because we don't carry in mod 2 arithmetic.
         */
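        /*
         * The same sketch for the reflected flow (illustrative only, using
         * the bit-reflected constants):
         *
         *      q = low_32_bits(clmul(a & 0xffffffff, const1));
         *      r = a ^ clmul(q, const2);       // remainder lands in the high 32 bits
         */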
        vand    v1,v0,mask_32bit        /* bottom 32 bits of a */
        VPMSUMD(v1,v1,const1)           /* ma */
        vand    v1,v1,mask_32bit        /* bottom 32 bits of ma */
        VPMSUMD(v1,v1,const2)           /* qn */
        vxor    v0,v0,v1                /* a - qn, subtraction is xor in GF(2) */

        /*
         * Since we are bit reflected, the result (ie the low 32 bits) is in
         * the high 32 bits. We just need to shift it left 4 bytes
         * V0 [ 0 1 X 3 ]
         * V0 [ 0 X 2 3 ]
         */
        vsldoi  v0,v0,zeroes,4          /* shift result into top 64 bits */
#endif

        /* Get it into r3 */
        MFVRD(R3, v0)

.Lout:
        subi    r6,r1,56+10*16
        subi    r7,r1,56+2*16

        lvx     v20,0,r6
        lvx     v21,off16,r6
        lvx     v22,off32,r6
        lvx     v23,off48,r6
        lvx     v24,off64,r6
        lvx     v25,off80,r6
        lvx     v26,off96,r6
        lvx     v27,off112,r6
        lvx     v28,0,r7
        lvx     v29,off16,r7

        ld      r31,-8(r1)
        ld      r30,-16(r1)
        ld      r29,-24(r1)
        ld      r28,-32(r1)
        ld      r27,-40(r1)
        ld      r26,-48(r1)
        ld      r25,-56(r1)

        blr

.Lfirst_warm_up_done:
        lvx     const1,0,r3
        addi    r3,r3,16

        VPMSUMD(v8,v16,const1)
        VPMSUMD(v9,v17,const1)
        VPMSUMD(v10,v18,const1)
        VPMSUMD(v11,v19,const1)
        VPMSUMD(v12,v20,const1)
        VPMSUMD(v13,v21,const1)
        VPMSUMD(v14,v22,const1)
        VPMSUMD(v15,v23,const1)

        b       .Lsecond_cool_down

.Lshort:
        cmpdi   r5,0
        beq     .Lzero

        LOAD_REG_ADDR(r3, .short_constants)

        /* Calculate where in the constant table we need to start */
        subfic  r6,r5,256
        add     r3,r3,r6

        /* How many 16 byte chunks? */
        srdi    r7,r5,4
        mtctr   r7

        vxor    v19,v19,v19
        vxor    v20,v20,v20

        lvx     v0,0,r4
        lvx     v16,0,r3
        VPERM(v0,v0,v16,byteswap)
        vxor    v0,v0,v8        /* xor in initial value */
        VPMSUMW(v0,v0,v16)
        bdz     .Lv0

        lvx     v1,off16,r4
        lvx     v17,off16,r3
        VPERM(v1,v1,v17,byteswap)
        VPMSUMW(v1,v1,v17)
        bdz     .Lv1

        lvx     v2,off32,r4
        lvx     v16,off32,r3
        VPERM(v2,v2,v16,byteswap)
        VPMSUMW(v2,v2,v16)
        bdz     .Lv2

        lvx     v3,off48,r4
        lvx     v17,off48,r3
        VPERM(v3,v3,v17,byteswap)
        VPMSUMW(v3,v3,v17)
        bdz     .Lv3

        lvx     v4,off64,r4
        lvx     v16,off64,r3
        VPERM(v4,v4,v16,byteswap)
        VPMSUMW(v4,v4,v16)
        bdz     .Lv4

        lvx     v5,off80,r4
        lvx     v17,off80,r3
        VPERM(v5,v5,v17,byteswap)
        VPMSUMW(v5,v5,v17)
        bdz     .Lv5

        lvx     v6,off96,r4
        lvx     v16,off96,r3
        VPERM(v6,v6,v16,byteswap)
        VPMSUMW(v6,v6,v16)
        bdz     .Lv6

        lvx     v7,off112,r4
        lvx     v17,off112,r3
        VPERM(v7,v7,v17,byteswap)
        VPMSUMW(v7,v7,v17)
        bdz     .Lv7

        addi    r3,r3,128
        addi    r4,r4,128

        lvx     v8,0,r4
        lvx     v16,0,r3
        VPERM(v8,v8,v16,byteswap)
        VPMSUMW(v8,v8,v16)
        bdz     .Lv8

        lvx     v9,off16,r4
        lvx     v17,off16,r3
        VPERM(v9,v9,v17,byteswap)
        VPMSUMW(v9,v9,v17)
        bdz     .Lv9

        lvx     v10,off32,r4
        lvx     v16,off32,r3
        VPERM(v10,v10,v16,byteswap)
        VPMSUMW(v10,v10,v16)
        bdz     .Lv10

        lvx     v11,off48,r4
        lvx     v17,off48,r3
        VPERM(v11,v11,v17,byteswap)
        VPMSUMW(v11,v11,v17)
        bdz     .Lv11

        lvx     v12,off64,r4
        lvx     v16,off64,r3
        VPERM(v12,v12,v16,byteswap)
        VPMSUMW(v12,v12,v16)
        bdz     .Lv12

        lvx     v13,off80,r4
        lvx     v17,off80,r3
        VPERM(v13,v13,v17,byteswap)
        VPMSUMW(v13,v13,v17)
        bdz     .Lv13

        lvx     v14,off96,r4
        lvx     v16,off96,r3
        VPERM(v14,v14,v16,byteswap)
        VPMSUMW(v14,v14,v16)
        bdz     .Lv14

        lvx     v15,off112,r4
        lvx     v17,off112,r3
        VPERM(v15,v15,v17,byteswap)
        VPMSUMW(v15,v15,v17)

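        /*
         * Fall-through xor chain: each "bdz .LvN" above branches in here once
         * chunk N has been multiplied, so only the chunks that were actually
         * processed get folded into the two accumulators v19 and v20.
         */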
.Lv15:  vxor    v19,v19,v15
.Lv14:  vxor    v20,v20,v14
.Lv13:  vxor    v19,v19,v13
.Lv12:  vxor    v20,v20,v12
.Lv11:  vxor    v19,v19,v11
.Lv10:  vxor    v20,v20,v10
.Lv9:   vxor    v19,v19,v9
.Lv8:   vxor    v20,v20,v8
.Lv7:   vxor    v19,v19,v7
.Lv6:   vxor    v20,v20,v6
.Lv5:   vxor    v19,v19,v5
.Lv4:   vxor    v20,v20,v4
.Lv3:   vxor    v19,v19,v3
.Lv2:   vxor    v20,v20,v2
.Lv1:   vxor    v19,v19,v1
.Lv0:   vxor    v20,v20,v0

        vxor    v0,v19,v20

        b       .Lbarrett_reduction

.Lzero:
        mr      r3,r10
        b       .Lout

FUNC_END(CRC_FUNCTION_NAME)
