~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-or-later */
  2 /*
  3  *
  4  * Copyright (C) IBM Corporation, 2011
  5  *
  6  * Author: Anton Blanchard <anton@au.ibm.com>
  7  */
  8 #include <asm/ppc_asm.h>
  9 
 10 #ifndef SELFTEST_CASE
 11 /* 0 == don't use VMX, 1 == use VMX */
 12 #define SELFTEST_CASE   0
 13 #endif
 14 
/*
 * Endian abstraction for the unaligned VMX path: LVS builds the permute
 * control vector (lvsl on BE, lvsr on LE) and VPERM swaps its first two
 * vector operands on LE so the same VPERM(dst, prev, next, ctrl) call
 * sites below merge two misaligned 16B loads correctly on either endian.
 */
 15 #ifdef __BIG_ENDIAN__
 16 #define LVS(VRT,RA,RB)          lvsl    VRT,RA,RB
 17 #define VPERM(VRT,VRA,VRB,VRC)  vperm   VRT,VRA,VRB,VRC
 18 #else
 19 #define LVS(VRT,RA,RB)          lvsr    VRT,RA,RB
 20 #define VPERM(VRT,VRA,VRB,VRC)  vperm   VRT,VRB,VRA,VRC
 21 #endif
 22 
/*
 * errN macros: each tags the single instruction that follows it with an
 * exception-table entry, so a user-access fault in that instruction
 * branches to the matching .Ldo_errN fixup below.  err1 = no stack frame
 * yet; err2 = inside the scalar loop's frame with r14-r22 saved.
 */
 23         .macro err1
 24 100:
 25         EX_TABLE(100b,.Ldo_err1)
 26         .endm
 27 
 28         .macro err2
 29 200:
 30         EX_TABLE(200b,.Ldo_err2)
 31         .endm
 32 
 33 #ifdef CONFIG_ALTIVEC
/*
 * err3/err4: fixups for the VMX path.  err3 = VMX active, only the entry
 * frame live; err4 = VMX active and r14-r16 additionally saved by the
 * cacheline loop.
 */
 34         .macro err3
 35 300:
 36         EX_TABLE(300b,.Ldo_err3)
 37         .endm
 38 
 39         .macro err4
 40 400:
 41         EX_TABLE(400b,.Ldo_err4)
 42         .endm
 43 
 44 
/* Fault in the VMX cacheline loop: restore the extra saved GPRs first. */
 45 .Ldo_err4:
 46         ld      r16,STK_REG(R16)(r1)
 47         ld      r15,STK_REG(R15)(r1)
 48         ld      r14,STK_REG(R14)(r1)
/*
 * Fault anywhere VMX is in use: leave VMX mode, restore LR (saved at
 * STACKFRAMESIZE+16 by the .Lvmx_copy prologue), then fall into the
 * common exit/retry path.
 */
 49 .Ldo_err3:
 50         bl      CFUNC(exit_vmx_usercopy)
 51         ld      r0,STACKFRAMESIZE+16(r1)
 52         mtlr    r0
 53         b       .Lexit
 54 #endif /* CONFIG_ALTIVEC */
 55 
/* Fault in the scalar cacheline loop: restore the saved r14-r22. */
 56 .Ldo_err2:
 57         ld      r22,STK_REG(R22)(r1)
 58         ld      r21,STK_REG(R21)(r1)
 59         ld      r20,STK_REG(R20)(r1)
 60         ld      r19,STK_REG(R19)(r1)
 61         ld      r18,STK_REG(R18)(r1)
 62         ld      r17,STK_REG(R17)(r1)
 63         ld      r16,STK_REG(R16)(r1)
 64         ld      r15,STK_REG(R15)(r1)
 65         ld      r14,STK_REG(R14)(r1)
/* Pop the frame, then retry with the original arguments. */
 66 .Lexit:
 67         addi    r1,r1,STACKFRAMESIZE
/*
 * Reload the original dest/src/count stashed at function entry and hand
 * the whole copy to __copy_tofrom_user_base (defined elsewhere), which
 * presumably redoes it carefully and computes the residual byte count
 * for the caller.
 */
 68 .Ldo_err1:
 69         ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
 70         ld      r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
 71         ld      r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
 72         b       __copy_tofrom_user_base
 73 
 74 
/*
 * __copy_tofrom_user_power7 - POWER7-tuned user copy.
 *
 * In:  r3 = destination, r4 = source, r5 = byte count
 *      (roles evident from the load-from-r4 / store-to-r3 pattern below).
 * Out: r3 = 0 on the fully-successful scalar path (label 15).  On a
 *      fault the .Ldo_err* fixups reload the saved arguments and branch
 *      to __copy_tofrom_user_base, whose return value (defined
 *      elsewhere) is what the caller then sees.
 *
 * The original r3/r4/r5 are stashed in the caller's frame area so the
 * fixups can retry the copy from scratch.
 */
 75 _GLOBAL(__copy_tofrom_user_power7)
 76         cmpldi  r5,16                   /* cr0: count < 16 -> .Lshort_copy */
 77         cmpldi  cr1,r5,3328             /* cr1: count > 3328 -> VMX path */
 78 
 79         std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
 80         std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
 81         std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
 82 
 83         blt     .Lshort_copy
 84 
 85 #ifdef CONFIG_ALTIVEC
 86 test_feature = SELFTEST_CASE
 87 BEGIN_FTR_SECTION
 88         bgt     cr1,.Lvmx_copy
 89 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 90 #endif
 91 
 92 .Lnonvmx_copy:
 93         /* Get the source 8B aligned */
 94         neg     r6,r4
 95         mtocrf  0x01,r6                 /* low nibble of r6 -> cr7 for the bf tests */
 96         clrldi  r6,r6,(64-3)
 97 
 98         bf      cr7*4+3,1f
 99 err1;   lbz     r0,0(r4)
100         addi    r4,r4,1
101 err1;   stb     r0,0(r3)
102         addi    r3,r3,1
103 
104 1:      bf      cr7*4+2,2f
105 err1;   lhz     r0,0(r4)
106         addi    r4,r4,2
107 err1;   sth     r0,0(r3)
108         addi    r3,r3,2
109 
110 2:      bf      cr7*4+1,3f
111 err1;   lwz     r0,0(r4)
112         addi    r4,r4,4
113 err1;   stw     r0,0(r3)
114         addi    r3,r3,4
115 
116 3:      sub     r5,r5,r6
117         cmpldi  r5,128
118         blt     5f
119 
/* >= 128B remain: build a frame, save LR and the GPRs the big loop uses. */
120         mflr    r0
121         stdu    r1,-STACKFRAMESIZE(r1)
122         std     r14,STK_REG(R14)(r1)
123         std     r15,STK_REG(R15)(r1)
124         std     r16,STK_REG(R16)(r1)
125         std     r17,STK_REG(R17)(r1)
126         std     r18,STK_REG(R18)(r1)
127         std     r19,STK_REG(R19)(r1)
128         std     r20,STK_REG(R20)(r1)
129         std     r21,STK_REG(R21)(r1)
130         std     r22,STK_REG(R22)(r1)
131         std     r0,STACKFRAMESIZE+16(r1)
132 
133         srdi    r6,r5,7                 /* iterations = count / 128 */
134         mtctr   r6
135 
136         /* Now do cacheline (128B) sized loads and stores. */
137         .align  5
138 4:
139 err2;   ld      r0,0(r4)
140 err2;   ld      r6,8(r4)
141 err2;   ld      r7,16(r4)
142 err2;   ld      r8,24(r4)
143 err2;   ld      r9,32(r4)
144 err2;   ld      r10,40(r4)
145 err2;   ld      r11,48(r4)
146 err2;   ld      r12,56(r4)
147 err2;   ld      r14,64(r4)
148 err2;   ld      r15,72(r4)
149 err2;   ld      r16,80(r4)
150 err2;   ld      r17,88(r4)
151 err2;   ld      r18,96(r4)
152 err2;   ld      r19,104(r4)
153 err2;   ld      r20,112(r4)
154 err2;   ld      r21,120(r4)
155         addi    r4,r4,128
156 err2;   std     r0,0(r3)
157 err2;   std     r6,8(r3)
158 err2;   std     r7,16(r3)
159 err2;   std     r8,24(r3)
160 err2;   std     r9,32(r3)
161 err2;   std     r10,40(r3)
162 err2;   std     r11,48(r3)
163 err2;   std     r12,56(r3)
164 err2;   std     r14,64(r3)
165 err2;   std     r15,72(r3)
166 err2;   std     r16,80(r3)
167 err2;   std     r17,88(r3)
168 err2;   std     r18,96(r3)
169 err2;   std     r19,104(r3)
170 err2;   std     r20,112(r3)
171 err2;   std     r21,120(r3)
172         addi    r3,r3,128
173         bdnz    4b
174 
175         clrldi  r5,r5,(64-7)            /* count %= 128 */
176 
177         ld      r14,STK_REG(R14)(r1)
178         ld      r15,STK_REG(R15)(r1)
179         ld      r16,STK_REG(R16)(r1)
180         ld      r17,STK_REG(R17)(r1)
181         ld      r18,STK_REG(R18)(r1)
182         ld      r19,STK_REG(R19)(r1)
183         ld      r20,STK_REG(R20)(r1)
184         ld      r21,STK_REG(R21)(r1)
185         ld      r22,STK_REG(R22)(r1)
186         addi    r1,r1,STACKFRAMESIZE
187 
188         /* Up to 127B to go */
189 5:      srdi    r6,r5,4
190         mtocrf  0x01,r6
191 
192 6:      bf      cr7*4+1,7f
193 err1;   ld      r0,0(r4)
194 err1;   ld      r6,8(r4)
195 err1;   ld      r7,16(r4)
196 err1;   ld      r8,24(r4)
197 err1;   ld      r9,32(r4)
198 err1;   ld      r10,40(r4)
199 err1;   ld      r11,48(r4)
200 err1;   ld      r12,56(r4)
201         addi    r4,r4,64
202 err1;   std     r0,0(r3)
203 err1;   std     r6,8(r3)
204 err1;   std     r7,16(r3)
205 err1;   std     r8,24(r3)
206 err1;   std     r9,32(r3)
207 err1;   std     r10,40(r3)
208 err1;   std     r11,48(r3)
209 err1;   std     r12,56(r3)
210         addi    r3,r3,64
211 
212         /* Up to 63B to go */
213 7:      bf      cr7*4+2,8f
214 err1;   ld      r0,0(r4)
215 err1;   ld      r6,8(r4)
216 err1;   ld      r7,16(r4)
217 err1;   ld      r8,24(r4)
218         addi    r4,r4,32
219 err1;   std     r0,0(r3)
220 err1;   std     r6,8(r3)
221 err1;   std     r7,16(r3)
222 err1;   std     r8,24(r3)
223         addi    r3,r3,32
224 
225         /* Up to 31B to go */
226 8:      bf      cr7*4+3,9f
227 err1;   ld      r0,0(r4)
228 err1;   ld      r6,8(r4)
229         addi    r4,r4,16
230 err1;   std     r0,0(r3)
231 err1;   std     r6,8(r3)
232         addi    r3,r3,16
233 
234 9:      clrldi  r5,r5,(64-4)
235 
236         /* Up to 15B to go */
237 .Lshort_copy:
238         mtocrf  0x01,r5
239         bf      cr7*4+0,12f
240 err1;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
241 err1;   lwz     r6,4(r4)
242         addi    r4,r4,8
243 err1;   stw     r0,0(r3)
244 err1;   stw     r6,4(r3)
245         addi    r3,r3,8
246 
247 12:     bf      cr7*4+1,13f
248 err1;   lwz     r0,0(r4)
249         addi    r4,r4,4
250 err1;   stw     r0,0(r3)
251         addi    r3,r3,4
252 
253 13:     bf      cr7*4+2,14f
254 err1;   lhz     r0,0(r4)
255         addi    r4,r4,2
256 err1;   sth     r0,0(r3)
257         addi    r3,r3,2
258 
259 14:     bf      cr7*4+3,15f
260 err1;   lbz     r0,0(r4)
261 err1;   stb     r0,0(r3)
262 
263 15:     li      r3,0                    /* success: return 0 */
264         blr
265 
/* Taken when enter_vmx_usercopy fails: drop the frame, use scalar path. */
266 .Lunwind_stack_nonvmx_copy:
267         addi    r1,r1,STACKFRAMESIZE
268         b       .Lnonvmx_copy
269 
/*
 * VMX copy path (count > 3328 and CPU_FTR_ALTIVEC).  Saves LR, makes a
 * frame, and asks enter_vmx_usercopy for permission to use VMX; if that
 * returns 0 (checked in cr1) we unwind and fall back to the scalar path.
 */
270 .Lvmx_copy:
271 #ifdef CONFIG_ALTIVEC
272         mflr    r0
273         std     r0,16(r1)
274         stdu    r1,-STACKFRAMESIZE(r1)
275         bl      CFUNC(enter_vmx_usercopy)
276         cmpwi   cr1,r3,0                /* cr1 = "may we use VMX?" (0 == no) */
277         ld      r0,STACKFRAMESIZE+16(r1)
278         ld      r3,STK_REG(R31)(r1)
279         ld      r4,STK_REG(R30)(r1)
280         ld      r5,STK_REG(R29)(r1)
281         mtlr    r0
282 
283         /*
284          * We prefetch both the source and destination using enhanced touch
285          * instructions. We use a stream ID of 0 for the load side and
286          * 1 for the store side.
287          */
288         clrrdi  r6,r4,7
289         clrrdi  r9,r3,7
290         ori     r9,r9,1         /* stream=1 */
291 
292         srdi    r7,r5,7         /* length in cachelines, capped at 0x3FF */
293         cmpldi  r7,0x3FF
294         ble     1f
295         li      r7,0x3FF
296 1:      lis     r0,0x0E00       /* depth=7 */
297         sldi    r7,r7,7
298         or      r7,r7,r0
299         ori     r10,r7,1        /* stream=1 */
300 
301         DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
302 
303         beq     cr1,.Lunwind_stack_nonvmx_copy
304 
305         /*
306          * If source and destination are not relatively aligned we use a
307          * slower permute loop.
308          */
309         xor     r6,r4,r3
310         rldicl. r6,r6,0,(64-4)
311         bne     .Lvmx_unaligned_copy
312 
313         /* Get the destination 16B aligned */
314         neg     r6,r3
315         mtocrf  0x01,r6
316         clrldi  r6,r6,(64-4)
317 
318         bf      cr7*4+3,1f
319 err3;   lbz     r0,0(r4)
320         addi    r4,r4,1
321 err3;   stb     r0,0(r3)
322         addi    r3,r3,1
323 
324 1:      bf      cr7*4+2,2f
325 err3;   lhz     r0,0(r4)
326         addi    r4,r4,2
327 err3;   sth     r0,0(r3)
328         addi    r3,r3,2
329 
330 2:      bf      cr7*4+1,3f
331 err3;   lwz     r0,0(r4)
332         addi    r4,r4,4
333 err3;   stw     r0,0(r3)
334         addi    r3,r3,4
335 
336 3:      bf      cr7*4+0,4f
337 err3;   ld      r0,0(r4)
338         addi    r4,r4,8
339 err3;   std     r0,0(r3)
340         addi    r3,r3,8
341 
342 4:      sub     r5,r5,r6
343 
344         /* Get the destination 128B aligned */
345         neg     r6,r3
346         srdi    r7,r6,4
347         mtocrf  0x01,r7
348         clrldi  r6,r6,(64-7)
349 
350         li      r9,16
351         li      r10,32
352         li      r11,48
353 
354         bf      cr7*4+3,5f
355 err3;   lvx     v1,0,r4
356         addi    r4,r4,16
357 err3;   stvx    v1,0,r3
358         addi    r3,r3,16
359 
360 5:      bf      cr7*4+2,6f
361 err3;   lvx     v1,0,r4
362 err3;   lvx     v0,r4,r9
363         addi    r4,r4,32
364 err3;   stvx    v1,0,r3
365 err3;   stvx    v0,r3,r9
366         addi    r3,r3,32
367 
368 6:      bf      cr7*4+1,7f
369 err3;   lvx     v3,0,r4
370 err3;   lvx     v2,r4,r9
371 err3;   lvx     v1,r4,r10
372 err3;   lvx     v0,r4,r11
373         addi    r4,r4,64
374 err3;   stvx    v3,0,r3
375 err3;   stvx    v2,r3,r9
376 err3;   stvx    v1,r3,r10
377 err3;   stvx    v0,r3,r11
378         addi    r3,r3,64
379 
380 7:      sub     r5,r5,r6
381         srdi    r6,r5,7
382 
383         std     r14,STK_REG(R14)(r1)
384         std     r15,STK_REG(R15)(r1)
385         std     r16,STK_REG(R16)(r1)
386 
/* r9-r16 hold the constant 16..112 byte offsets for the indexed lvx/stvx. */
387         li      r12,64
388         li      r14,80
389         li      r15,96
390         li      r16,112
391 
392         mtctr   r6
393 
394         /*
395          * Now do cacheline sized loads and stores. By this stage the
396          * cacheline stores are also cacheline aligned.
397          */
398         .align  5
399 8:
400 err4;   lvx     v7,0,r4
401 err4;   lvx     v6,r4,r9
402 err4;   lvx     v5,r4,r10
403 err4;   lvx     v4,r4,r11
404 err4;   lvx     v3,r4,r12
405 err4;   lvx     v2,r4,r14
406 err4;   lvx     v1,r4,r15
407 err4;   lvx     v0,r4,r16
408         addi    r4,r4,128
409 err4;   stvx    v7,0,r3
410 err4;   stvx    v6,r3,r9
411 err4;   stvx    v5,r3,r10
412 err4;   stvx    v4,r3,r11
413 err4;   stvx    v3,r3,r12
414 err4;   stvx    v2,r3,r14
415 err4;   stvx    v1,r3,r15
416 err4;   stvx    v0,r3,r16
417         addi    r3,r3,128
418         bdnz    8b
419 
420         ld      r14,STK_REG(R14)(r1)
421         ld      r15,STK_REG(R15)(r1)
422         ld      r16,STK_REG(R16)(r1)
423 
424         /* Up to 127B to go */
425         clrldi  r5,r5,(64-7)
426         srdi    r6,r5,4
427         mtocrf  0x01,r6
428 
429         bf      cr7*4+1,9f
430 err3;   lvx     v3,0,r4
431 err3;   lvx     v2,r4,r9
432 err3;   lvx     v1,r4,r10
433 err3;   lvx     v0,r4,r11
434         addi    r4,r4,64
435 err3;   stvx    v3,0,r3
436 err3;   stvx    v2,r3,r9
437 err3;   stvx    v1,r3,r10
438 err3;   stvx    v0,r3,r11
439         addi    r3,r3,64
440 
441 9:      bf      cr7*4+2,10f
442 err3;   lvx     v1,0,r4
443 err3;   lvx     v0,r4,r9
444         addi    r4,r4,32
445 err3;   stvx    v1,0,r3
446 err3;   stvx    v0,r3,r9
447         addi    r3,r3,32
448 
449 10:     bf      cr7*4+3,11f
450 err3;   lvx     v1,0,r4
451         addi    r4,r4,16
452 err3;   stvx    v1,0,r3
453         addi    r3,r3,16
454 
455         /* Up to 15B to go */
456 11:     clrldi  r5,r5,(64-4)
457         mtocrf  0x01,r5
458         bf      cr7*4+0,12f
459 err3;   ld      r0,0(r4)
460         addi    r4,r4,8
461 err3;   std     r0,0(r3)
462         addi    r3,r3,8
463 
464 12:     bf      cr7*4+1,13f
465 err3;   lwz     r0,0(r4)
466         addi    r4,r4,4
467 err3;   stw     r0,0(r3)
468         addi    r3,r3,4
469 
470 13:     bf      cr7*4+2,14f
471 err3;   lhz     r0,0(r4)
472         addi    r4,r4,2
473 err3;   sth     r0,0(r3)
474         addi    r3,r3,2
475 
476 14:     bf      cr7*4+3,15f
477 err3;   lbz     r0,0(r4)
478 err3;   stb     r0,0(r3)
479 
480 15:     addi    r1,r1,STACKFRAMESIZE
481         b       CFUNC(exit_vmx_usercopy)        /* tail call optimise */
482 
/*
 * Source and destination are misaligned relative to each other: keep a
 * 16B read-ahead in v0 and merge adjacent loads with VPERM through the
 * control vector built by LVS below.
 */
483 .Lvmx_unaligned_copy:
484         /* Get the destination 16B aligned */
485         neg     r6,r3
486         mtocrf  0x01,r6
487         clrldi  r6,r6,(64-4)
488 
489         bf      cr7*4+3,1f
490 err3;   lbz     r0,0(r4)
491         addi    r4,r4,1
492 err3;   stb     r0,0(r3)
493         addi    r3,r3,1
494 
495 1:      bf      cr7*4+2,2f
496 err3;   lhz     r0,0(r4)
497         addi    r4,r4,2
498 err3;   sth     r0,0(r3)
499         addi    r3,r3,2
500 
501 2:      bf      cr7*4+1,3f
502 err3;   lwz     r0,0(r4)
503         addi    r4,r4,4
504 err3;   stw     r0,0(r3)
505         addi    r3,r3,4
506 
507 3:      bf      cr7*4+0,4f
508 err3;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
509 err3;   lwz     r7,4(r4)
510         addi    r4,r4,8
511 err3;   stw     r0,0(r3)
512 err3;   stw     r7,4(r3)
513         addi    r3,r3,8
514 
515 4:      sub     r5,r5,r6
516 
517         /* Get the destination 128B aligned */
518         neg     r6,r3
519         srdi    r7,r6,4
520         mtocrf  0x01,r7
521         clrldi  r6,r6,(64-7)
522 
523         li      r9,16
524         li      r10,32
525         li      r11,48
526 
527         LVS(v16,0,r4)           /* Setup permute control vector */
528 err3;   lvx     v0,0,r4
529         addi    r4,r4,16        /* r4 now runs 16B ahead of what's consumed */
530 
531         bf      cr7*4+3,5f
532 err3;   lvx     v1,0,r4
533         VPERM(v8,v0,v1,v16)
534         addi    r4,r4,16
535 err3;   stvx    v8,0,r3
536         addi    r3,r3,16
537         vor     v0,v1,v1        /* carry the read-ahead forward */
538 
539 5:      bf      cr7*4+2,6f
540 err3;   lvx     v1,0,r4
541         VPERM(v8,v0,v1,v16)
542 err3;   lvx     v0,r4,r9
543         VPERM(v9,v1,v0,v16)
544         addi    r4,r4,32
545 err3;   stvx    v8,0,r3
546 err3;   stvx    v9,r3,r9
547         addi    r3,r3,32
548 
549 6:      bf      cr7*4+1,7f
550 err3;   lvx     v3,0,r4
551         VPERM(v8,v0,v3,v16)
552 err3;   lvx     v2,r4,r9
553         VPERM(v9,v3,v2,v16)
554 err3;   lvx     v1,r4,r10
555         VPERM(v10,v2,v1,v16)
556 err3;   lvx     v0,r4,r11
557         VPERM(v11,v1,v0,v16)
558         addi    r4,r4,64
559 err3;   stvx    v8,0,r3
560 err3;   stvx    v9,r3,r9
561 err3;   stvx    v10,r3,r10
562 err3;   stvx    v11,r3,r11
563         addi    r3,r3,64
564 
565 7:      sub     r5,r5,r6
566         srdi    r6,r5,7
567 
568         std     r14,STK_REG(R14)(r1)
569         std     r15,STK_REG(R15)(r1)
570         std     r16,STK_REG(R16)(r1)
571 
572         li      r12,64
573         li      r14,80
574         li      r15,96
575         li      r16,112
576 
577         mtctr   r6
578 
579         /*
580          * Now do cacheline sized loads and stores. By this stage the
581          * cacheline stores are also cacheline aligned.
582          */
583         .align  5
584 8:
585 err4;   lvx     v7,0,r4
586         VPERM(v8,v0,v7,v16)
587 err4;   lvx     v6,r4,r9
588         VPERM(v9,v7,v6,v16)
589 err4;   lvx     v5,r4,r10
590         VPERM(v10,v6,v5,v16)
591 err4;   lvx     v4,r4,r11
592         VPERM(v11,v5,v4,v16)
593 err4;   lvx     v3,r4,r12
594         VPERM(v12,v4,v3,v16)
595 err4;   lvx     v2,r4,r14
596         VPERM(v13,v3,v2,v16)
597 err4;   lvx     v1,r4,r15
598         VPERM(v14,v2,v1,v16)
599 err4;   lvx     v0,r4,r16
600         VPERM(v15,v1,v0,v16)
601         addi    r4,r4,128
602 err4;   stvx    v8,0,r3
603 err4;   stvx    v9,r3,r9
604 err4;   stvx    v10,r3,r10
605 err4;   stvx    v11,r3,r11
606 err4;   stvx    v12,r3,r12
607 err4;   stvx    v13,r3,r14
608 err4;   stvx    v14,r3,r15
609 err4;   stvx    v15,r3,r16
610         addi    r3,r3,128
611         bdnz    8b
612 
613         ld      r14,STK_REG(R14)(r1)
614         ld      r15,STK_REG(R15)(r1)
615         ld      r16,STK_REG(R16)(r1)
616 
617         /* Up to 127B to go */
618         clrldi  r5,r5,(64-7)
619         srdi    r6,r5,4
620         mtocrf  0x01,r6
621 
622         bf      cr7*4+1,9f
623 err3;   lvx     v3,0,r4
624         VPERM(v8,v0,v3,v16)
625 err3;   lvx     v2,r4,r9
626         VPERM(v9,v3,v2,v16)
627 err3;   lvx     v1,r4,r10
628         VPERM(v10,v2,v1,v16)
629 err3;   lvx     v0,r4,r11
630         VPERM(v11,v1,v0,v16)
631         addi    r4,r4,64
632 err3;   stvx    v8,0,r3
633 err3;   stvx    v9,r3,r9
634 err3;   stvx    v10,r3,r10
635 err3;   stvx    v11,r3,r11
636         addi    r3,r3,64
637 
638 9:      bf      cr7*4+2,10f
639 err3;   lvx     v1,0,r4
640         VPERM(v8,v0,v1,v16)
641 err3;   lvx     v0,r4,r9
642         VPERM(v9,v1,v0,v16)
643         addi    r4,r4,32
644 err3;   stvx    v8,0,r3
645 err3;   stvx    v9,r3,r9
646         addi    r3,r3,32
647 
648 10:     bf      cr7*4+3,11f
649 err3;   lvx     v1,0,r4
650         VPERM(v8,v0,v1,v16)
651         addi    r4,r4,16
652 err3;   stvx    v8,0,r3
653         addi    r3,r3,16
654 
655         /* Up to 15B to go */
656 11:     clrldi  r5,r5,(64-4)
657         addi    r4,r4,-16       /* Unwind the +16 load offset */
658         mtocrf  0x01,r5
659         bf      cr7*4+0,12f
660 err3;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
661 err3;   lwz     r6,4(r4)
662         addi    r4,r4,8
663 err3;   stw     r0,0(r3)
664 err3;   stw     r6,4(r3)
665         addi    r3,r3,8
666 
667 12:     bf      cr7*4+1,13f
668 err3;   lwz     r0,0(r4)
669         addi    r4,r4,4
670 err3;   stw     r0,0(r3)
671         addi    r3,r3,4
672 
673 13:     bf      cr7*4+2,14f
674 err3;   lhz     r0,0(r4)
675         addi    r4,r4,2
676 err3;   sth     r0,0(r3)
677         addi    r3,r3,2
678 
679 14:     bf      cr7*4+3,15f
680 err3;   lbz     r0,0(r4)
681 err3;   stb     r0,0(r3)
682 
683 15:     addi    r1,r1,STACKFRAMESIZE
684         b       CFUNC(exit_vmx_usercopy)        /* tail call optimise */
685 #endif /* CONFIG_ALTIVEC */

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php