~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/xtensa/lib/memcopy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * arch/xtensa/lib/memcopy.S -- memcpy and memmove
  3  * (originally the Core HAL library functions xthal_memcpy and xthal_bcopy)
  4  *
  5  * This file is subject to the terms and conditions of the GNU General Public
  6  * License.  See the file "COPYING" in the main directory of this archive
  7  * for more details.
  8  *
  9  * Copyright (C) 2002 - 2012 Tensilica Inc.
 10  */
 11 
 12 #include <linux/linkage.h>
 13 #include <asm/asmmacro.h>
 14 #include <asm/core.h>
 15 
 16 /*
 17  * void *memcpy(void *dst, const void *src, size_t len);
 18  *
 19  * This function is intended to do the same thing as the standard
 20  * library function memcpy() for most cases.
 21  * However, where the source and/or destination references
 22  * an instruction RAM or ROM or a data RAM or ROM, that
 23  * source and/or destination will always be accessed with
 24  * 32-bit load and store instructions (as required for these
 25  * types of devices).
 26  *
 27  * !!!!!!!  XTFIXME:
 28  * !!!!!!!  Handling of IRAM/IROM has not yet
 29  * !!!!!!!  been implemented.
 30  *
 31  * The (general case) algorithm is as follows:
 32  *   If destination is unaligned, align it by conditionally
 33  *     copying 1 and 2 bytes.
 34  *   If source is aligned,
 35  *     do 16 bytes with a loop, and then finish up with
 36  *     8, 4, 2, and 1 byte copies conditional on the length;
 37  *   else (if source is unaligned),
 38  *     do the same, but use the SRC (funnel shift) instruction to realign the source data.
 39  *   This code tries to use fall-through branches for the common
 40  *     case of aligned source and destination and multiple
 41  *     of 4 (or 8) length.
 42  *
 43  * Register use:
 44  *      a0/ return address
 45  *      a1/ stack pointer
 46  *      a2/ return value
 47  *      a3/ src
 48  *      a4/ length
 49  *      a5/ dst
 50  *      a6/ tmp
 51  *      a7/ tmp
 52  *      a8/ tmp
 53  *      a9/ tmp
 54  *      a10/ tmp
 55  *      a11/ tmp
 56  */
 57 
 58         .text
 59 
 60 /*
 61  * Byte by byte copy
     *
     * Forward byte loop, reached from .Ldst1mod2 / .Ldst2mod4 for short
     * copies to an unaligned destination.
     * In:  a3 = src, a4 = number of bytes left (may be 0), a5 = dst.
     * Copies a4 bytes in ascending address order and returns.  a2 is not
     * written here, so it still holds whatever the entry code left in it.
     * Scratch: a6, plus a7 when XCHAL_HAVE_LOOPS is 0.
 62  */
 63         .align  4
 64         .byte   0               # 1 mod 4 alignment for LOOPNEZ
 65                                 # (0 mod 4 alignment for LBEG)
 66 .Lbytecopy:
 67 #if XCHAL_HAVE_LOOPS
 68         loopnez a4, .Lbytecopydone      # hardware loop: body runs a4 times, skipped entirely if a4 is 0
 69 #else /* !XCHAL_HAVE_LOOPS */
 70         beqz    a4, .Lbytecopydone      # nothing to copy
 71         add     a7, a3, a4      # a7 = end address for source
 72 #endif /* !XCHAL_HAVE_LOOPS */
 73 .Lnextbyte:
 74         l8ui    a6, a3, 0       # a6 = next source byte
 75         addi    a3, a3, 1       # src++
 76         s8i     a6, a5, 0       # store the byte at dst
 77         addi    a5, a5, 1       # dst++
 78 #if !XCHAL_HAVE_LOOPS
 79         bne     a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
 80 #endif /* !XCHAL_HAVE_LOOPS */
 81 .Lbytecopydone:
 82         abi_ret_default         # return to caller (macro, presumably from asm/asmmacro.h)
 83 
 84 /*
 85  * Destination is unaligned
     *
     * Alignment prologue for the forward copy.  Entered from .Lcommon when
     * bit 0 (.Ldst1mod2) or bit 1 (.Ldst2mod4) of the destination is set.
     * In/out: a3 = src, a4 = remaining length, a5 = dst.
     * Copies 1 and/or 2 leading bytes so that a5 becomes word-aligned, then
     * continues at .Ldstaligned.  Copies shorter than the thresholds below
     * are handed to .Lbytecopy instead; with those thresholds at least
     * 4 bytes are still left whenever .Ldstaligned is reached from here.
 86  */
 87 
 88         .align  4
 89 .Ldst1mod2:     # dst is only byte aligned
 90         _bltui  a4, 7, .Lbytecopy       # do short copies byte by byte
 91 
 92         # copy 1 byte
 93         l8ui    a6, a3,  0      # a6 = first source byte
 94         addi    a3, a3,  1      # src++
 95         addi    a4, a4, -1      # one byte fewer to copy
 96         s8i     a6, a5,  0      # store it
 97         addi    a5, a5,  1      # dst++ (now 2-byte aligned)
 98         _bbci.l a5, 1, .Ldstaligned     # if dst is now aligned, then
 99                                         # return to main algorithm
100 .Ldst2mod4:     # dst 16-bit aligned
101         # copy 2 bytes
102         _bltui  a4, 6, .Lbytecopy       # do short copies byte by byte
103         l8ui    a6, a3,  0      # byte loads: src alignment is unknown here
104         l8ui    a7, a3,  1
105         addi    a3, a3,  2      # src += 2
106         addi    a4, a4, -2      # two bytes fewer to copy
107         s8i     a6, a5,  0
108         s8i     a7, a5,  1
109         addi    a5, a5,  2      # dst += 2 (now word aligned)
110         j       .Ldstaligned    # dst is now aligned, return to main algorithm
111 
112 ENTRY(__memcpy)         /* void *__memcpy(void *dst, const void *src, size_t len) */
113 WEAK(memcpy)            /* memcpy is declared with WEAK() at the same entry point */
114 
115         abi_entry_default       # function prologue (macro, presumably from asm/asmmacro.h)
116         # a2/ dst, a3/ src, a4/ len
117         mov     a5, a2          # copy dst so that a2 is return value
118 .Lcommon:       # memmove also branches here when a forward copy is safe
119         _bbsi.l a2, 0, .Ldst1mod2       # if dst is 1 mod 2
120         _bbsi.l a2, 1, .Ldst2mod4       # if dst is 2 mod 4
121 .Ldstaligned:   # return here from .Ldst?mod? once dst is aligned
122         srli    a7, a4, 4       # number of loop iterations with 16B
123                                 # per iteration
124         movi    a8, 3           # if source is not aligned,
125         _bany   a3, a8, .Lsrcunaligned  # then use shifting copy
126         /*
127          * Destination and source are word-aligned, use word copy.
128          */
129         # copy 16 bytes per iteration for word-aligned dst and word-aligned src
130 #if XCHAL_HAVE_LOOPS
131         loopnez a7, .Loop1done  # hardware loop: a7 iterations, skipped if a7 is 0
132 #else /* !XCHAL_HAVE_LOOPS */
133         beqz    a7, .Loop1done  # fewer than 16 bytes: go straight to the tail
134         slli    a8, a7, 4       # a8 = bytes covered by the 16B loop
135         add     a8, a8, a3      # a8 = end of last 16B source chunk
136 #endif /* !XCHAL_HAVE_LOOPS */
137 .Loop1:         # loads and stores alternate between a6 and a7
138         l32i    a6, a3,  0
139         l32i    a7, a3,  4
140         s32i    a6, a5,  0
141         l32i    a6, a3,  8
142         s32i    a7, a5,  4
143         l32i    a7, a3, 12
144         s32i    a6, a5,  8
145         addi    a3, a3, 16      # src += 16
146         s32i    a7, a5, 12
147         addi    a5, a5, 16      # dst += 16
148 #if !XCHAL_HAVE_LOOPS
149         bne     a3, a8, .Loop1  # continue loop if a3:src != a8:src_end
150 #endif /* !XCHAL_HAVE_LOOPS */
151 .Loop1done:     # tail: bits 3..0 of the length select 8, 4, 2 and 1 byte copies
152         bbci.l  a4, 3, .L2      # bit 3 clear: no 8-byte chunk
153         # copy 8 bytes
154         l32i    a6, a3,  0
155         l32i    a7, a3,  4
156         addi    a3, a3,  8
157         s32i    a6, a5,  0
158         s32i    a7, a5,  4
159         addi    a5, a5,  8
160 .L2:
161         bbsi.l  a4, 2, .L3      # bit 2 set: copy 4 bytes
162         bbsi.l  a4, 1, .L4      # bit 1 set: copy 2 bytes
163         bbsi.l  a4, 0, .L5      # bit 0 set: copy 1 byte
164         abi_ret_default         # length was a multiple of 8: done
165 .L3:
166         # copy 4 bytes
167         l32i    a6, a3,  0
168         addi    a3, a3,  4
169         s32i    a6, a5,  0
170         addi    a5, a5,  4
171         bbsi.l  a4, 1, .L4      # bit 1 set: copy 2 bytes
172         bbsi.l  a4, 0, .L5      # bit 0 set: copy 1 byte
173         abi_ret_default
174 .L4:
175         # copy 2 bytes
176         l16ui   a6, a3,  0      # halfword access: src and dst advanced only by multiples of 4 on this path
177         addi    a3, a3,  2
178         s16i    a6, a5,  0
179         addi    a5, a5,  2
180         bbsi.l  a4, 0, .L5      # bit 0 set: copy 1 byte
181         abi_ret_default
182 .L5:
183         # copy 1 byte
184         l8ui    a6, a3,  0
185         s8i     a6, a5,  0      # last byte, pointers need no update
186         abi_ret_default
187 
188 /*
189  * Destination is aligned, Source is unaligned
     *
     * Forward shifting copy, entered from .Ldstaligned.
     * In: a3 = src (not word aligned), a4 = remaining length,
     *     a5 = dst (word aligned), a7 = a4 / 16, a8 = 3.
     * Source words are read with aligned loads and each pair of
     * neighbouring words is merged with __src_b into one destination word.
     * a6 always carries the most recently loaded, not yet fully used source
     * word into the next step.  __ssa8 and __src_b are macros defined
     * outside this file (presumably endian-aware wrappers in
     * asm/asmmacro.h -- confirm there).
190  */
191 
192         .align  4
193 .Lsrcunaligned:
194         _beqz   a4, .Ldone      # avoid loading anything for zero-length copies
195         # copy 16 bytes per iteration for word-aligned dst and unaligned src
196         __ssa8  a3              # set shift amount from byte offset
197 
198 /* set to 1 when running on ISS (simulator) with the
199    lint or ferret client, or 0 to save a few cycles */
200 #define SIM_CHECKS_ALIGNMENT    1
201 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
202         and     a11, a3, a8     # save unalignment offset for below (a8 is still 3)
203         sub     a3, a3, a11     # align a3
204 #endif
205         l32i    a6, a3, 0       # load first word
206 #if XCHAL_HAVE_LOOPS
207         loopnez a7, .Loop2done  # hardware loop: a7 iterations, skipped if a7 is 0
208 #else /* !XCHAL_HAVE_LOOPS */
209         beqz    a7, .Loop2done  # fewer than 16 bytes: go straight to the tail
210         slli    a10, a7, 4      # a10 = bytes covered by the 16B loop
211         add     a10, a10, a3    # a10 = end of last 16B source chunk
212 #endif /* !XCHAL_HAVE_LOOPS */
213 .Loop2:         # on entry a6 = source word at a3
214         l32i    a7, a3,  4
215         l32i    a8, a3,  8      # a8 is reused as a data register from here on
216         __src_b a6, a6, a7      # merge words 0 and 1
217         s32i    a6, a5,  0
218         l32i    a9, a3, 12
219         __src_b a7, a7, a8      # merge words 1 and 2
220         s32i    a7, a5,  4
221         l32i    a6, a3, 16      # first word of the next chunk, carried in a6
222         __src_b a8, a8, a9      # merge words 2 and 3
223         s32i    a8, a5,  8
224         addi    a3, a3, 16      # src += 16
225         __src_b a9, a9, a6      # merge words 3 and 4
226         s32i    a9, a5, 12
227         addi    a5, a5, 16      # dst += 16
228 #if !XCHAL_HAVE_LOOPS
229         bne     a3, a10, .Loop2 # continue loop if a3:src != a10:src_end
230 #endif /* !XCHAL_HAVE_LOOPS */
231 .Loop2done:     # tail: bits 3..0 of the length select 8, 4, 2 and 1 byte copies
232         bbci.l  a4, 3, .L12     # bit 3 clear: no 8-byte chunk
233         # copy 8 bytes
234         l32i    a7, a3,  4
235         l32i    a8, a3,  8
236         __src_b a6, a6, a7
237         s32i    a6, a5,  0
238         addi    a3, a3,  8
239         __src_b a7, a7, a8
240         s32i    a7, a5,  4
241         addi    a5, a5,  8
242         mov     a6, a8          # carry the last loaded word forward
243 .L12:
244         bbci.l  a4, 2, .L13     # bit 2 clear: no 4-byte chunk
245         # copy 4 bytes
246         l32i    a7, a3,  4
247         addi    a3, a3,  4
248         __src_b a6, a6, a7
249         s32i    a6, a5,  0
250         addi    a5, a5,  4
251         mov     a6, a7          # carry the last loaded word forward
252 .L13:
253 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
254         add     a3, a3, a11     # readjust a3 with correct misalignment
255 #endif
256         bbsi.l  a4, 1, .L14     # bit 1 set: copy 2 bytes
257         bbsi.l  a4, 0, .L15     # bit 0 set: copy 1 byte
258 .Ldone: abi_ret_default
259 .L14:
260         # copy 2 bytes
261         l8ui    a6, a3,  0      # byte accesses: a3 is the true, unaligned source address again
262         l8ui    a7, a3,  1
263         addi    a3, a3,  2
264         s8i     a6, a5,  0
265         s8i     a7, a5,  1
266         addi    a5, a5,  2
267         bbsi.l  a4, 0, .L15     # bit 0 set: copy 1 byte
268         abi_ret_default
269 .L15:
270         # copy 1 byte
271         l8ui    a6, a3,  0
272         s8i     a6, a5,  0      # last byte, pointers need no update
273         abi_ret_default
274 
275 ENDPROC(__memcpy)
276 EXPORT_SYMBOL(__memcpy)
277 EXPORT_SYMBOL(memcpy)
278 
279 /*
280  * void *memmove(void *dst, const void *src, size_t len);
281  *
282  * This function is intended to do the same thing as the standard
283  * library function memmove() for most cases.
284  * However, where the source and/or destination references
285  * an instruction RAM or ROM or a data RAM or ROM, that
286  * source and/or destination will always be accessed with
287  * 32-bit load and store instructions (as required for these
288  * types of devices).
289  *
290  * !!!!!!!  XTFIXME:
291  * !!!!!!!  Handling of IRAM/IROM has not yet
292  * !!!!!!!  been implemented.
293  *
294  * The (general case) algorithm is as follows:
295  *   If the unsigned difference dst - src is >= len, a forward copy is safe:
296  *     use the memcpy code.  Otherwise copy backwards, starting from the end.
297  *
298  * Register use:
299  *      a0/ return address
300  *      a1/ stack pointer
301  *      a2/ return value
302  *      a3/ src
303  *      a4/ length
304  *      a5/ dst
305  *      a6/ tmp
306  *      a7/ tmp
307  *      a8/ tmp
308  *      a9/ tmp
309  *      a10/ tmp
310  *      a11/ tmp
311  */
312 
313 /*
314  * Byte by byte copy
     *
     * Backward byte loop, reached from .Lbackdst1mod2 / .Lbackdst2mod4 for
     * short copies.
     * In:  a3 = one past the last source byte still to copy,
     *      a5 = one past the last destination byte still to write,
     *      a4 = number of bytes left (may be 0).
     * Copies a4 bytes in descending address order and returns.  a2 is not
     * written here.
     * Scratch: a6, plus a7 when XCHAL_HAVE_LOOPS is 0.
315  */
316         .align  4
317         .byte   0               # 1 mod 4 alignment for LOOPNEZ
318                                 # (0 mod 4 alignment for LBEG)
319 .Lbackbytecopy:
320 #if XCHAL_HAVE_LOOPS
321         loopnez a4, .Lbackbytecopydone  # hardware loop: body runs a4 times, skipped entirely if a4 is 0
322 #else /* !XCHAL_HAVE_LOOPS */
323         beqz    a4, .Lbackbytecopydone  # nothing to copy
324         sub     a7, a3, a4      # a7 = start address for source
325 #endif /* !XCHAL_HAVE_LOOPS */
326 .Lbacknextbyte:
327         addi    a3, a3, -1      # pre-decrement src
328         l8ui    a6, a3, 0       # a6 = source byte
329         addi    a5, a5, -1      # pre-decrement dst
330         s8i     a6, a5, 0       # store the byte
331 #if !XCHAL_HAVE_LOOPS
332         bne     a3, a7, .Lbacknextbyte # continue loop if
333                                        # $a3:src != $a7:src_start
334 #endif /* !XCHAL_HAVE_LOOPS */
335 .Lbackbytecopydone:
336         abi_ret_default         # return to caller (macro, presumably from asm/asmmacro.h)
337 
338 /*
339  * Destination is unaligned
     *
     * Alignment prologue for the backward copy.  Here a3 and a5 point one
     * past the end of the source and destination ranges (memmove added the
     * length to both), so "dst" in the comments below means that end
     * pointer a5.
     * Copies 1 and/or 2 trailing bytes so that a5 becomes word-aligned,
     * then continues at .Lbackdstaligned.  Copies shorter than the
     * thresholds below are handed to .Lbackbytecopy instead; with those
     * thresholds at least 4 bytes are still left whenever .Lbackdstaligned
     * is reached from here.
340  */
341 
342         .align  4
343 .Lbackdst1mod2: # dst is only byte aligned
344         _bltui  a4, 7, .Lbackbytecopy   # do short copies byte by byte
345 
346         # copy 1 byte
347         addi    a3, a3, -1      # step src back to the last byte
348         l8ui    a6, a3,  0
349         addi    a5, a5, -1      # step dst back (now 2-byte aligned)
350         s8i     a6, a5,  0
351         addi    a4, a4, -1      # one byte fewer to copy
352         _bbci.l a5, 1, .Lbackdstaligned # if dst is now aligned, then
353                                         # return to main algorithm
354 .Lbackdst2mod4: # dst 16-bit aligned
355         # copy 2 bytes
356         _bltui  a4, 6, .Lbackbytecopy   # do short copies byte by byte
357         addi    a3, a3, -2      # step src back two bytes
358         l8ui    a6, a3,  0      # both bytes are loaded before either store
359         l8ui    a7, a3,  1
360         addi    a5, a5, -2      # step dst back (now word aligned)
361         s8i     a6, a5,  0
362         s8i     a7, a5,  1
363         addi    a4, a4, -2      # two bytes fewer to copy
364         j       .Lbackdstaligned        # dst is now aligned,
365                                         # return to main algorithm
365                                         # return to main algorithm
366 
367 ENTRY(__memmove)        /* void *__memmove(void *dst, const void *src, size_t len) */
368 WEAK(memmove)           /* memmove is declared with WEAK() at the same entry point */
369 
370         abi_entry_default       # function prologue (macro, presumably from asm/asmmacro.h)
371         # a2/ dst, a3/ src, a4/ len
372         mov     a5, a2          # copy dst so that a2 is return value
373 .Lmovecommon:
374         sub     a6, a5, a3      # a6 = dst - src (wraps modulo 2^32)
375         bgeu    a6, a4, .Lcommon        # unsigned dst - src >= len: use the forward memcpy code
376 
377         add     a5, a5, a4      # a5 = dst + len (one past the end)
378         add     a3, a3, a4      # a3 = src + len (one past the end)
379 
380         _bbsi.l a5, 0, .Lbackdst1mod2   # if dst is 1 mod 2
381         _bbsi.l a5, 1, .Lbackdst2mod4   # if dst is 2 mod 4
382 .Lbackdstaligned:       # return here from .Lbackdst?mod? once dst is aligned
383         srli    a7, a4, 4       # number of loop iterations with 16B
384                                 # per iteration
385         movi    a8, 3           # if source is not aligned,
386         _bany   a3, a8, .Lbacksrcunaligned      # then use shifting copy
387         /*
388          * Destination and source are word-aligned, use word copy.
389          */
390         # copy 16 bytes per iteration for word-aligned dst and word-aligned src
391 #if XCHAL_HAVE_LOOPS
392         loopnez a7, .LbackLoop1done     # hardware loop: a7 iterations, skipped if a7 is 0
393 #else /* !XCHAL_HAVE_LOOPS */
394         beqz    a7, .LbackLoop1done     # fewer than 16 bytes: go straight to the tail
395         slli    a8, a7, 4       # a8 = bytes covered by the 16B loop
396         sub     a8, a3, a8      # a8 = start of first 16B source chunk
397 #endif /* !XCHAL_HAVE_LOOPS */
398 .LbackLoop1:    # words are copied from the highest address down
399         addi    a3, a3, -16     # src -= 16
400         l32i    a7, a3, 12
401         l32i    a6, a3,  8
402         addi    a5, a5, -16     # dst -= 16
403         s32i    a7, a5, 12
404         l32i    a7, a3,  4
405         s32i    a6, a5,  8
406         l32i    a6, a3,  0
407         s32i    a7, a5,  4
408         s32i    a6, a5,  0
409 #if !XCHAL_HAVE_LOOPS
410         bne     a3, a8, .LbackLoop1  # continue loop if a3:src != a8:src_start
411 #endif /* !XCHAL_HAVE_LOOPS */
412 .LbackLoop1done:        # tail: bits 3..0 of the length select 8, 4, 2 and 1 byte copies
413         bbci.l  a4, 3, .Lback2  # bit 3 clear: no 8-byte chunk
414         # copy 8 bytes
415         addi    a3, a3, -8
416         l32i    a6, a3,  0      # both words are loaded before either store
417         l32i    a7, a3,  4
418         addi    a5, a5, -8
419         s32i    a6, a5,  0
420         s32i    a7, a5,  4
421 .Lback2:
422         bbsi.l  a4, 2, .Lback3  # bit 2 set: copy 4 bytes
423         bbsi.l  a4, 1, .Lback4  # bit 1 set: copy 2 bytes
424         bbsi.l  a4, 0, .Lback5  # bit 0 set: copy 1 byte
425         abi_ret_default         # length was a multiple of 8: done
426 .Lback3:
427         # copy 4 bytes
428         addi    a3, a3, -4
429         l32i    a6, a3,  0
430         addi    a5, a5, -4
431         s32i    a6, a5,  0
432         bbsi.l  a4, 1, .Lback4  # bit 1 set: copy 2 bytes
433         bbsi.l  a4, 0, .Lback5  # bit 0 set: copy 1 byte
434         abi_ret_default
435 .Lback4:
436         # copy 2 bytes
437         addi    a3, a3, -2
438         l16ui   a6, a3,  0      # halfword access: src and dst moved only by multiples of 4 on this path
439         addi    a5, a5, -2
440         s16i    a6, a5,  0
441         bbsi.l  a4, 0, .Lback5  # bit 0 set: copy 1 byte
442         abi_ret_default
443 .Lback5:
444         # copy 1 byte
445         addi    a3, a3, -1
446         l8ui    a6, a3,  0
447         addi    a5, a5, -1
448         s8i     a6, a5,  0      # first byte of the buffer, copied last
449         abi_ret_default
450 
451 /*
452  * Destination is aligned, Source is unaligned
     *
     * Backward shifting copy, entered from .Lbackdstaligned.
     * In: a3 = one past the end of the source (not word aligned),
     *     a5 = one past the end of the destination (word aligned),
     *     a4 = remaining length, a7 = a4 / 16, a8 = 3.
     * Source words are read with aligned loads from high to low addresses;
     * each newly loaded (lower) word is merged with the previously loaded
     * (higher) word by __src_b to form one destination word.  a6 carries
     * the most recently loaded source word into the next step.  __ssa8 and
     * __src_b are macros defined outside this file (presumably
     * endian-aware wrappers in asm/asmmacro.h -- confirm there).
453  */
454 
455         .align  4
456 .Lbacksrcunaligned:
457         _beqz   a4, .Lbackdone  # avoid loading anything for zero-length copies
458         # copy 16 bytes per iteration for word-aligned dst and unaligned src
459         __ssa8  a3              # set shift amount from byte offset
460 #define SIM_CHECKS_ALIGNMENT    1       /* set to 1 when running on ISS with
461                                          * the lint or ferret client, or 0
462                                          * to save a few cycles */
463 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
464         and     a11, a3, a8     # save unalignment offset for below (a8 is still 3)
465         sub     a3, a3, a11     # align a3
466 #endif
467         l32i    a6, a3, 0       # load first word
468 #if XCHAL_HAVE_LOOPS
469         loopnez a7, .LbackLoop2done     # hardware loop: a7 iterations, skipped if a7 is 0
470 #else /* !XCHAL_HAVE_LOOPS */
471         beqz    a7, .LbackLoop2done     # fewer than 16 bytes: go straight to the tail
472         slli    a10, a7, 4      # a10 = bytes covered by the 16B loop
473         sub     a10, a3, a10    # a10 = start of first 16B source chunk
474 #endif /* !XCHAL_HAVE_LOOPS */
475 .LbackLoop2:    # on entry a6 = source word at a3
476         addi    a3, a3, -16     # src -= 16
477         l32i    a7, a3, 12
478         l32i    a8, a3,  8      # a8 is reused as a data register from here on
479         addi    a5, a5, -16     # dst -= 16
480         __src_b a6, a7, a6      # merge word 3 with the carried higher word
481         s32i    a6, a5, 12
482         l32i    a9, a3,  4
483         __src_b a7, a8, a7      # merge words 2 and 3
484         s32i    a7, a5,  8
485         l32i    a6, a3,  0      # lowest word of this chunk, carried in a6
486         __src_b a8, a9, a8      # merge words 1 and 2
487         s32i    a8, a5,  4
488         __src_b a9, a6, a9      # merge words 0 and 1
489         s32i    a9, a5,  0
490 #if !XCHAL_HAVE_LOOPS
491         bne     a3, a10, .LbackLoop2 # continue loop if a3:src != a10:src_start
492 #endif /* !XCHAL_HAVE_LOOPS */
493 .LbackLoop2done:        # tail: bits 3..0 of the length select 8, 4, 2 and 1 byte copies
494         bbci.l  a4, 3, .Lback12 # bit 3 clear: no 8-byte chunk
495         # copy 8 bytes
496         addi    a3, a3, -8
497         l32i    a7, a3,  4
498         l32i    a8, a3,  0
499         addi    a5, a5, -8
500         __src_b a6, a7, a6
501         s32i    a6, a5,  4
502         __src_b a7, a8, a7
503         s32i    a7, a5,  0
504         mov     a6, a8          # carry the lowest loaded word forward
505 .Lback12:
506         bbci.l  a4, 2, .Lback13 # bit 2 clear: no 4-byte chunk
507         # copy 4 bytes
508         addi    a3, a3, -4
509         l32i    a7, a3,  0
510         addi    a5, a5, -4
511         __src_b a6, a7, a6
512         s32i    a6, a5,  0
513         mov     a6, a7          # carry the lowest loaded word forward
514 .Lback13:
515 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
516         add     a3, a3, a11     # readjust a3 with correct misalignment
517 #endif
518         bbsi.l  a4, 1, .Lback14 # bit 1 set: copy 2 bytes
519         bbsi.l  a4, 0, .Lback15 # bit 0 set: copy 1 byte
520 .Lbackdone:
521         abi_ret_default
522 .Lback14:
523         # copy 2 bytes
524         addi    a3, a3, -2
525         l8ui    a6, a3,  0      # byte accesses: a3 is the true, unaligned source address again
526         l8ui    a7, a3,  1
527         addi    a5, a5, -2
528         s8i     a6, a5,  0
529         s8i     a7, a5,  1
530         bbsi.l  a4, 0, .Lback15 # bit 0 set: copy 1 byte
531         abi_ret_default
532 .Lback15:
533         # copy 1 byte
534         addi    a3, a3, -1
535         addi    a5, a5, -1
536         l8ui    a6, a3,  0
537         s8i     a6, a5,  0      # first byte of the buffer, copied last
538         abi_ret_default
539 
540 ENDPROC(__memmove)
541 EXPORT_SYMBOL(__memmove)
542 EXPORT_SYMBOL(memmove)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php