
TOMOYO Linux Cross Reference
Linux/arch/riscv/lib/memmove.S


Diff markup

Differences between /arch/riscv/lib/memmove.S (Architecture riscv) and /arch/sparc/lib/memmove.S (Architecture sparc). The two implementations share no lines in common, so each file is shown in full below.


/arch/riscv/lib/memmove.S:

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com>
 */

#include <linux/linkage.h>
#include <asm/asm.h>

SYM_FUNC_START(__memmove)
        /*
         * Returns
         *   a0 - dest
         *
         * Parameters
         *   a0 - Inclusive first byte of dest
         *   a1 - Inclusive first byte of src
         *   a2 - Length of copy n
         *
         * Because the return matches the parameter register a0,
         * we will not clobber or modify that register.
         *
         * Note: This currently only works on little-endian.
         * To port to big-endian, reverse the direction of shifts
         * in the 2 misaligned fixup copy loops.
         */

        /* Return if nothing to do */
        beq a0, a1, .Lreturn_from_memmove
        beqz a2, .Lreturn_from_memmove

        /*
         * Register Uses
         *      Forward Copy: a1 - Index counter of src
         *      Reverse Copy: a4 - Index counter of src
         *      Forward Copy: t3 - Index counter of dest
         *      Reverse Copy: t4 - Index counter of dest
         *   Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
         *   Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest
         *   Both Copy Modes: t0 - Link / Temporary for load-store
         *   Both Copy Modes: t1 - Temporary for load-store
         *   Both Copy Modes: t2 - Temporary for load-store
         *   Both Copy Modes: a5 - dest to src alignment offset
         *   Both Copy Modes: a6 - Shift ammount
         *   Both Copy Modes: a7 - Inverse Shift ammount
         *   Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
         */

        /*
         * Solve for some register values now.
         * Byte copy does not need t5 or t6.
         */
        mv   t3, a0
        add  t4, a0, a2
        add  a4, a1, a2

        /*
         * Byte copy if copying less than (2 * SZREG) bytes.  This can
         * cause problems with the bulk copy implementation and is
         * small enough not to bother.
         */
        andi t0, a2, -(2 * SZREG)
        beqz t0, .Lbyte_copy

        /*
         * Now solve for t5 and t6.
         */
        andi t5, t3, -SZREG
        andi t6, t4, -SZREG
        /*
         * If dest(Register t3) rounded down to the nearest naturally
         * aligned SZREG address, does not equal dest, then add SZREG
         * to find the low-bound of SZREG alignment in the dest memory
         * region.  Note that this could overshoot the dest memory
         * region if n is less than SZREG.  This is one reason why
         * we always byte copy if n is less than SZREG.
         * Otherwise, dest is already naturally aligned to SZREG.
         */
        beq  t5, t3, 1f
                addi t5, t5, SZREG
        1:

        /*
         * If the dest and src are co-aligned to SZREG, then there is
         * no need for the full rigmarole of a full misaligned fixup copy.
         * Instead, do a simpler co-aligned copy.
         */
        xor  t0, a0, a1
        andi t1, t0, (SZREG - 1)
        beqz t1, .Lcoaligned_copy
        /* Fall through to misaligned fixup copy */

.Lmisaligned_fixup_copy:
        bltu a1, a0, .Lmisaligned_fixup_copy_reverse

.Lmisaligned_fixup_copy_forward:
        jal  t0, .Lbyte_copy_until_aligned_forward

        andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
        slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
        sub  a5, a1, t3 /* Find the difference between src and dest */
        andi a1, a1, -SZREG /* Align the src pointer */
        addi a2, t6, SZREG /* The other breakpoint for the unrolled loop */

        /*
         * Compute The Inverse Shift
         * a7 = XLEN - a6 = XLEN + -a6
         * 2s complement negation to find the negative: -a6 = ~a6 + 1
         * Add that to XLEN.  XLEN = SZREG * 8.
         */
        not  a7, a6
        addi a7, a7, (SZREG * 8 + 1)

        /*
         * Fix Misalignment Copy Loop - Forward
         * load_val0 = load_ptr[0];
         * do {
         *      load_val1 = load_ptr[1];
         *      store_ptr += 2;
         *      store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7});
         *
         *      if (store_ptr == {a2})
         *              break;
         *
         *      load_val0 = load_ptr[2];
         *      load_ptr += 2;
         *      store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7});
         *
         * } while (store_ptr != store_ptr_end);
         * store_ptr = store_ptr_end;
         */

        REG_L t0, (0 * SZREG)(a1)
        1:
        REG_L t1, (1 * SZREG)(a1)
        addi  t3, t3, (2 * SZREG)
        srl   t0, t0, a6
        sll   t2, t1, a7
        or    t2, t0, t2
        REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3)

        beq   t3, a2, 2f

        REG_L t0, (2 * SZREG)(a1)
        addi  a1, a1, (2 * SZREG)
        srl   t1, t1, a6
        sll   t2, t0, a7
        or    t2, t1, t2
        REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)

        bne   t3, t6, 1b
        2:
        mv    t3, t6 /* Fix the dest pointer in case the loop was broken */

        add  a1, t3, a5 /* Restore the src pointer */
        j .Lbyte_copy_forward /* Copy any remaining bytes */

.Lmisaligned_fixup_copy_reverse:
        jal  t0, .Lbyte_copy_until_aligned_reverse

        andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
        slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
        sub  a5, a4, t4 /* Find the difference between src and dest */
        andi a4, a4, -SZREG /* Align the src pointer */
        addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop */

        /*
         * Compute The Inverse Shift
         * a7 = XLEN - a6 = XLEN + -a6
         * 2s complement negation to find the negative: -a6 = ~a6 + 1
         * Add that to XLEN.  XLEN = SZREG * 8.
         */
        not  a7, a6
        addi a7, a7, (SZREG * 8 + 1)

        /*
         * Fix Misalignment Copy Loop - Reverse
         * load_val1 = load_ptr[0];
         * do {
         *      load_val0 = load_ptr[-1];
         *      store_ptr -= 2;
         *      store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7});
         *
         *      if (store_ptr == {a2})
         *              break;
         *
         *      load_val1 = load_ptr[-2];
         *      load_ptr -= 2;
         *      store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7});
         *
         * } while (store_ptr != store_ptr_end);
         * store_ptr = store_ptr_end;
         */

        REG_L t1, ( 0 * SZREG)(a4)
        1:
        REG_L t0, (-1 * SZREG)(a4)
        addi  t4, t4, (-2 * SZREG)
        sll   t1, t1, a7
        srl   t2, t0, a6
        or    t2, t1, t2
        REG_S t2, ( 1 * SZREG)(t4)

        beq   t4, a2, 2f

        REG_L t1, (-2 * SZREG)(a4)
        addi  a4, a4, (-2 * SZREG)
        sll   t0, t0, a7
        srl   t2, t1, a6
        or    t2, t0, t2
        REG_S t2, ( 0 * SZREG)(t4)

        bne   t4, t5, 1b
        2:
        mv    t4, t5 /* Fix the dest pointer in case the loop was broken */

        add  a4, t4, a5 /* Restore the src pointer */
        j .Lbyte_copy_reverse /* Copy any remaining bytes */

/*
 * Simple copy loops for SZREG co-aligned memory locations.
 * These also make calls to do byte copies for any unaligned
 * data at their terminations.
 */
.Lcoaligned_copy:
        bltu a1, a0, .Lcoaligned_copy_reverse

.Lcoaligned_copy_forward:
        jal t0, .Lbyte_copy_until_aligned_forward

        1:
        REG_L t1, ( 0 * SZREG)(a1)
        addi  a1, a1, SZREG
        addi  t3, t3, SZREG
        REG_S t1, (-1 * SZREG)(t3)
        bne   t3, t6, 1b

        j .Lbyte_copy_forward /* Copy any remaining bytes */

.Lcoaligned_copy_reverse:
        jal t0, .Lbyte_copy_until_aligned_reverse

        1:
        REG_L t1, (-1 * SZREG)(a4)
        addi  a4, a4, -SZREG
        addi  t4, t4, -SZREG
        REG_S t1, ( 0 * SZREG)(t4)
        bne   t4, t5, 1b

        j .Lbyte_copy_reverse /* Copy any remaining bytes */

/*
 * These are basically sub-functions within the function.  They
 * are used to byte copy until the dest pointer is in alignment.
 * At which point, a bulk copy method can be used by the
 * calling code.  These work on the same registers as the bulk
 * copy loops.  Therefore, the register values can be picked
 * up from where they were left and we avoid code duplication
 * without any overhead except the call in and return jumps.
 */
.Lbyte_copy_until_aligned_forward:
        beq  t3, t5, 2f
        1:
        lb   t1,  0(a1)
        addi a1, a1, 1
        addi t3, t3, 1
        sb   t1, -1(t3)
        bne  t3, t5, 1b
        2:
        jalr zero, 0x0(t0) /* Return to multibyte copy loop */

.Lbyte_copy_until_aligned_reverse:
        beq  t4, t6, 2f
        1:
        lb   t1, -1(a4)
        addi a4, a4, -1
        addi t4, t4, -1
        sb   t1,  0(t4)
        bne  t4, t6, 1b
        2:
        jalr zero, 0x0(t0) /* Return to multibyte copy loop */

/*
 * Simple byte copy loops.
 * These will byte copy until they reach the end of data to copy.
 * At that point, they will call to return from memmove.
 */
.Lbyte_copy:
        bltu a1, a0, .Lbyte_copy_reverse

.Lbyte_copy_forward:
        beq  t3, t4, 2f
        1:
        lb   t1,  0(a1)
        addi a1, a1, 1
        addi t3, t3, 1
        sb   t1, -1(t3)
        bne  t3, t4, 1b
        2:
        ret

.Lbyte_copy_reverse:
        beq  t4, t3, 2f
        1:
        lb   t1, -1(a4)
        addi a4, a4, -1
        addi t4, t4, -1
        sb   t1,  0(t4)
        bne  t4, t3, 1b
        2:

.Lreturn_from_memmove:
        ret

SYM_FUNC_END(__memmove)
SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
SYM_FUNC_ALIAS(__pi_memmove, __memmove)
SYM_FUNC_ALIAS(__pi___memmove, __memmove)
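
The pseudocode in the two "Fix Misalignment Copy Loop" comments above is the heart of the RISC-V routine: the destination is kept SZREG-aligned, the source is rounded down to an aligned address, and each destination word is stitched together from two aligned source loads using the shift pair a6 / a7 (a7 = XLEN - a6). The C sketch below is only a simplified model of the forward case, not the kernel code: the function name is made up, it copies one word per iteration instead of the unrolled two, it assumes little-endian layout and a non-zero misalignment (the co-aligned case takes the .Lcoaligned_copy path), and it assumes the byte head and tail are handled by the caller, as .Lbyte_copy_until_aligned_forward and .Lbyte_copy_forward do in the assembly.

#include <stddef.h>
#include <stdint.h>

/*
 * Hypothetical C model of the forward misaligned-fixup copy.
 * dst is word-aligned, src is misaligned by 1..sizeof(long)-1 bytes,
 * and nwords full words are written to dst (little-endian assumed).
 */
static void word_copy_misaligned_forward(unsigned long *dst,
                                         const unsigned char *src,
                                         size_t nwords)
{
        const unsigned long *s;
        unsigned int off, shift, inv;
        unsigned long lo, hi;

        off   = (uintptr_t)src & (sizeof(long) - 1);            /* a5 */
        shift = off * 8;                                        /* a6 */
        inv   = sizeof(long) * 8 - shift;                       /* a7 = XLEN - a6 */
        s     = (const unsigned long *)((uintptr_t)src - off);  /* aligned src */

        lo = *s++;                                              /* load_val0 */
        while (nwords--) {
                hi = *s++;                                      /* load_val1 */
                /* Stitch one aligned destination word out of two
                 * aligned source words. */
                *dst++ = (lo >> shift) | (hi << inv);
                lo = hi;
        }
}

On big-endian the two shift directions would swap, which is what the porting note in the assembly's header comment refers to.
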
/arch/sparc/lib/memmove.S:

/* SPDX-License-Identifier: GPL-2.0 */
/* memmove.S: Simple memmove implementation.
 *
 * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
 * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
 */

#include <linux/export.h>
#include <linux/linkage.h>

        .text
ENTRY(memmove) /* o0=dst o1=src o2=len */
        brz,pn          %o2, 99f
         mov            %o0, %g1

        cmp             %o0, %o1
        bleu,pt         %xcc, 2f
         add            %o1, %o2, %g7
        cmp             %g7, %o0
        bleu,pt         %xcc, memcpy
         add            %o0, %o2, %o5
        sub             %g7, 1, %o1

        sub             %o5, 1, %o0
1:      ldub            [%o1], %g7
        subcc           %o2, 1, %o2
        sub             %o1, 1, %o1
        stb             %g7, [%o0]
        bne,pt          %icc, 1b
         sub            %o0, 1, %o0
99:
        retl
         mov            %g1, %o0

        /* We can't just call memcpy for these memmove cases.  On some
         * chips the memcpy uses cache initializing stores and when dst
         * and src are close enough, those can clobber the source data
         * before we've loaded it in.
         */
2:      or              %o0, %o1, %g7
        or              %o2, %g7, %g7
        andcc           %g7, 0x7, %g0
        bne,pn          %xcc, 4f
         nop

3:      ldx             [%o1], %g7
        add             %o1, 8, %o1
        subcc           %o2, 8, %o2
        add             %o0, 8, %o0
        bne,pt          %icc, 3b
         stx            %g7, [%o0 - 0x8]
        ba,a,pt         %xcc, 99b

4:      ldub            [%o1], %g7
        add             %o1, 1, %o1
        subcc           %o2, 1, %o2
        add             %o0, 1, %o0
        bne,pt          %icc, 4b
         stb            %g7, [%o0 - 0x1]
        ba,a,pt         %xcc, 99b
ENDPROC(memmove)
EXPORT_SYMBOL(memmove)

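Both files start the same way: return immediately when there is nothing to do, then pick a copy direction so that overlapping bytes are never overwritten before they are read. The sparc comment above also explains why it only tail-calls memcpy when dst lies entirely past the source (src + len <= dst): its memcpy may use cache-initializing stores, which could clobber source bytes that have not been loaded yet when the two regions are close. Below is a rough C model of that dispatch; the function name is made up and the plain byte loops merely stand in for the tuned word and byte copy routines in both files.

#include <stddef.h>

/* Hypothetical model of the direction choice made by both versions. */
void *memmove_model(void *dst, const void *src, size_t n)
{
        unsigned char *d = dst;
        const unsigned char *s = src;

        if (d == s || n == 0)           /* "Return if nothing to do" */
                return dst;

        if (d < s || d >= s + n) {
                /* Copying forward never overwrites bytes that still
                 * have to be read.  sparc only defers to memcpy in the
                 * non-overlapping d >= s + n case, so even
                 * cache-initializing stores cannot touch src. */
                while (n--)
                        *d++ = *s++;
        } else {
                /* dst overlaps the tail of src: copy backwards so each
                 * source byte is read before it is overwritten. */
                d += n;
                s += n;
                while (n--)
                        *--d = *--s;
        }
        return dst;
}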