
TOMOYO Linux Cross Reference
Linux/arch/riscv/lib/memmove.S

Diff markup

Differences between /arch/riscv/lib/memmove.S (Version linux-6.12-rc7) and /arch/alpha/lib/memmove.S (Version linux-5.8.18)
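
Both routines implement the usual memmove() contract: the buffers may overlap, so the copy direction is chosen from the relative positions of dest and src (the RISC-V file compares a0 and a1, the Alpha file compares $16 and $17). A minimal byte-granular C sketch of that rule, for orientation only; the listings below copy whole words wherever alignment allows:

    #include <stddef.h>

    void *memmove_sketch(void *dest, const void *src, size_t n)
    {
            unsigned char *d = dest;
            const unsigned char *s = src;

            if (d == s || n == 0)           /* nothing to do */
                    return dest;

            if (d < s) {                    /* forward copy never overwrites unread src bytes */
                    while (n--)
                            *d++ = *s++;
            } else {                        /* dest overlaps the tail of src: copy backwards */
                    d += n;
                    s += n;
                    while (n--)
                            *--d = *--s;
            }
            return dest;
    }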


  1 /* SPDX-License-Identifier: GPL-2.0-only */    !!   1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*                                                  2 /*
  3  * Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com> !!   3  * arch/alpha/lib/memmove.S
                                                   >>   4  *
                                                   >>   5  * Barely optimized memmove routine for Alpha EV5.
                                                   >>   6  *
                                                   >>   7  * This is hand-massaged output from the original memcpy.c.  We defer to
                                                   >>   8  * memcpy whenever possible; the backwards copy loops are not unrolled.
  4  */                                                 9  */
                                                   >>  10 #include <asm/export.h>        
                                                   >>  11         .set noat
                                                   >>  12         .set noreorder
                                                   >>  13         .text
                                                   >>  14 
                                                   >>  15         .align 4
                                                   >>  16         .globl memmove
                                                   >>  17         .ent memmove
                                                   >>  18 memmove:
                                                   >>  19         ldgp $29, 0($27)
                                                   >>  20         unop
                                                   >>  21         nop
                                                   >>  22         .prologue 1
                                                   >>  23 
                                                   >>  24         addq $16,$18,$4
                                                   >>  25         addq $17,$18,$5
                                                   >>  26         cmpule $4,$17,$1                /*  dest + n <= src  */
                                                   >>  27         cmpule $5,$16,$2                /*  dest >= src + n  */
                                                   >>  28 
                                                   >>  29         bis $1,$2,$1
                                                   >>  30         mov $16,$0
                                                   >>  31         xor $16,$17,$2
                                                   >>  32         bne $1,memcpy                   !samegp
                                                   >>  33 
                                                   >>  34         and $2,7,$2                     /* Test for src/dest co-alignment.  */
                                                   >>  35         and $16,7,$1
                                                   >>  36         cmpule $16,$17,$3
                                                   >>  37         bne $3,$memmove_up              /* dest < src */
                                                   >>  38 
                                                   >>  39         and $4,7,$1
                                                   >>  40         bne $2,$misaligned_dn
                                                   >>  41         unop
                                                   >>  42         beq $1,$skip_aligned_byte_loop_head_dn
                                                   >>  43 
                                                   >>  44 $aligned_byte_loop_head_dn:
                                                   >>  45         lda $4,-1($4)
                                                   >>  46         lda $5,-1($5)
                                                   >>  47         unop
                                                   >>  48         ble $18,$egress
                                                   >>  49 
                                                   >>  50         ldq_u $3,0($5)
                                                   >>  51         ldq_u $2,0($4)
                                                   >>  52         lda $18,-1($18)
                                                   >>  53         extbl $3,$5,$1
                                                   >>  54 
                                                   >>  55         insbl $1,$4,$1
                                                   >>  56         mskbl $2,$4,$2
                                                   >>  57         bis $1,$2,$1
                                                   >>  58         and $4,7,$6
                                                   >>  59 
                                                   >>  60         stq_u $1,0($4)
                                                   >>  61         bne $6,$aligned_byte_loop_head_dn
                                                   >>  62 
                                                   >>  63 $skip_aligned_byte_loop_head_dn:
                                                   >>  64         lda $18,-8($18)
                                                   >>  65         blt $18,$skip_aligned_word_loop_dn
                                                   >>  66 
                                                   >>  67 $aligned_word_loop_dn:
                                                   >>  68         ldq $1,-8($5)
                                                   >>  69         nop
                                                   >>  70         lda $5,-8($5)
                                                   >>  71         lda $18,-8($18)
                                                   >>  72 
                                                   >>  73         stq $1,-8($4)
                                                   >>  74         nop
                                                   >>  75         lda $4,-8($4)
                                                   >>  76         bge $18,$aligned_word_loop_dn
                                                   >>  77 
                                                   >>  78 $skip_aligned_word_loop_dn:
                                                   >>  79         lda $18,8($18)
                                                   >>  80         bgt $18,$byte_loop_tail_dn
                                                   >>  81         unop
                                                   >>  82         ret $31,($26),1
                                                   >>  83 
                                                   >>  84         .align 4
                                                   >>  85 $misaligned_dn:
                                                   >>  86         nop
                                                   >>  87         fnop
                                                   >>  88         unop
                                                   >>  89         beq $18,$egress
                                                   >>  90 
                                                   >>  91 $byte_loop_tail_dn:
                                                   >>  92         ldq_u $3,-1($5)
                                                   >>  93         ldq_u $2,-1($4)
                                                   >>  94         lda $5,-1($5)
                                                   >>  95         lda $4,-1($4)
                                                   >>  96 
                                                   >>  97         lda $18,-1($18)
                                                   >>  98         extbl $3,$5,$1
                                                   >>  99         insbl $1,$4,$1
                                                   >> 100         mskbl $2,$4,$2
                                                   >> 101 
                                                   >> 102         bis $1,$2,$1
                                                   >> 103         stq_u $1,0($4)
                                                   >> 104         bgt $18,$byte_loop_tail_dn
                                                   >> 105         br $egress
                                                   >> 106 
                                                   >> 107 $memmove_up:
                                                   >> 108         mov $16,$4
                                                   >> 109         mov $17,$5
                                                   >> 110         bne $2,$misaligned_up
                                                   >> 111         beq $1,$skip_aligned_byte_loop_head_up
                                                   >> 112 
                                                   >> 113 $aligned_byte_loop_head_up:
                                                   >> 114         unop
                                                   >> 115         ble $18,$egress
                                                   >> 116         ldq_u $3,0($5)
                                                   >> 117         ldq_u $2,0($4)
                                                   >> 118 
                                                   >> 119         lda $18,-1($18)
                                                   >> 120         extbl $3,$5,$1
                                                   >> 121         insbl $1,$4,$1
                                                   >> 122         mskbl $2,$4,$2
                                                   >> 123 
                                                   >> 124         bis $1,$2,$1
                                                   >> 125         lda $5,1($5)
                                                   >> 126         stq_u $1,0($4)
                                                   >> 127         lda $4,1($4)
                                                   >> 128 
                                                   >> 129         and $4,7,$6
                                                   >> 130         bne $6,$aligned_byte_loop_head_up
                                                   >> 131 
                                                   >> 132 $skip_aligned_byte_loop_head_up:
                                                   >> 133         lda $18,-8($18)
                                                   >> 134         blt $18,$skip_aligned_word_loop_up
                                                   >> 135 
                                                   >> 136 $aligned_word_loop_up:
                                                   >> 137         ldq $1,0($5)
                                                   >> 138         nop
                                                   >> 139         lda $5,8($5)
                                                   >> 140         lda $18,-8($18)
                                                   >> 141 
                                                   >> 142         stq $1,0($4)
                                                   >> 143         nop
                                                   >> 144         lda $4,8($4)
                                                   >> 145         bge $18,$aligned_word_loop_up
                                                   >> 146 
                                                   >> 147 $skip_aligned_word_loop_up:
                                                   >> 148         lda $18,8($18)
                                                   >> 149         bgt $18,$byte_loop_tail_up
                                                   >> 150         unop
                                                   >> 151         ret $31,($26),1
                                                   >> 152 
                                                   >> 153         .align 4
                                                   >> 154 $misaligned_up:
                                                   >> 155         nop
                                                   >> 156         fnop
                                                   >> 157         unop
                                                   >> 158         beq $18,$egress
                                                   >> 159 
                                                   >> 160 $byte_loop_tail_up:
                                                   >> 161         ldq_u $3,0($5)
                                                   >> 162         ldq_u $2,0($4)
                                                   >> 163         lda $18,-1($18)
                                                   >> 164         extbl $3,$5,$1
                                                   >> 165 
                                                   >> 166         insbl $1,$4,$1
                                                   >> 167         mskbl $2,$4,$2
                                                   >> 168         bis $1,$2,$1
                                                   >> 169         stq_u $1,0($4)
                                                   >> 170 
                                                   >> 171         lda $5,1($5)
                                                   >> 172         lda $4,1($4)
                                                   >> 173         nop
                                                   >> 174         bgt $18,$byte_loop_tail_up
                                                   >> 175 
                                                   >> 176 $egress:
                                                   >> 177         ret $31,($26),1
                                                   >> 178         nop
                                                   >> 179         nop
                                                   >> 180         nop
  5                                                   181 
  6 #include <linux/linkage.h>                     !! 182         .end memmove
  7 #include <asm/asm.h>                           !! 183         EXPORT_SYMBOL(memmove)
  8                                                << 
  9 SYM_FUNC_START(__memmove)                      << 
 10         /*                                     << 
 11          * Returns                             << 
 12          *   a0 - dest                         << 
 13          *                                     << 
 14          * Parameters                          << 
 15          *   a0 - Inclusive first byte of dest << 
 16          *   a1 - Inclusive first byte of src  << 
 17          *   a2 - Length of copy n             << 
 18          *                                     << 
 19          * Because the return matches the parameter register a0, <<
 20          * we will not clobber or modify that register. <<
 21          *                                     <<
 22          * Note: This currently only works on little-endian. <<
 23          * To port to big-endian, reverse the direction of shifts <<
 24          * in the 2 misaligned fixup copy loops. <<
 25          */                                    << 
 26                                                << 
 27         /* Return if nothing to do */          << 
 28         beq a0, a1, .Lreturn_from_memmove      << 
 29         beqz a2, .Lreturn_from_memmove         << 
 30                                                << 
 31         /*                                     << 
 32          * Register Uses                       << 
 33          *      Forward Copy: a1 - Index counter of src <<
 34          *      Reverse Copy: a4 - Index counter of src <<
 35          *      Forward Copy: t3 - Index counter of dest <<
 36          *      Reverse Copy: t4 - Index counter of dest <<
 37          *   Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest <<
 38          *   Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest <<
 39          *   Both Copy Modes: t0 - Link / Temporary for load-store <<
 40          *   Both Copy Modes: t1 - Temporary for load-store <<
 41          *   Both Copy Modes: t2 - Temporary for load-store <<
 42          *   Both Copy Modes: a5 - dest to src alignment offset <<
 43          *   Both Copy Modes: a6 - Shift amount <<
 44          *   Both Copy Modes: a7 - Inverse Shift amount <<
 45          *   Both Copy Modes: a2 - Alternate breakpoint for unrolled loops <<
 46          */                                    << 
 47                                                << 
 48         /*                                     << 
 49          * Solve for some register values now. << 
 50          * Byte copy does not need t5 or t6.   << 
 51          */                                    << 
 52         mv   t3, a0                            << 
 53         add  t4, a0, a2                        << 
 54         add  a4, a1, a2                        << 
 55                                                << 
 56         /*                                     << 
 57          * Byte copy if copying less than (2 * SZREG) bytes.  This can <<
 58          * cause problems with the bulk copy implementation and is <<
 59          * small enough not to bother.         << 
 60          */                                    << 
 61         andi t0, a2, -(2 * SZREG)              << 
 62         beqz t0, .Lbyte_copy                   << 
 63                                                << 
 64         /*                                     << 
 65          * Now solve for t5 and t6.            << 
 66          */                                    << 
 67         andi t5, t3, -SZREG                    << 
 68         andi t6, t4, -SZREG                    << 
 69         /*                                     << 
 70          * If dest (Register t3), rounded down to the nearest naturally <<
 71          * aligned SZREG address, does not equal dest, then add SZREG <<
 72          * to find the low-bound of SZREG alignment in the dest memory <<
 73          * region.  Note that this could overshoot the dest memory <<
 74          * region if n is less than SZREG.  This is one reason why <<
 75          * we always byte copy if n is less than SZREG. <<
 76          * Otherwise, dest is already naturally aligned to SZREG. <<
 77          */                                    << 
 78         beq  t5, t3, 1f                        << 
 79                 addi t5, t5, SZREG             << 
 80         1:                                     << 
 81                                                << 
 82         /*                                     << 
 83          * If the dest and src are co-aligned to SZREG, then there is <<
 84          * no need for the full rigmarole of a misaligned fixup copy. <<
 85          * Instead, do a simpler co-aligned copy. <<
 86          */                                    << 
 87         xor  t0, a0, a1                        << 
 88         andi t1, t0, (SZREG - 1)               << 
 89         beqz t1, .Lcoaligned_copy              << 
 90         /* Fall through to misaligned fixup copy */ <<
 91                                                << 
 92 .Lmisaligned_fixup_copy:                       << 
 93         bltu a1, a0, .Lmisaligned_fixup_copy_reverse <<
 94                                                << 
 95 .Lmisaligned_fixup_copy_forward:               << 
 96         jal  t0, .Lbyte_copy_until_aligned_forward <<
 97                                                << 
 98         andi a5, a1, (SZREG - 1) /* Find the alignment offset of src */ <<
 99         slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ <<
100         sub  a5, a1, t3 /* Find the difference between src and dest */ <<
101         andi a1, a1, -SZREG /* Align the src pointer */ <<
102         addi a2, t6, SZREG /* The other breakpoint for the unrolled loop */ <<
103                                                << 
104         /*                                     << 
105          * Compute The Inverse Shift           << 
106          * a7 = XLEN - a6 = XLEN + -a6         << 
107          * 2s complement negation to find the negative: -a6 = ~a6 + 1 <<
108          * Add that to XLEN.  XLEN = SZREG * 8. <<
109          */                                    << 
110         not  a7, a6                            << 
111         addi a7, a7, (SZREG * 8 + 1)           << 
112                                                << 
113         /*                                     << 
114          * Fix Misalignment Copy Loop - Forward <<
115          * load_val0 = load_ptr[0];            << 
116          * do {                                << 
117          *      load_val1 = load_ptr[1];       << 
118          *      store_ptr += 2;                << 
119          *      store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7}); <<
120          *                                     << 
121          *      if (store_ptr == {a2})         << 
122          *              break;                 << 
123          *                                     << 
124          *      load_val0 = load_ptr[2];       << 
125          *      load_ptr += 2;                 << 
126          *      store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7}); <<
127          *                                     << 
128          * } while (store_ptr != store_ptr_end); <<
129          * store_ptr = store_ptr_end;          << 
130          */                                    << 
131                                                << 
132         REG_L t0, (0 * SZREG)(a1)              << 
133         1:                                     << 
134         REG_L t1, (1 * SZREG)(a1)              << 
135         addi  t3, t3, (2 * SZREG)              << 
136         srl   t0, t0, a6                       << 
137         sll   t2, t1, a7                       << 
138         or    t2, t0, t2                       << 
139         REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3) <<
140                                                << 
141         beq   t3, a2, 2f                       << 
142                                                << 
143         REG_L t0, (2 * SZREG)(a1)              << 
144         addi  a1, a1, (2 * SZREG)              << 
145         srl   t1, t1, a6                       << 
146         sll   t2, t0, a7                       << 
147         or    t2, t1, t2                       << 
148         REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3) <<
149                                                << 
150         bne   t3, t6, 1b                       << 
151         2:                                     << 
152         mv    t3, t6 /* Fix the dest pointer in case the loop was exited early */ <<
153                                                << 
154         add  a1, t3, a5 /* Restore the src pointer */ <<
155         j .Lbyte_copy_forward /* Copy any remaining bytes */ <<
156                                                << 
157 .Lmisaligned_fixup_copy_reverse:               << 
158         jal  t0, .Lbyte_copy_until_aligned_reverse <<
159                                                << 
160         andi a5, a4, (SZREG - 1) /* Find the alignment offset of src */ <<
161         slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ <<
162         sub  a5, a4, t4 /* Find the difference between src and dest */ <<
163         andi a4, a4, -SZREG /* Align the src pointer */ <<
164         addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop */ <<
165                                                << 
166         /*                                     << 
167          * Compute The Inverse Shift           << 
168          * a7 = XLEN - a6 = XLEN + -a6         << 
169          * 2s complement negation to find the negative: -a6 = ~a6 + 1 <<
170          * Add that to XLEN.  XLEN = SZREG * 8. <<
171          */                                    << 
172         not  a7, a6                            << 
173         addi a7, a7, (SZREG * 8 + 1)           << 
174                                                << 
175         /*                                     << 
176          * Fix Misalignment Copy Loop - Reverse <<
177          * load_val1 = load_ptr[0];            << 
178          * do {                                << 
179          *      load_val0 = load_ptr[-1];      << 
180          *      store_ptr -= 2;                << 
181          *      store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7}); <<
182          *                                     << 
183          *      if (store_ptr == {a2})         << 
184          *              break;                 << 
185          *                                     << 
186          *      load_val1 = load_ptr[-2];      << 
187          *      load_ptr -= 2;                 << 
188          *      store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7}); <<
189          *                                     << 
190          * } while (store_ptr != store_ptr_end); <<
191          * store_ptr = store_ptr_end;          << 
192          */                                    << 
193                                                << 
194         REG_L t1, ( 0 * SZREG)(a4)             << 
195         1:                                     << 
196         REG_L t0, (-1 * SZREG)(a4)             << 
197         addi  t4, t4, (-2 * SZREG)             << 
198         sll   t1, t1, a7                       << 
199         srl   t2, t0, a6                       << 
200         or    t2, t1, t2                       << 
201         REG_S t2, ( 1 * SZREG)(t4)             << 
202                                                << 
203         beq   t4, a2, 2f                       << 
204                                                << 
205         REG_L t1, (-2 * SZREG)(a4)             << 
206         addi  a4, a4, (-2 * SZREG)             << 
207         sll   t0, t0, a7                       << 
208         srl   t2, t1, a6                       << 
209         or    t2, t0, t2                       << 
210         REG_S t2, ( 0 * SZREG)(t4)             << 
211                                                << 
212         bne   t4, t5, 1b                       << 
213         2:                                     << 
214         mv    t4, t5 /* Fix the dest pointer in case the loop was exited early */ <<
215                                                << 
216         add  a4, t4, a5 /* Restore the src pointer */ <<
217         j .Lbyte_copy_reverse /* Copy any remaining bytes */ <<
218                                                << 
219 /*                                             << 
220  * Simple copy loops for SZREG co-aligned memory locations. <<
221  * These also make calls to do byte copies for any unaligned <<
222  * data at their terminations.                 << 
223  */                                            << 
224 .Lcoaligned_copy:                              << 
225         bltu a1, a0, .Lcoaligned_copy_reverse  << 
226                                                << 
227 .Lcoaligned_copy_forward:                      << 
228         jal t0, .Lbyte_copy_until_aligned_forward <<
229                                                << 
230         1:                                     << 
231         REG_L t1, ( 0 * SZREG)(a1)             << 
232         addi  a1, a1, SZREG                    << 
233         addi  t3, t3, SZREG                    << 
234         REG_S t1, (-1 * SZREG)(t3)             << 
235         bne   t3, t6, 1b                       << 
236                                                << 
237         j .Lbyte_copy_forward /* Copy any remaining bytes */ <<
238                                                << 
239 .Lcoaligned_copy_reverse:                      << 
240         jal t0, .Lbyte_copy_until_aligned_reverse <<
241                                                << 
242         1:                                     << 
243         REG_L t1, (-1 * SZREG)(a4)             << 
244         addi  a4, a4, -SZREG                   << 
245         addi  t4, t4, -SZREG                   << 
246         REG_S t1, ( 0 * SZREG)(t4)             << 
247         bne   t4, t5, 1b                       << 
248                                                << 
249         j .Lbyte_copy_reverse /* Copy any remaining bytes */ <<
250                                                << 
251 /*                                             << 
252  * These are basically sub-functions within the function.  They <<
253  * are used to byte copy until the dest pointer is in alignment. <<
254  * At which point, a bulk copy method can be used by the <<
255  * calling code.  These work on the same registers as the bulk <<
256  * copy loops.  Therefore, the register values can be picked <<
257  * up from where they were left and we avoid code duplication <<
258  * without any overhead except the call in and return jumps. <<
259  */                                            << 
260 .Lbyte_copy_until_aligned_forward:             << 
261         beq  t3, t5, 2f                        << 
262         1:                                     << 
263         lb   t1,  0(a1)                        << 
264         addi a1, a1, 1                         << 
265         addi t3, t3, 1                         << 
266         sb   t1, -1(t3)                        << 
267         bne  t3, t5, 1b                        << 
268         2:                                     << 
269         jalr zero, 0x0(t0) /* Return to multibyte copy loop */ <<
270                                                << 
271 .Lbyte_copy_until_aligned_reverse:             << 
272         beq  t4, t6, 2f                        << 
273         1:                                     << 
274         lb   t1, -1(a4)                        << 
275         addi a4, a4, -1                        << 
276         addi t4, t4, -1                        << 
277         sb   t1,  0(t4)                        << 
278         bne  t4, t6, 1b                        << 
279         2:                                     << 
280         jalr zero, 0x0(t0) /* Return to multibyte copy loop */ <<
281                                                << 
282 /*                                             << 
283  * Simple byte copy loops.                     << 
284  * These will byte copy until they reach the end of the data to copy. <<
285  * At that point, they will call to return from memmove. <<
286  */                                            << 
287 .Lbyte_copy:                                   << 
288         bltu a1, a0, .Lbyte_copy_reverse       << 
289                                                << 
290 .Lbyte_copy_forward:                           << 
291         beq  t3, t4, 2f                        << 
292         1:                                     << 
293         lb   t1,  0(a1)                        << 
294         addi a1, a1, 1                         << 
295         addi t3, t3, 1                         << 
296         sb   t1, -1(t3)                        << 
297         bne  t3, t4, 1b                        << 
298         2:                                     << 
299         ret                                    << 
300                                                << 
301 .Lbyte_copy_reverse:                           << 
302         beq  t4, t3, 2f                        << 
303         1:                                     << 
304         lb   t1, -1(a4)                        << 
305         addi a4, a4, -1                        << 
306         addi t4, t4, -1                        << 
307         sb   t1,  0(t4)                        << 
308         bne  t4, t3, 1b                        << 
309         2:                                     << 
310                                                << 
311 .Lreturn_from_memmove:                         << 
312         ret                                    << 
313                                                << 
314 SYM_FUNC_END(__memmove)                        << 
315 SYM_FUNC_ALIAS_WEAK(memmove, __memmove)        << 
316 SYM_FUNC_ALIAS(__pi_memmove, __memmove)        << 
317 SYM_FUNC_ALIAS(__pi___memmove, __memmove)      << 
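
The RISC-V routine above bails out to plain byte copies for anything shorter than 2 * SZREG and otherwise computes two bounds for its word loops: t5, the first SZREG-aligned address inside dest, and t6, the last aligned address before the end. Rounding t5 up is also why tiny copies must take the byte path, as the comment around the beq t5, t3, 1f sequence explains: for n < SZREG the rounded-up bound could land past the end of dest. A C sketch of that address arithmetic, assuming SZREG == sizeof(unsigned long); the helper name is made up for illustration:

    #include <stddef.h>
    #include <stdint.h>

    #define SZREG sizeof(unsigned long)

    /* Returns 0 when the copy should use the plain byte loops instead. */
    static int word_copy_bounds(uintptr_t dest, size_t n,
                                uintptr_t *t5, uintptr_t *t6)
    {
            uintptr_t t3 = dest;                    /* dest index counter      */
            uintptr_t t4 = dest + n;                /* one past the last byte  */

            if (n < 2 * SZREG)                      /* andi t0, a2, -(2*SZREG) */
                    return 0;

            *t5 = t3 & ~((uintptr_t)SZREG - 1);     /* round dest down...      */
            *t6 = t4 & ~((uintptr_t)SZREG - 1);     /* ...and the end down     */
            if (*t5 != t3)
                    *t5 += SZREG;                   /* first aligned address inside dest */
            return 1;
    }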
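
The heart of the RISC-V file is the pair of "Fix Misalignment Copy Loop" blocks: when dest and src cannot share the same alignment, src is rounded down to an aligned word and every aligned store to dest is stitched together from two adjacent source words, shifted by a6 bits one way and a7 = XLEN - a6 bits the other (the assembly derives a7 with not a7, a6 followed by addi a7, a7, (SZREG * 8 + 1), i.e. two's-complement negation plus XLEN). A little-endian C sketch of the forward direction; the names are illustrative and the real loop is unrolled by two with a byte-copied tail:

    #include <stddef.h>
    #include <stdint.h>

    /* dest is already word aligned and src is NOT co-aligned with it, so
     * the shift is never zero (co-aligned buffers take a simpler path).
     * 'words' is the number of aligned words to store. */
    static void fixup_copy_forward(unsigned long *dest, uintptr_t src,
                                   size_t words)
    {
            unsigned int off = src & (sizeof(long) - 1);    /* a5: src offset     */
            unsigned int sh  = off * 8;                     /* a6: shift in bits  */
            unsigned int ish = sizeof(long) * 8 - sh;       /* a7: inverse shift  */
            const unsigned long *s = (const unsigned long *)(src - off);
            unsigned long lo = s[0], hi;

            for (size_t i = 0; i < words; i++) {
                    hi = s[i + 1];          /* next aligned source word */
                    /* store_ptr[i] = (load_val0 >> {a6}) | (load_val1 << {a7}) */
                    dest[i] = (lo >> sh) | (hi << ish);
                    lo = hi;
            }
    }

As in the assembly, the aligned source word holding the final bytes is loaded in full; the unaligned head and tail of dest are handled by the byte-copy helpers around the loop.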
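
On the Alpha side, the cmpule/cmpule/xor sequence at entry makes the two interesting decisions: buffers that do not overlap are handed straight to memcpy (the bne $1,memcpy tail branch), and the xor of the two pointers tells whether 8-byte ldq/stq loops can be used after a short byte-aligning prologue. A C rendering of that dispatch, with the word loops replaced by plain byte loops for brevity; the function name is hypothetical:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void *alpha_style_memmove(void *dest, const void *src, size_t n)
    {
            uintptr_t d = (uintptr_t)dest, s = (uintptr_t)src;
            unsigned char *dp = dest;
            const unsigned char *sp = src;
            int co_aligned;

            /* cmpule $4,$17 / cmpule $5,$16: no overlap, defer to memcpy */
            if (d + n <= s || s + n <= d)
                    return memcpy(dest, src, n);

            /* xor $16,$17 / and 7: co-aligned buffers get the 8-byte
             * ldq/stq loops in the real routine; this sketch only records
             * the test and byte-copies either way. */
            co_aligned = (((d ^ s) & 7) == 0);
            (void)co_aligned;

            if (d <= s) {                           /* $memmove_up: copy forward */
                    for (size_t i = 0; i < n; i++)
                            dp[i] = sp[i];
            } else {                                /* copy downward */
                    for (size_t i = n; i-- > 0; )
                            dp[i] = sp[i];
            }
            return dest;
    }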
                                                      
