~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/sparc/lib/memcpy.S

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /* memcpy.S: Sparc optimized memcpy and memmove code
  3  * Hand optimized from GNU libc's memcpy and memmove
  4  * Copyright (C) 1991,1996 Free Software Foundation
  5  * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
  6  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  7  * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
  8  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  9  */
 10 
 11 #include <linux/export.h>
 12 
 /* FUNC(x): declare x as a global symbol of type function, align to 4 bytes, and define the label x at this point. */
 13 #define FUNC(x)                 \
 14         .globl  x;              \
 15         .type   x,@function;    \
 16         .align  4;              \
 17 x:
 18 
 /*
  * MOVE_BIGCHUNK: copy 32 bytes from [%src + offset] to [%dst + offset].
  * Loads are four doubleword ldd's into the register pairs t0/t1, t2/t3,
  * t4/t5, t6/t7; stores are eight single-word st's, so only the loads are
  * doubleword accesses.  Neither pointer register is modified.
  * The first instruction must stay identical to the first instruction of
  * MOVE_BIGALIGNCHUNK: memcpy executes it in the delay slot of a branch
  * that targets "82f + 4".
  */
 19 /* Both these macros have to start with exactly the same insn */
 20 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 21         ldd     [%src + (offset) + 0x00], %t0; \
 22         ldd     [%src + (offset) + 0x08], %t2; \
 23         ldd     [%src + (offset) + 0x10], %t4; \
 24         ldd     [%src + (offset) + 0x18], %t6; \
 25         st      %t0, [%dst + (offset) + 0x00]; \
 26         st      %t1, [%dst + (offset) + 0x04]; \
 27         st      %t2, [%dst + (offset) + 0x08]; \
 28         st      %t3, [%dst + (offset) + 0x0c]; \
 29         st      %t4, [%dst + (offset) + 0x10]; \
 30         st      %t5, [%dst + (offset) + 0x14]; \
 31         st      %t6, [%dst + (offset) + 0x18]; \
 32         st      %t7, [%dst + (offset) + 0x1c];
 33 
 /*
  * MOVE_BIGALIGNCHUNK: copy 32 bytes from [%src + offset] to [%dst + offset]
  * using doubleword accesses on both sides (four ldd + four std).
  * t0, t2, t4, t6 name the first register of each pair; t1, t3, t5, t7 are
  * accepted for signature parity with MOVE_BIGCHUNK but not referenced.
  * Must begin with the same instruction as MOVE_BIGCHUNK.
  */
 34 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 35         ldd     [%src + (offset) + 0x00], %t0; \
 36         ldd     [%src + (offset) + 0x08], %t2; \
 37         ldd     [%src + (offset) + 0x10], %t4; \
 38         ldd     [%src + (offset) + 0x18], %t6; \
 39         std     %t0, [%dst + (offset) + 0x00]; \
 40         std     %t2, [%dst + (offset) + 0x08]; \
 41         std     %t4, [%dst + (offset) + 0x10]; \
 42         std     %t6, [%dst + (offset) + 0x18];
 43 
 /*
  * MOVE_LASTCHUNK: copy the 16 bytes that END "offset" bytes before %src / %dst,
  * i.e. [%src - offset - 16, %src - offset) -> same range relative to %dst.
  * Two ldd loads, four word stores: 6 instructions per expansion.  The
  * computed jump in memcpy (label 3) depends on that instruction count.
  */
 44 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
 45         ldd     [%src - (offset) - 0x10], %t0; \
 46         ldd     [%src - (offset) - 0x08], %t2; \
 47         st      %t0, [%dst - (offset) - 0x10]; \
 48         st      %t1, [%dst - (offset) - 0x0c]; \
 49         st      %t2, [%dst - (offset) - 0x08]; \
 50         st      %t3, [%dst - (offset) - 0x04];
 51 
 /*
  * MOVE_LASTALIGNCHUNK: same 16-byte range as MOVE_LASTCHUNK, but stored with
  * two std's.  4 instructions per expansion; the computed jump before
  * label 83 depends on that count.  t1 and t3 are not referenced.
  */
 52 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
 53         ldd     [%src - (offset) - 0x10], %t0; \
 54         ldd     [%src - (offset) - 0x08], %t2; \
 55         std     %t0, [%dst - (offset) - 0x10]; \
 56         std     %t2, [%dst - (offset) - 0x08];
 57 
 /*
  * MOVE_SHORTCHUNK: copy the 2 bytes that end "offset" bytes before %src / %dst,
  * one byte at a time (no alignment requirement).  4 instructions per
  * expansion; the computed jump at label 88/20 depends on that count.
  */
 58 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
 59         ldub    [%src - (offset) - 0x02], %t0; \
 60         ldub    [%src - (offset) - 0x01], %t1; \
 61         stb     %t0, [%dst - (offset) - 0x02]; \
 62         stb     %t1, [%dst - (offset) - 0x01];
 63 
 64         .text
 65         .align  4
 66 
 /*
  * memmove: %o0 = dst, %o1 = src, %o2 = len.  Returns the original dst in %o0
  * (kept in %g7 for the whole routine).
  *
  * - dst <= src (unsigned): joins memcpy's forward copy at local label 9.
  * - src + len <= dst (no overlap): joins memcpy at local label 0.
  * - otherwise: copies len bytes one at a time from the highest address down.
  *
  * An instruction indented by one extra space sits in the delay slot of the
  * branch directly above it and executes before the branch target.
  */
 67 FUNC(memmove)
 68 EXPORT_SYMBOL(memmove)
 69         cmp             %o0, %o1
 70         mov             %o0, %g7        /* remember dst for the return value */
 71         bleu            9f              /* dst <= src: forward copy is safe */
 72          sub            %o0, %o1, %o4   /* %o4 = dst - src, consumed at label 9 */
 73 
 74         add             %o1, %o2, %o3   /* %o3 = src + len */
 75         cmp             %o3, %o0
 76         bleu            0f              /* src + len <= dst: regions do not overlap */
 77          andcc          %o4, 3, %o5     /* same insn as at label 9; sets the flags tested at label 0 */
 78 
 79         add             %o1, %o2, %o1
 80         add             %o0, %o2, %o0
 81         sub             %o1, 1, %o1     /* %o1 = address of last source byte */
 82         sub             %o0, 1, %o0     /* %o0 = address of last destination byte */
 83         
 84 1:      /* reverse_bytes */
 85 
 86         ldub            [%o1], %o4
 87         subcc           %o2, 1, %o2     /* len--, sets the flags for the bne below */
 88         stb             %o4, [%o0]
 89         sub             %o1, 1, %o1
 90         bne             1b              /* loop until len reaches 0 */
 91          sub            %o0, 1, %o0
 92 
 93         retl
 94          mov            %g7, %o0        /* return original dst */
 95 
 96 /* NOTE: This code is executed just for the cases,
 97          where %src (=%o1) & 3 is != 0.
 98          We need to align it to 4. So, for (%src & 3)
 99          1 we need to do ldub,lduh
100          2 lduh
101          3 just ldub
102          so even if it looks weird, the branches
103          are correct here. -jj
104  */
/*
 * Reached from memcpy through "bne 78b".  Copies 1, 2 or 3 bytes, advancing
 * %o0/%o1 and reducing %o2 accordingly, then continues at memcpy's local
 * label 3.  The bne after the byte copy tests the flags produced by the
 * "andcc %o1, 2" in the first branch's delay slot (computed on the original
 * %o1); the instructions in between are not the cc-setting variants.
 */
105 78:     /* dword_align */
106 
107         andcc           %o1, 1, %g0
108         be              4f              /* src even => (src & 3) == 2: halfword only */
109          andcc          %o1, 2, %g0     /* flags for the bne below */
110 
111         ldub            [%o1], %g2      /* src odd: copy one byte */
112         add             %o1, 1, %o1
113         stb             %g2, [%o0]
114         sub             %o2, 1, %o2
115         bne             3f              /* (src & 3) was 3: now 4-byte aligned */
116          add            %o0, 1, %o0
117 4:
118         lduh            [%o1], %g2      /* copy one halfword */
119         add             %o1, 2, %o1
120         sth             %g2, [%o0]
121         sub             %o2, 2, %o2
122         b               3f
123          add            %o0, 2, %o0
124 
/*
 * memcpy: %o0 = dst, %o1 = src, %o2 = len.  Returns the original dst in %o0
 * (kept in %g7).  memmove also enters here at local labels 9 and 0.
 *
 * Dispatch:
 *   (dst - src) & 3 != 0        -> 86 (non_aligned)
 *   len <= 15                   -> 90 (short_aligned_end)
 *   src & 3 != 0                -> 78 (dword_align), which returns to label 3
 * After that the code below copies, with %g1 = bytes remaining:
 *   one word if src & 4 is set, 128-byte chunks while %g1 >= 128,
 *   16-byte chunks via a computed jump into the table at 79,
 *   then 8, 4, 2 and 1 trailing bytes selected by the low bits of %g1.
 *
 * An instruction indented by one extra space sits in the delay slot of the
 * branch directly above it.
 */
125 FUNC(memcpy)    /* %o0=dst %o1=src %o2=len */
126 EXPORT_SYMBOL(memcpy)
127 
128         sub             %o0, %o1, %o4   /* %o4 = dst - src */
129         mov             %o0, %g7        /* remember dst for the return value */
130 9:
131         andcc           %o4, 3, %o5     /* relative word misalignment of dst vs src */
132 0:
133         bne             86f             /* not mutually word-alignable */
134          cmp            %o2, 15
135 
136         bleu            90f             /* len <= 15 */
137          andcc          %o1, 3, %g0
138 
139         bne             78b             /* src not 4-byte aligned */
140 3:
141          andcc          %o1, 4, %g0     /* delay slot of the bne above AND re-entry point from 78 */
142 
143         be              2f              /* src & 4 == 0: no leading word needed */
144          mov            %o2, %g1        /* %g1 = bytes remaining */
145 
146         ld              [%o1], %o4      /* copy one word so that src & 4 becomes 0 */
147         sub             %g1, 4, %g1
148         st              %o4, [%o0]
149         add             %o1, 4, %o1
150         add             %o0, 4, %o0
151 2:
152         andcc           %g1, 0xffffff80, %g0    /* any bit >= bit 7 set, i.e. %g1 >= 128? */
153         be              3f              /* fewer than 128 bytes left */
154          andcc          %o0, 4, %g0
155 
156         be              82f + 4         /* dst & 4 == 0: use the ldd/std loop; its first insn runs in the delay slot below */
157 5:
158         MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
159         MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
160         MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
161         MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
162         sub             %g1, 128, %g1
163         add             %o1, 128, %o1
164         cmp             %g1, 128
165         bge             5b              /* NOTE(review): signed compare; a remaining count with bit 31 set would exit early - presumably never happens, confirm */
166          add            %o0, 128, %o0
167 3:
168         andcc           %g1, 0x70, %g4  /* %g4 = bytes to copy in 16-byte chunks (0..112) */
169         be              80f
170          andcc          %g1, 8, %g0     /* flags consumed at label 80 */
171 
            /*
             * Computed jump into the table below.  Each MOVE_LASTCHUNK is 6
             * instructions (24 bytes of code) per 16 bytes copied, so the
             * entry point is 80f - (%g4 + %g4 / 2).  Both pointers are
             * advanced by %g4 first because the table addresses backwards
             * from them.  None of the instructions up to label 80 is a
             * cc-setting variant, so the flags from "andcc %g1, 8" survive.
             */
172         sethi           %hi(80f), %o5
173         srl             %g4, 1, %o4
174         add             %g4, %o4, %o4
175         add             %o1, %g4, %o1
176         sub             %o5, %o4, %o5
177         jmpl            %o5 + %lo(80f), %g0
178          add            %o0, %g4, %o0
179 
180 79:     /* memcpy_table */
181 
182         MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
183         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
184         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
185         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
186         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
187         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
188         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
189 
190 80:     /* memcpy_table_end */
191         be              81f             /* bit 3 of %g1 clear: no 8-byte piece */
192          andcc          %g1, 4, %g0
193 
194         ldd             [%o1], %g2      /* copy 8 bytes: doubleword load, two word stores */
195         add             %o0, 8, %o0
196         st              %g2, [%o0 - 0x08]
197         add             %o1, 8, %o1
198         st              %g3, [%o0 - 0x04]
199 
200 81:     /* memcpy_last7 */
201 
202         be              1f              /* bit 2 clear: no 4-byte piece */
203          andcc          %g1, 2, %g0
204 
205         ld              [%o1], %g2
206         add             %o1, 4, %o1
207         st              %g2, [%o0]
208         add             %o0, 4, %o0
209 1:
210         be              1f              /* bit 1 clear: no 2-byte piece */
211          andcc          %g1, 1, %g0
212 
213         lduh            [%o1], %g2
214         add             %o1, 2, %o1
215         sth             %g2, [%o0]
216         add             %o0, 2, %o0
217 1:
218         be              1f              /* bit 0 clear: no final byte */
219          nop
220 
221         ldub            [%o1], %g2
222         stb             %g2, [%o0]
223 1:
224         retl
225          mov            %g7, %o0        /* return original dst */
226 
/*
 * ldd_std: variant of the memcpy bulk loop that stores with std.
 * Entered from memcpy at "82f + 4", i.e. one instruction past label 82,
 * because the first ldd has already run in the delay slot of that branch.
 * %g1 = bytes remaining, %g7 = original dst.  Copies 128-byte chunks while
 * %g1 >= 128, then 16-byte chunks via the table at 83, then 8/4/2/1 bytes
 * selected by the low bits of %g1.
 */
227 82:     /* ldd_std */
228         MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
229         MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
230         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
231         MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
232         subcc           %g1, 128, %g1   /* flags set here are overwritten by the cmp below */
233         add             %o1, 128, %o1
234         cmp             %g1, 128
235         bge             82b             /* NOTE(review): signed compare, same caveat as the loop at label 5 - confirm */
236          add            %o0, 128, %o0
237 
238         andcc           %g1, 0x70, %g4  /* %g4 = bytes to copy in 16-byte chunks (0..112) */
239         be              84f
240          andcc          %g1, 8, %g0     /* flags consumed at label 84 */
241 
            /*
             * Computed jump: each MOVE_LASTALIGNCHUNK is 4 instructions
             * (16 bytes of code) per 16 bytes copied, so the entry point is
             * simply 84f - %g4.
             */
242         sethi           %hi(84f), %o5
243         add             %o1, %g4, %o1
244         sub             %o5, %g4, %o5
245         jmpl            %o5 + %lo(84f), %g0
246          add            %o0, %g4, %o0
247 
248 83:     /* amemcpy_table */
249 
250         MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
251         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
252         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
253         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
254         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
255         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
256         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
257 
258 84:     /* amemcpy_table_end */
259         be              85f             /* bit 3 of %g1 clear: no 8-byte piece */
260          andcc          %g1, 4, %g0
261 
262         ldd             [%o1], %g2
263         add             %o0, 8, %o0
264         std             %g2, [%o0 - 0x08]
265         add             %o1, 8, %o1
266 85:     /* amemcpy_last7 */
267         be              1f              /* bit 2 clear: no 4-byte piece */
268          andcc          %g1, 2, %g0
269 
270         ld              [%o1], %g2
271         add             %o1, 4, %o1
272         st              %g2, [%o0]
273         add             %o0, 4, %o0
274 1:
275         be              1f              /* bit 1 clear: no 2-byte piece */
276          andcc          %g1, 1, %g0
277 
278         lduh            [%o1], %g2
279         add             %o1, 2, %o1
280         sth             %g2, [%o0]
281         add             %o0, 2, %o0
282 1:
283         be              1f              /* bit 0 clear: no final byte */
284          nop
285 
286         ldub            [%o1], %g2
287         stb             %g2, [%o0]
288 1:
289         retl
290          mov            %g7, %o0        /* return original dst */
291 
/*
 * non_aligned: reached from memcpy's "bne 86f" when (dst - src) & 3 != 0.
 *
 * len <= 6 goes to the byte copier at 88.  Otherwise a new register window
 * is opened (dst/src/len become %i0/%i1/%i2) and:
 *   1. 0..3 bytes are copied singly until dst (%i0) is 4-byte aligned;
 *   2. src is rounded down to a word boundary and whole source words are
 *      loaded; every destination word is built from two neighbouring
 *      source words as (earlier << %g4) | (later >> %l0), where
 *      %g4 = 8 * (src & 3) and %l0 = 32 - %g4;
 *   3. the loop at 5/7/8/9 is unrolled four words deep and is entered at
 *      the point selected by (len & 0xc); %g3 counts words;
 *   4. after the last word, %i1 is moved back to a byte address and the
 *      trailing 2 and/or 1 bytes (bits 1 and 0 of len) are copied.
 * %g7 (original dst) is returned through the restore at the end.
 */
292 86:     /* non_aligned */
293         cmp             %o2, 6
294         bleu            88f             /* len <= 6: plain byte copy */
295          nop
296 
297         save            %sp, -96, %sp   /* new window: %o0-%o2 are now %i0-%i2 */
298         andcc           %i0, 3, %g0
299         be              61f             /* dst already 4-byte aligned */
300          andcc          %i0, 1, %g0
301         be              60f             /* dst even, so (dst & 3) == 2: copy two bytes */
302          andcc          %i0, 2, %g0     /* flags for the bne below (original %i0) */
303 
304         ldub            [%i1], %g5      /* dst odd: copy one byte */
305         add             %i1, 1, %i1
306         stb             %g5, [%i0]
307         sub             %i2, 1, %i2
308         bne             61f             /* (dst & 3) was 3: now aligned */
309          add            %i0, 1, %i0
310 60:
311         ldub            [%i1], %g3      /* copy two bytes individually (src alignment unknown) */
312         add             %i1, 2, %i1
313         stb             %g3, [%i0]
314         sub             %i2, 2, %i2
315         ldub            [%i1 - 1], %g3
316         add             %i0, 2, %i0
317         stb             %g3, [%i0 - 1]
318 61:
319         and             %i1, 3, %g2     /* %g2 = src & 3 */
320         and             %i2, 0xc, %g3   /* %g3 = len & 0xc, selects the loop entry point */
321         and             %i1, -4, %i1    /* round src down to a word boundary */
322         cmp             %g3, 4
323         sll             %g2, 3, %g4     /* %g4 = left shift in bits */
324         mov             32, %g2
325         be              4f              /* (len & 0xc) == 4 */
326          sub            %g2, %g4, %l0   /* %l0 = 32 - %g4 = right shift in bits */
327         
328         blu             3f              /* (len & 0xc) == 0; still the flags of "cmp %g3, 4" */
329          cmp            %g3, 0x8
330 
331         be              2f              /* (len & 0xc) == 8 */
332          srl            %i2, 2, %g3     /* %g3 = len / 4 (word count) */
333 
            /* (len & 0xc) == 0xc: preload two words, enter the loop at 8 */
334         ld              [%i1], %i3
335         add             %i0, -8, %i0
336         ld              [%i1 + 4], %i4
337         b               8f
338          add            %g3, 1, %g3
339 2:
340         ld              [%i1], %i4
341         add             %i0, -12, %i0
342         ld              [%i1 + 4], %i5
343         add             %g3, 2, %g3
344         b               9f
345          add            %i1, -4, %i1
346 3:
347         ld              [%i1], %g1
348         add             %i0, -4, %i0
349         ld              [%i1 + 4], %i3
350         srl             %i2, 2, %g3
351         b               7f
352          add            %i1, 4, %i1
353 4:
354         ld              [%i1], %i5
355         cmp             %i2, 7
356         ld              [%i1 + 4], %g1
357         srl             %i2, 2, %g3
358         bleu            10f             /* len <= 7: only the single final word at 10 remains */
359          add            %i1, 8, %i1
360 
361         ld              [%i1], %i3
362         add             %g3, -1, %g3
            /* Unrolled merge loop; source words rotate through %i5, %g1, %i3, %i4. */
363 5:
364         sll             %i5, %g4, %g2
365         srl             %g1, %l0, %g5
366         or              %g2, %g5, %g2
367         st              %g2, [%i0]
368 7:
369         ld              [%i1 + 4], %i4
370         sll             %g1, %g4, %g2
371         srl             %i3, %l0, %g5
372         or              %g2, %g5, %g2
373         st              %g2, [%i0 + 4]
374 8:
375         ld              [%i1 + 8], %i5
376         sll             %i3, %g4, %g2
377         srl             %i4, %l0, %g5
378         or              %g2, %g5, %g2
379         st              %g2, [%i0 + 8]
380 9:
381         ld              [%i1 + 12], %g1
382         sll             %i4, %g4, %g2
383         srl             %i5, %l0, %g5
384         addcc           %g3, -4, %g3    /* four words consumed; flags drive the bne,a below */
385         or              %g2, %g5, %g2
386         add             %i1, 16, %i1
387         st              %g2, [%i0 + 12]
388         add             %i0, 16, %i0
389         bne,a           5b
390          ld             [%i1], %i3      /* annulled delay slot: executed only when the branch is taken */
391 10:
392         sll             %i5, %g4, %g2   /* build and store the final merged word */
393         srl             %g1, %l0, %g5
394         srl             %l0, 3, %g3     /* %g3 = %l0 / 8 = 4 - (original src & 3) */
395         or              %g2, %g5, %g2
396         sub             %i1, %g3, %i1   /* turn %i1 back into a byte address for the tail */
397         andcc           %i2, 2, %g0
398         st              %g2, [%i0]
399         be              1f              /* bit 1 of len clear: no 2-byte tail */
400          andcc          %i2, 1, %g0
401 
402         ldub            [%i1], %g2      /* tail bytes go after the word just stored, hence the +4 */
403         add             %i1, 2, %i1
404         stb             %g2, [%i0 + 4]
405         add             %i0, 2, %i0
406         ldub            [%i1 - 1], %g2
407         stb             %g2, [%i0 + 3]
408 1:
409         be              1f              /* bit 0 of len clear: no final byte */
410          nop
411         ldub            [%i1], %g2
412         stb             %g2, [%i0 + 4]
413 1:
414         ret
415          restore        %g7, %g0, %o0   /* pop the window; caller's %o0 = %g7 = original dst */
416 
/*
 * short_end: byte-wise copy of a short block with no alignment assumptions.
 * Reached from 86 (len <= 6) and from 90 via "bne 88b" (len <= 15).
 * %o3 = len & 0xe bytes are copied two at a time through a computed jump
 * into the MOVE_SHORTCHUNK table; each entry is 4 instructions (16 bytes of
 * code) per 2 bytes copied, so the entry point is 89f - 8 * %o3.  Both
 * pointers are advanced by %o3 first because the table addresses backwards
 * from them.  The odd final byte (bit 0 of len) is handled at 89.
 */
417 88:     /* short_end */
418 
419         and             %o2, 0xe, %o3
420 20:
421         sethi           %hi(89f), %o5
422         sll             %o3, 3, %o4     /* code offset = 8 * %o3 */
423         add             %o0, %o3, %o0
424         sub             %o5, %o4, %o5
425         add             %o1, %o3, %o1
426         jmpl            %o5 + %lo(89f), %g0
427          andcc          %o2, 1, %g0     /* flags consumed at label 89 */
428 
429         MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
430         MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
431         MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
432         MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
433         MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
434         MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
435         MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
436 
437 89:     /* short_table_end */
438 
439         be              1f              /* len even: nothing left */
440          nop
441 
442         ldub            [%o1], %g2
443         stb             %g2, [%o0]
444 1:
445         retl
446          mov            %g7, %o0        /* return original dst */
447 
/*
 * short_aligned_end: reached from memcpy's "bleu 90f" when len <= 15 and
 * (dst - src) & 3 == 0.  On entry the flags hold the result of
 * "andcc %o1, 3" from that branch's delay slot.
 * If src is not 4-byte aligned, falls back to the byte copier at 88.
 * Otherwise copies 8 bytes when bit 3 of len is set, then lets memcpy's
 * tail at 81 handle the remaining 4/2/1 bytes with %g1 = len.
 */
448 90:     /* short_aligned_end */
449         bne             88b             /* src & 3 != 0 */
450          andcc          %o2, 8, %g0
451 
452         be              1f              /* bit 3 of len clear: no 8-byte piece */
453          andcc          %o2, 4, %g0     /* flags consumed by the first branch at 81 */
454 
455         ld              [%o1 + 0x00], %g2
456         ld              [%o1 + 0x04], %g3
457         add             %o1, 8, %o1
458         st              %g2, [%o0 + 0x00]
459         st              %g3, [%o0 + 0x04]
460         add             %o0, 8, %o0
461 1:
462         b               81b
463          mov            %o2, %g1        /* 81 expects the remaining length in %g1 */

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php