~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/arch/x86/lib/memcpy_64.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-only */
  2 /* Copyright 2002 Andi Kleen */
  3 
  4 #include <linux/export.h>
  5 #include <linux/linkage.h>
  6 #include <asm/errno.h>
  7 #include <asm/cpufeatures.h>
  8 #include <asm/alternative.h>
  9 
 10 .section .noinstr.text, "ax"    /* ELF flags: a = allocatable, x = executable; presumably the kernel's no-instrumentation text section - confirm against the linker script */
 11 
 12 /*
 13  * memcpy - Copy a memory block.
 14  *
 15  * Input:
 16  *  rdi destination
 17  *  rsi source
 18  *  rdx count
 19  *
 20  * Output:
 21  * rax original destination
 22  *
 23  * The FSRM alternative should be done inline (avoiding the call and
 24  * the disgusting return handling), but that would require some help
 25  * from the compiler for better calling conventions.
 26  *
 27  * The 'rep movsb' itself is small enough to replace the call, but the
 28  * two register moves blow up the code. And one of them is "needed"
 29  * only for the return value that is the same as the destination input,
 30  * which the compiler could/should do much better anyway.
 31  */
 32 SYM_TYPED_FUNC_START(__memcpy)
 33         ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
 34         /* Default entry is the jmp to memcpy_orig; with X86_FEATURE_FSRM the empty replacement is used and execution falls through to rep movsb. */
 35         movq %rdi, %rax         /* return value: original destination */
 36         movq %rdx, %rcx         /* rep count = byte count */
 37         rep movsb               /* copy rcx bytes from (%rsi) to (%rdi) */
 38         RET
 39 SYM_FUNC_END(__memcpy)
 40 EXPORT_SYMBOL(__memcpy)
 41 
 42 SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)        /* memcpy becomes an alias name for __memcpy; exact linkage is decided by the macro, which is defined outside this file */
 43 EXPORT_SYMBOL(memcpy)
 44 
 45 SYM_FUNC_START_LOCAL(memcpy_orig)       /* open-coded copy reached via the jmp in __memcpy; in: rdi=dst rsi=src rdx=count, out: rax=dst; scratch: rcx, r8-r11 */
 46         movq %rdi, %rax         /* return value: original destination */
 47         /* Fewer than 32 bytes: skip the block loops and go straight to the tail code. */
 48         cmpq $0x20, %rdx
 49         jb .Lhandle_tail
 50 
 51         /*
 52          * We check whether memory false dependence could occur (signed compare
 53          * of the low address bytes: src < dst selects the backward copy).
 54          */
 55         cmp  %dil, %sil
 56         jl .Lcopy_backward
 57         subq $0x20, %rdx        /* bias count by -32 so the loop's subq borrows on the last full block */
 58 .Lcopy_forward_loop:
 59         subq $0x20,     %rdx    /* CF set when fewer than 32 bytes will remain after this block */
 60 
 61         /*
 62          * Move in blocks of 4x8 bytes (movq/leaq leave the flags from subq intact):
 63          */
 64         movq 0*8(%rsi), %r8
 65         movq 1*8(%rsi), %r9
 66         movq 2*8(%rsi), %r10
 67         movq 3*8(%rsi), %r11
 68         leaq 4*8(%rsi), %rsi
 69 
 70         movq %r8,       0*8(%rdi)
 71         movq %r9,       1*8(%rdi)
 72         movq %r10,      2*8(%rdi)
 73         movq %r11,      3*8(%rdi)
 74         leaq 4*8(%rdi), %rdi
 75         jae  .Lcopy_forward_loop        /* tests the borrow from the subq at the loop top */
 76         addl $0x20,     %edx    /* undo the bias: edx = leftover byte count (0..31) */
 77         jmp  .Lhandle_tail
 78 
 79 .Lcopy_backward:
 80         /*
 81          * Point rsi/rdi just past the end of each buffer; the copy runs tail to head.
 82          */
 83         addq %rdx,      %rsi
 84         addq %rdx,      %rdi
 85         subq $0x20,     %rdx    /* same -32 bias as the forward loop */
 86         /*
 87          * At most 3 ALU operations in one cycle,
 88          * so pad with NOPs so the loop starts on a 16-byte boundary.
 89          */
 90         .p2align 4
 91 .Lcopy_backward_loop:
 92         subq $0x20,     %rdx    /* CF set when fewer than 32 bytes will remain after this block */
 93         movq -1*8(%rsi),        %r8
 94         movq -2*8(%rsi),        %r9
 95         movq -3*8(%rsi),        %r10
 96         movq -4*8(%rsi),        %r11
 97         leaq -4*8(%rsi),        %rsi
 98         movq %r8,               -1*8(%rdi)
 99         movq %r9,               -2*8(%rdi)
100         movq %r10,              -3*8(%rdi)
101         movq %r11,              -4*8(%rdi)
102         leaq -4*8(%rdi),        %rdi
103         jae  .Lcopy_backward_loop       /* tests the borrow from the subq at the loop top */
104 
105         /*
106          * Undo the bias and move rsi/rdi back to the head, where the leftover bytes are.
107          */
108         addl $0x20,     %edx    /* edx = leftover byte count (0..31) */
109         subq %rdx,      %rsi
110         subq %rdx,      %rdi
111 .Lhandle_tail:
112         cmpl $16,       %edx    /* edx = bytes still to copy (0..31), starting at rsi -> rdi */
113         jb   .Lless_16bytes
114 
115         /*
116          * Copy 16 to 31 bytes as the first 16 and the last 16 bytes (the two may overlap).
117          */
118         movq 0*8(%rsi), %r8
119         movq 1*8(%rsi), %r9
120         movq -2*8(%rsi, %rdx),  %r10
121         movq -1*8(%rsi, %rdx),  %r11
122         movq %r8,       0*8(%rdi)
123         movq %r9,       1*8(%rdi)
124         movq %r10,      -2*8(%rdi, %rdx)
125         movq %r11,      -1*8(%rdi, %rdx)
126         RET
127         .p2align 4
128 .Lless_16bytes:
129         cmpl $8,        %edx
130         jb   .Lless_8bytes
131         /*
132          * Copy 8 to 15 bytes as the first 8 and the last 8 bytes (the two may overlap).
133          */
134         movq 0*8(%rsi), %r8
135         movq -1*8(%rsi, %rdx),  %r9
136         movq %r8,       0*8(%rdi)
137         movq %r9,       -1*8(%rdi, %rdx)
138         RET
139         .p2align 4
140 .Lless_8bytes:
141         cmpl $4,        %edx
142         jb   .Lless_3bytes
143 
144         /*
145          * Copy 4 to 7 bytes as the first 4 and the last 4 bytes (the two may overlap).
146          */
147         movl (%rsi), %ecx
148         movl -4(%rsi, %rdx), %r8d
149         movl %ecx, (%rdi)
150         movl %r8d, -4(%rdi, %rdx)
151         RET
152         .p2align 4
153 .Lless_3bytes:
154         subl $1, %edx           /* edx = count - 1; borrows (CF) when count was 0, ZF when count was 1 */
155         jb .Lend
156         /*
157          * Move data from 1 byte to 3 bytes.
158          */
159         movzbl (%rsi), %ecx     /* load byte 0; the flags from subl are left intact */
160         jz .Lstore_1byte        /* count was exactly 1 */
161         movzbq 1(%rsi), %r8
162         movzbq (%rsi, %rdx), %r9        /* last byte, at index count - 1 */
163         movb %r8b, 1(%rdi)
164         movb %r9b, (%rdi, %rdx)
165 .Lstore_1byte:
166         movb %cl, (%rdi)
167 
168 .Lend:
169         RET
170 SYM_FUNC_END(memcpy_orig)
171 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php