~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/lib/memcpy_64.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0-only */
  2 /* Copyright 2002 Andi Kleen */
  3 
  4 #include <linux/export.h>
  5 #include <linux/linkage.h>
  6 #include <linux/cfi_types.h>
  7 #include <asm/errno.h>
  8 #include <asm/cpufeatures.h>
  9 #include <asm/alternative.h>
 10 
 11 .section .noinstr.text, "ax"        /* flags: "a" = allocatable, "x" = executable; the code below is assembled into .noinstr.text */
 12 
 13 /*
 14  * memcpy - Copy a memory block.
 15  *
 16  * Input:
 17  *  rdi destination
 18  *  rsi source
 19  *  rdx count
 20  *
 21  * Output:
 22  * rax original destination
 23  *
 24  * The FSRM alternative should be done inline (avoiding the call and
 25  * the disgusting return handling), but that would require some help
 26  * from the compiler for better calling conventions.
 27  *
 28  * The 'rep movsb' itself is small enough to replace the call, but the
 29  * two register moves blow up the code. And one of them is "needed"
 30  * only for the return value that is the same as the destination input,
 31  * which the compiler could/should do much better anyway.
 32  */
 33 SYM_TYPED_FUNC_START(__memcpy)
            /*
             * In:  rdi = destination, rsi = source, rdx = byte count
             * Out: rax = original destination
             *
             * The first instruction is an ALTERNATIVE site: as assembled it
             * is "jmp memcpy_orig", and the replacement listed for
             * X86_FEATURE_FSRM is empty.
             * NOTE(review): the patching semantics come from
             * <asm/alternative.h>, which is not visible here -- presumably
             * CPUs with FSRM fall through to the 'rep movsb' below and all
             * other CPUs take the jump. Confirm against that header.
             */
 34         ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
 35 
 36         movq %rdi, %rax         /* return value: 'rep movsb' advances rdi, so save it first */
 37         movq %rdx, %rcx         /* 'rep' takes its repeat count from rcx */
 38         rep movsb               /* copy rcx bytes from (rsi) to (rdi); assumes DF is clear -- TODO confirm */
 39         RET                     /* RET is a macro defined outside this file */
 40 SYM_FUNC_END(__memcpy)
 41 EXPORT_SYMBOL(__memcpy)
 42 
 43 SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)        /* NOTE(review): presumably makes 'memcpy' an alias of __memcpy; the macro is defined outside this file -- confirm the resulting linkage */
 44 EXPORT_SYMBOL(memcpy)                           /* 'memcpy' gets the same EXPORT_SYMBOL treatment as __memcpy */
 45 
 46 SYM_FUNC_START_LOCAL(memcpy_orig)
            /*
             * memcpy_orig - copy a block using integer register moves.
             *
             * Named as the target of the "jmp memcpy_orig" alternative in
             * __memcpy.
             *
             * In:   rdi = destination, rsi = source, rdx = byte count
             * Out:  rax = original destination
             * May modify: rcx, rdx, rsi, rdi, r8-r11, flags
             *
             * Counts below 0x20 go straight to .Lhandle_tail.  Larger counts
             * first run a loop that moves 0x20 bytes per iteration (forward
             * or backward, chosen by the low-byte comparison below) and then
             * hand the remaining 0..31 bytes to .Lhandle_tail.
             */
 47         movq %rdi, %rax         /* return value; rdi itself is advanced by the loops below */
 48 
 49         cmpq $0x20, %rdx
 50         jb .Lhandle_tail        /* fewer than 32 bytes: no loop needed */
 51 
 52         /*
 53          * Check whether a false memory dependence could occur and pick
 54          * the copy direction accordingly.  Only the low bytes of the two
             * pointers are compared (signed): the backward loop is used when
             * the low byte of the source is less than that of the destination.
 55          */
 56         cmp  %dil, %sil
 57         jl .Lcopy_backward
 58         subq $0x20, %rdx        /* rdx = remaining - 0x20 (remaining >= 0x20 here) */
 59 .Lcopy_forward_loop:
 60         subq $0x20,     %rdx    /* CF is set once fewer than 0x20 bytes will remain after this block */
 61 
 62         /*
 63          * Move in blocks of 4x8 bytes: four loads, then four stores.
 64          */
 65         movq 0*8(%rsi), %r8
 66         movq 1*8(%rsi), %r9
 67         movq 2*8(%rsi), %r10
 68         movq 3*8(%rsi), %r11
 69         leaq 4*8(%rsi), %rsi    /* advance source; leaq does not touch the flags */
 70 
 71         movq %r8,       0*8(%rdi)
 72         movq %r9,       1*8(%rdi)
 73         movq %r10,      2*8(%rdi)
 74         movq %r11,      3*8(%rdi)
 75         leaq 4*8(%rdi), %rdi    /* advance destination; flags still untouched */
 76         jae  .Lcopy_forward_loop        /* tests CF from the subq at the loop top: movq/leaq leave flags alone */
 77         addl $0x20,     %edx    /* undo the -0x20 bias: edx = 0..31 bytes left; the 32-bit write zero-extends into rdx */
 78         jmp  .Lhandle_tail
 79 
 80 .Lcopy_backward:
 81         /*
 82          * Calculate copy position to tail: advance rsi/rdi by the count so
             * that they point just past the end of each buffer, then bias the
             * count by -0x20 exactly as the forward path does.
 83          */
 84         addq %rdx,      %rsi
 85         addq %rdx,      %rdi
 86         subq $0x20,     %rdx
 87         /*
 88          * At most 3 ALU operations in one cycle, so pad with NOPs
 89          * (.p2align 4) to start the loop on a 16-byte boundary.
 90          */
 91         .p2align 4
 92 .Lcopy_backward_loop:
 93         subq $0x20,     %rdx    /* CF is set once fewer than 0x20 bytes will remain after this block */
 94         movq -1*8(%rsi),        %r8
 95         movq -2*8(%rsi),        %r9
 96         movq -3*8(%rsi),        %r10
 97         movq -4*8(%rsi),        %r11
 98         leaq -4*8(%rsi),        %rsi    /* step source down by 32; flags preserved */
 99         movq %r8,               -1*8(%rdi)
100         movq %r9,               -2*8(%rdi)
101         movq %r10,              -3*8(%rdi)
102         movq %r11,              -4*8(%rdi)
103         leaq -4*8(%rdi),        %rdi    /* step destination down by 32; flags preserved */
104         jae  .Lcopy_backward_loop       /* tests CF from the subq at the loop top */
105 
106         /*
107          * Calculate copy position to head: edx becomes the 0..31 bytes
             * still to copy, which are the first bytes of the buffers, so
             * step rsi/rdi back by that amount to the buffer start.
108          */
109         addl $0x20,     %edx
110         subq %rdx,      %rsi
111         subq %rdx,      %rdi
112 .Lhandle_tail:                  /* here edx = bytes left (below 32); rsi/rdi point at the first of them */
113         cmpl $16,       %edx
114         jb   .Lless_16bytes
115 
116         /*
117          * Move data from 16 bytes to 31 bytes: copy the first 16 and the
             * last 16 bytes.  The two halves may overlap; all four loads are
             * issued before any store.
118          */
119         movq 0*8(%rsi), %r8
120         movq 1*8(%rsi), %r9
121         movq -2*8(%rsi, %rdx),  %r10
122         movq -1*8(%rsi, %rdx),  %r11
123         movq %r8,       0*8(%rdi)
124         movq %r9,       1*8(%rdi)
125         movq %r10,      -2*8(%rdi, %rdx)
126         movq %r11,      -1*8(%rdi, %rdx)
127         RET
128         .p2align 4
129 .Lless_16bytes:
130         cmpl $8,        %edx
131         jb   .Lless_8bytes
132         /*
133          * Move data from 8 bytes to 15 bytes: first 8 and last 8 bytes
             * (possibly overlapping), both loaded before either store.
134          */
135         movq 0*8(%rsi), %r8
136         movq -1*8(%rsi, %rdx),  %r9
137         movq %r8,       0*8(%rdi)
138         movq %r9,       -1*8(%rdi, %rdx)
139         RET
140         .p2align 4
141 .Lless_8bytes:
142         cmpl $4,        %edx
143         jb   .Lless_3bytes
144 
145         /*
146          * Move data from 4 bytes to 7 bytes: first 4 and last 4 bytes
             * (possibly overlapping), both loaded before either store.
147          */
148         movl (%rsi), %ecx
149         movl -4(%rsi, %rdx), %r8d
150         movl %ecx, (%rdi)
151         movl %r8d, -4(%rdi, %rdx)
152         RET
153         .p2align 4
154 .Lless_3bytes:                  /* reached with 0..3 bytes left */
155         subl $1, %edx           /* edx = count - 1; CF set if count was 0, ZF set if count was 1 */
156         jb .Lend                /* count was 0: nothing to copy */
157         /*
158          * Move data from 1 byte to 3 bytes.
159          */
160         movzbl (%rsi), %ecx     /* first byte; movzbl leaves the flags from subl intact */
161         jz .Lstore_1byte        /* count was 1: only the first byte is stored */
162         movzbq 1(%rsi), %r8     /* second byte */
163         movzbq (%rsi, %rdx), %r9        /* last byte, at index count - 1 (1 or 2) */
164         movb %r8b, 1(%rdi)
165         movb %r9b, (%rdi, %rdx)
166 .Lstore_1byte:
167         movb %cl, (%rdi)
168 
169 .Lend:
170         RET
171 SYM_FUNC_END(memcpy_orig)
172 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php