~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/arch/x86/lib/memset_64.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /* Copyright 2002 Andi Kleen, SuSE Labs */
  3 
  4 #include <linux/export.h>
  5 #include <linux/linkage.h>
  6 #include <asm/cpufeatures.h>
  7 #include <asm/alternative.h>
  8 
  9 .section .noinstr.text, "ax"
 10 
 11 /*
 12  * ISO C memset - set a memory block to a byte value. This function uses fast
 13  * string to get better performance than the original function. The code is
 14  * simpler and shorter than the original function as well.
 15  *
 16  * rdi   destination
 17  * rsi   value (char)
 18  * rdx   count (bytes)
 19  *
 20  * rax   original destination
 21  *
 22  * The FSRS alternative should be done inline (avoiding the call and
 23  * the disgusting return handling), but that would require some help
 24  * from the compiler for better calling conventions.
 25  *
 26  * The 'rep stosb' itself is small enough to replace the call, but all
 27  * the register moves blow up the code. And two of them are "needed"
 28  * only for the return value that is the same as the destination input,
 29  * which the compiler could/should do much better anyway.
 30  */
 31 SYM_FUNC_START(__memset)   /* memset: rdi = dst, sil = fill byte, rdx = byte count; returns dst in rax */
 32         ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS   /* default: jump to memset_orig; with X86_FEATURE_FSRS the replacement is empty so execution falls through (assumes the usual ALTERNATIVE(old, new, feature) semantics - confirm in <asm/alternative.h>) */
 33 
 34         movq %rdi,%r9   /* keep the original dst: rep stosb advances rdi */
 35         movb %sil,%al   /* stosb stores al, so the fill byte goes there */
 36         movq %rdx,%rcx   /* rep uses rcx as the repeat count */
 37         rep stosb   /* store al to (rdi) rcx times */
 38         movq %r9,%rax   /* return value = original dst */
 39         RET
 40 SYM_FUNC_END(__memset)
 41 EXPORT_SYMBOL(__memset)   /* export __memset for use outside this file */
 42 
 43 SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)   /* memset is declared as an alias of __memset; exact linkage is decided by the macro (definition not shown here) */
 44 EXPORT_SYMBOL(memset)
 45 
 46 SYM_FUNC_START_LOCAL(memset_orig)   /* fallback reached via the jmp in __memset: rdi = dst, sil = fill byte, rdx = byte count; returns dst in rax */
 47         movq %rdi,%r10   /* keep the original dst for the return value (restored at .Lende) */
 48 
 49         /* expand byte value: rax = fill byte replicated into all 8 bytes */
 50         movzbl %sil,%ecx   /* ecx = fill byte, zero-extended */
 51         movabs $0x0101010101010101,%rax
 52         imulq  %rcx,%rax   /* byte * 0x0101..01 copies the byte into every byte lane */
 53 
 54         /* align dst: r9d = dst & 7, non-zero means dst is not 8-byte aligned */
 55         movl  %edi,%r9d
 56         andl  $7,%r9d
 57         jnz  .Lbad_alignment
 58 .Lafter_bad_alignment:
 59 
 60         movq  %rdx,%rcx
 61         shrq  $6,%rcx   /* rcx = number of whole 64-byte chunks */
 62         jz       .Lhandle_tail   /* fewer than 64 bytes: skip the unrolled loop */
 63 
 64         .p2align 4
 65 .Lloop_64:
 66         decq  %rcx   /* sets ZF for the jnz below; the movq/leaq in between leave flags untouched */
 67         movq  %rax,(%rdi)
 68         movq  %rax,8(%rdi)
 69         movq  %rax,16(%rdi)
 70         movq  %rax,24(%rdi)
 71         movq  %rax,32(%rdi)
 72         movq  %rax,40(%rdi)
 73         movq  %rax,48(%rdi)
 74         movq  %rax,56(%rdi)
 75         leaq  64(%rdi),%rdi   /* advance dst with lea so the flags from decq survive */
 76         jnz    .Lloop_64
 77 
 78         /* Handle tail in loops. The loops should be faster than hard
 79            to predict jump tables. */
 80         .p2align 4
 81 .Lhandle_tail:
 82         movl    %edx,%ecx
 83         andl    $63&(~7),%ecx   /* ecx = (count & 63) rounded down to a multiple of 8 */
 84         jz              .Lhandle_7   /* no whole 8-byte words left */
 85         shrl    $3,%ecx   /* bytes -> number of 8-byte stores */
 86         .p2align 4
 87 .Lloop_8:
 88         decl   %ecx   /* flags consumed by the jnz below (movq/leaq preserve them) */
 89         movq  %rax,(%rdi)
 90         leaq  8(%rdi),%rdi
 91         jnz    .Lloop_8
 92 
 93 .Lhandle_7:
 94         andl    $7,%edx   /* edx = remaining 0..7 bytes */
 95         jz      .Lende
 96         .p2align 4
 97 .Lloop_1:
 98         decl    %edx   /* flags consumed by the jnz below (movb/leaq preserve them) */
 99         movb    %al,(%rdi)
100         leaq    1(%rdi),%rdi
101         jnz     .Lloop_1
102 
103 .Lende:
104         movq    %r10,%rax   /* return value = original dst */
105         RET
106 
107 .Lbad_alignment:   /* entered with r9 = dst & 7 (non-zero) */
108         cmpq $7,%rdx
109         jbe     .Lhandle_7   /* count <= 7 (unsigned): the byte loop handles everything */
110         movq %rax,(%rdi)        /* unaligned store covering the misaligned head */
111         movq $8,%r8
112         subq %r9,%r8   /* r8 = 8 - (dst & 7): bytes up to the next 8-byte boundary */
113         addq %r8,%rdi   /* dst is now 8-byte aligned */
114         subq %r8,%rdx   /* count shrinks by the head bytes; later stores rewrite the overlap with the same value */
115         jmp .Lafter_bad_alignment
116 .Lfinal:   /* no reference to this label is visible in this file */
117 SYM_FUNC_END(memset_orig)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php