~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/loongarch/lib/memset.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/loongarch/lib/memset.S (Architecture i386) and /arch/sparc/lib/memset.S (Architecture sparc)


  1 /* SPDX-License-Identifier: GPL-2.0 */              1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*                                             !!   2 /* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code
  3  * Copyright (C) 2020-2022 Loongson Technology !!   3  * Copyright (C) 1991,1996 Free Software Foundation
  4  */                                            !!   4  * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  5                                                !!   5  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  6 #include <linux/export.h>                      << 
  7 #include <asm/alternative-asm.h>               << 
  8 #include <asm/asm.h>                           << 
  9 #include <asm/asmmacro.h>                      << 
 10 #include <asm/cpu.h>                           << 
 11 #include <asm/regdef.h>                        << 
 12 #include <asm/unwind_hints.h>                  << 
 13                                                << 
 14 .macro fill_to_64 r0                           << 
 15         bstrins.d \r0, \r0, 15, 8              << 
 16         bstrins.d \r0, \r0, 31, 16             << 
 17         bstrins.d \r0, \r0, 63, 32             << 
 18 .endm                                          << 
 19                                                << 
 20 .section .noinstr.text, "ax"                   << 
 21                                                << 
 22 SYM_FUNC_START(memset)                         << 
 23         /*                                     << 
 24          * Some CPUs support hardware unaligne << 
 25          */                                    << 
 26         ALTERNATIVE     "b __memset_generic",  << 
 27                         "b __memset_fast", CPU << 
 28 SYM_FUNC_END(memset)                           << 
 29 SYM_FUNC_ALIAS(__memset, memset)               << 
 30                                                << 
 31 EXPORT_SYMBOL(memset)                          << 
 32 EXPORT_SYMBOL(__memset)                        << 
 33                                                << 
 34 _ASM_NOKPROBE(memset)                          << 
 35 _ASM_NOKPROBE(__memset)                        << 
 36                                                << 
 37 /*                                             << 
 38  * void *__memset_generic(void *s, int c, size << 
 39  *                                                  6  *
 40  * a0: s                                       !!   7  * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and
 41  * a1: c                                       !!   8  * number of bytes not yet set if exception occurs and we were called as
 42  * a2: n                                       !!   9  * clear_user.
 43  */                                                10  */
 44 SYM_FUNC_START(__memset_generic)               << 
 45         move    a3, a0                         << 
 46         beqz    a2, 2f                         << 
 47                                                << 
 48 1:      st.b    a1, a0, 0                      << 
 49         addi.d  a0, a0, 1                      << 
 50         addi.d  a2, a2, -1                     << 
 51         bgt     a2, zero, 1b                   << 
 52                                                << 
 53 2:      move    a0, a3                         << 
 54         jr      ra                             << 
 55 SYM_FUNC_END(__memset_generic)                 << 
 56 _ASM_NOKPROBE(__memset_generic)                << 
 57                                                << 
 58 /*                                             << 
 59  * void *__memset_fast(void *s, int c, size_t  << 
 60  *                                             << 
 61  * a0: s                                       << 
 62  * a1: c                                       << 
 63  * a2: n                                       << 
 64  */                                            << 
 65 SYM_FUNC_START(__memset_fast)                  << 
 66         /* fill a1 to 64 bits */               << 
 67         fill_to_64 a1                          << 
 68                                                << 
 69         sltui   t0, a2, 9                      << 
 70         bnez    t0, .Lsmall                    << 
 71                                                << 
 72         add.d   a2, a0, a2                     << 
 73         st.d    a1, a0, 0                      << 
 74                                                << 
 75         /* align up address */                 << 
 76         addi.d  a3, a0, 8                      << 
 77         bstrins.d       a3, zero, 2, 0         << 
 78                                                << 
 79         addi.d  a4, a2, -64                    << 
 80         bgeu    a3, a4, .Llt64                 << 
 81                                                << 
 82         /* set 64 bytes at a time */           << 
 83 .Lloop64:                                      << 
 84         st.d    a1, a3, 0                      << 
 85         st.d    a1, a3, 8                      << 
 86         st.d    a1, a3, 16                     << 
 87         st.d    a1, a3, 24                     << 
 88         st.d    a1, a3, 32                     << 
 89         st.d    a1, a3, 40                     << 
 90         st.d    a1, a3, 48                     << 
 91         st.d    a1, a3, 56                     << 
 92         addi.d  a3, a3, 64                     << 
 93         bltu    a3, a4, .Lloop64               << 
 94                                                << 
 95         /* set the remaining bytes */          << 
 96 .Llt64:                                        << 
 97         addi.d  a4, a2, -32                    << 
 98         bgeu    a3, a4, .Llt32                 << 
 99         st.d    a1, a3, 0                      << 
100         st.d    a1, a3, 8                      << 
101         st.d    a1, a3, 16                     << 
102         st.d    a1, a3, 24                     << 
103         addi.d  a3, a3, 32                     << 
104                                                << 
105 .Llt32:                                        << 
106         addi.d  a4, a2, -16                    << 
107         bgeu    a3, a4, .Llt16                 << 
108         st.d    a1, a3, 0                      << 
109         st.d    a1, a3, 8                      << 
110         addi.d  a3, a3, 16                     << 
111                                                << 
112 .Llt16:                                        << 
113         addi.d  a4, a2, -8                     << 
114         bgeu    a3, a4, .Llt8                  << 
115         st.d    a1, a3, 0                      << 
116                                                << 
117 .Llt8:                                         << 
118         st.d    a1, a2, -8                     << 
119                                                << 
120         /* return */                           << 
121         jr      ra                             << 
122                                                << 
123         .align  4                              << 
124 .Lsmall:                                       << 
125         pcaddi  t0, 4                          << 
126         slli.d  a2, a2, 4                      << 
127         add.d   t0, t0, a2                     << 
128         jr      t0                             << 
129                                                << 
130         .align  4                              << 
131 0:      jr      ra                             << 
132                                                    11 
133         .align  4                              !!  12 #include <linux/export.h>
134 1:      st.b    a1, a0, 0                      !!  13 #include <asm/ptrace.h>
135         jr      ra                             << 
136                                                << 
137         .align  4                              << 
138 2:      st.h    a1, a0, 0                      << 
139         jr      ra                             << 
140                                                << 
141         .align  4                              << 
142 3:      st.h    a1, a0, 0                      << 
143         st.b    a1, a0, 2                      << 
144         jr      ra                             << 
145                                                << 
146         .align  4                              << 
147 4:      st.w    a1, a0, 0                      << 
148         jr      ra                             << 
149                                                << 
150         .align  4                              << 
151 5:      st.w    a1, a0, 0                      << 
152         st.b    a1, a0, 4                      << 
153         jr      ra                             << 
154                                                << 
155         .align  4                              << 
156 6:      st.w    a1, a0, 0                      << 
157         st.h    a1, a0, 4                      << 
158         jr      ra                             << 
159                                                << 
160         .align  4                              << 
161 7:      st.w    a1, a0, 0                      << 
162         st.w    a1, a0, 3                      << 
163         jr      ra                             << 
164                                                    14 
165         .align  4                              !!  15 /* Work around cpp -rob */
166 8:      st.d    a1, a0, 0                      !!  16 #define ALLOC #alloc
167         jr      ra                             !!  17 #define EXECINSTR #execinstr
168 SYM_FUNC_END(__memset_fast)                    !!  18 #define EX(x,y,a,b)                             \
169 _ASM_NOKPROBE(__memset_fast)                   !!  19 98:     x,y;                                    \
                                                   >>  20         .section .fixup,ALLOC,EXECINSTR;        \
                                                   >>  21         .align  4;                              \
                                                   >>  22 99:     retl;                                   \
                                                   >>  23          a, b, %o0;                             \
                                                   >>  24         .section __ex_table,ALLOC;              \
                                                   >>  25         .align  4;                              \
                                                   >>  26         .word   98b, 99b;                       \
                                                   >>  27         .text;                                  \
                                                   >>  28         .align  4
                                                   >>  29 
                                                   >>  30 #define STORE(source, base, offset, n)          \
                                                   >>  31 98:     std source, [base + offset + n];        \
                                                   >>  32         .section .fixup,ALLOC,EXECINSTR;        \
                                                   >>  33         .align  4;                              \
                                                   >>  34 99:     ba 30f;                                 \
                                                   >>  35          sub %o3, n - offset, %o3;              \
                                                   >>  36         .section __ex_table,ALLOC;              \
                                                   >>  37         .align  4;                              \
                                                   >>  38         .word   98b, 99b;                       \
                                                   >>  39         .text;                                  \
                                                   >>  40         .align  4;
                                                   >>  41 
                                                   >>  42 #define STORE_LAST(source, base, offset, n)     \
                                                   >>  43         EX(std source, [base - offset - n],     \
                                                   >>  44            add %o1, offset + n);
                                                   >>  45 
                                                   >>  46 /* Please don't change these macros, unless you change the logic
                                                   >>  47  * in the .fixup section below as well.
                                                   >>  48  * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
                                                   >>  49 #define ZERO_BIG_BLOCK(base, offset, source)    \
                                                   >>  50         STORE(source, base, offset, 0x00);      \
                                                   >>  51         STORE(source, base, offset, 0x08);      \
                                                   >>  52         STORE(source, base, offset, 0x10);      \
                                                   >>  53         STORE(source, base, offset, 0x18);      \
                                                   >>  54         STORE(source, base, offset, 0x20);      \
                                                   >>  55         STORE(source, base, offset, 0x28);      \
                                                   >>  56         STORE(source, base, offset, 0x30);      \
                                                   >>  57         STORE(source, base, offset, 0x38);
                                                   >>  58 
                                                   >>  59 #define ZERO_LAST_BLOCKS(base, offset, source)  \
                                                   >>  60         STORE_LAST(source, base, offset, 0x38); \
                                                   >>  61         STORE_LAST(source, base, offset, 0x30); \
                                                   >>  62         STORE_LAST(source, base, offset, 0x28); \
                                                   >>  63         STORE_LAST(source, base, offset, 0x20); \
                                                   >>  64         STORE_LAST(source, base, offset, 0x18); \
                                                   >>  65         STORE_LAST(source, base, offset, 0x10); \
                                                   >>  66         STORE_LAST(source, base, offset, 0x08); \
                                                   >>  67         STORE_LAST(source, base, offset, 0x00);
                                                   >>  68 
                                                   >>  69         .text
                                                   >>  70         .align 4
                                                   >>  71 
                                                   >>  72         .globl  __bzero_begin
                                                   >>  73 __bzero_begin:
                                                   >>  74 
                                                   >>  75         .globl  __bzero
                                                   >>  76         .type   __bzero,#function
                                                   >>  77         .globl  memset
                                                   >>  78         EXPORT_SYMBOL(__bzero)
                                                   >>  79         EXPORT_SYMBOL(memset)
                                                   >>  80 memset:
                                                   >>  81         mov     %o0, %g1
                                                   >>  82         mov     1, %g4
                                                   >>  83         and     %o1, 0xff, %g3
                                                   >>  84         sll     %g3, 8, %g2
                                                   >>  85         or      %g3, %g2, %g3
                                                   >>  86         sll     %g3, 16, %g2
                                                   >>  87         or      %g3, %g2, %g3
                                                   >>  88         b       1f
                                                   >>  89          mov    %o2, %o1
                                                   >>  90 3:
                                                   >>  91         cmp     %o2, 3
                                                   >>  92         be      2f
                                                   >>  93          EX(stb %g3, [%o0], sub %o1, 0)
                                                   >>  94 
                                                   >>  95         cmp     %o2, 2
                                                   >>  96         be      2f
                                                   >>  97          EX(stb %g3, [%o0 + 0x01], sub %o1, 1)
                                                   >>  98 
                                                   >>  99         EX(stb  %g3, [%o0 + 0x02], sub %o1, 2)
                                                   >> 100 2:
                                                   >> 101         sub     %o2, 4, %o2
                                                   >> 102         add     %o1, %o2, %o1
                                                   >> 103         b       4f
                                                   >> 104          sub    %o0, %o2, %o0
                                                   >> 105 
                                                   >> 106 __bzero:
                                                   >> 107         clr     %g4
                                                   >> 108         mov     %g0, %g3
                                                   >> 109 1:
                                                   >> 110         cmp     %o1, 7
                                                   >> 111         bleu    7f
                                                   >> 112          andcc  %o0, 3, %o2
                                                   >> 113 
                                                   >> 114         bne     3b
                                                   >> 115 4:
                                                   >> 116          andcc  %o0, 4, %g0
                                                   >> 117 
                                                   >> 118         be      2f
                                                   >> 119          mov    %g3, %g2
                                                   >> 120 
                                                   >> 121         EX(st   %g3, [%o0], sub %o1, 0)
                                                   >> 122         sub     %o1, 4, %o1
                                                   >> 123         add     %o0, 4, %o0
                                                   >> 124 2:
                                                   >> 125         andcc   %o1, 0xffffff80, %o3    ! Now everything is 8 aligned and o1 is len to run
                                                   >> 126         be      9f
                                                   >> 127          andcc  %o1, 0x78, %o2
                                                   >> 128 10:
                                                   >> 129         ZERO_BIG_BLOCK(%o0, 0x00, %g2)
                                                   >> 130         subcc   %o3, 128, %o3
                                                   >> 131         ZERO_BIG_BLOCK(%o0, 0x40, %g2)
                                                   >> 132         bne     10b
                                                   >> 133          add    %o0, 128, %o0
                                                   >> 134 
                                                   >> 135         orcc    %o2, %g0, %g0
                                                   >> 136 9:
                                                   >> 137         be      13f
                                                   >> 138          andcc  %o1, 7, %o1
                                                   >> 139 
                                                   >> 140         srl     %o2, 1, %o3
                                                   >> 141         set     13f, %o4
                                                   >> 142         sub     %o4, %o3, %o4
                                                   >> 143         jmp     %o4
                                                   >> 144          add    %o0, %o2, %o0
                                                   >> 145 
                                                   >> 146         ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
                                                   >> 147         ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
                                                   >> 148 13:
                                                   >> 149         be      8f
                                                   >> 150          andcc  %o1, 4, %g0
                                                   >> 151 
                                                   >> 152         be      1f
                                                   >> 153          andcc  %o1, 2, %g0
                                                   >> 154 
                                                   >> 155         EX(st   %g3, [%o0], and %o1, 7)
                                                   >> 156         add     %o0, 4, %o0
                                                   >> 157 1:
                                                   >> 158         be      1f
                                                   >> 159          andcc  %o1, 1, %g0
                                                   >> 160 
                                                   >> 161         EX(sth  %g3, [%o0], and %o1, 3)
                                                   >> 162         add     %o0, 2, %o0
                                                   >> 163 1:
                                                   >> 164         bne,a   8f
                                                   >> 165          EX(stb %g3, [%o0], and %o1, 1)
                                                   >> 166 8:
                                                   >> 167         b       0f
                                                   >> 168          nop
                                                   >> 169 7:
                                                   >> 170         be      13b
                                                   >> 171          orcc   %o1, 0, %g0
                                                   >> 172 
                                                   >> 173         be      0f
                                                   >> 174 8:
                                                   >> 175          add    %o0, 1, %o0
                                                   >> 176         subcc   %o1, 1, %o1
                                                   >> 177         bne     8b
                                                   >> 178          EX(stb %g3, [%o0 - 1], add %o1, 1)
                                                   >> 179 0:
                                                   >> 180         andcc   %g4, 1, %g0
                                                   >> 181         be      5f
                                                   >> 182          nop
                                                   >> 183         retl
                                                   >> 184          mov    %g1, %o0
                                                   >> 185 5:
                                                   >> 186         retl
                                                   >> 187          clr    %o0
                                                   >> 188 
                                                   >> 189         .section .fixup,#alloc,#execinstr
                                                   >> 190         .align  4
                                                   >> 191 30:
                                                   >> 192         and     %o1, 0x7f, %o1
                                                   >> 193         retl
                                                   >> 194          add    %o3, %o1, %o0
170                                                   195 
171 STACK_FRAME_NON_STANDARD __memset_fast         !! 196         .globl __bzero_end
                                                   >> 197 __bzero_end:
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php