~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/loongarch/lib/memset.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/loongarch/lib/memset.S (Architecture i386) and /arch/alpha/lib/memset.S (Architecture alpha)


  1 /* SPDX-License-Identifier: GPL-2.0 */              1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*                                                  2 /*
  3  * Copyright (C) 2020-2022 Loongson Technology !!   3  * linux/arch/alpha/lib/memset.S
  4  */                                            << 
  5                                                << 
  6 #include <linux/export.h>                      << 
  7 #include <asm/alternative-asm.h>               << 
  8 #include <asm/asm.h>                           << 
  9 #include <asm/asmmacro.h>                      << 
 10 #include <asm/cpu.h>                           << 
 11 #include <asm/regdef.h>                        << 
 12 #include <asm/unwind_hints.h>                  << 
 13                                                << 
 14 .macro fill_to_64 r0                           << 
 15         bstrins.d \r0, \r0, 15, 8              << 
 16         bstrins.d \r0, \r0, 31, 16             << 
 17         bstrins.d \r0, \r0, 63, 32             << 
 18 .endm                                          << 
 19                                                << 
 20 .section .noinstr.text, "ax"                   << 
 21                                                << 
 22 SYM_FUNC_START(memset)                         << 
 23         /*                                     << 
 24          * Some CPUs support hardware unaligne << 
 25          */                                    << 
 26         ALTERNATIVE     "b __memset_generic",  << 
 27                         "b __memset_fast", CPU << 
 28 SYM_FUNC_END(memset)                           << 
 29 SYM_FUNC_ALIAS(__memset, memset)               << 
 30                                                << 
 31 EXPORT_SYMBOL(memset)                          << 
 32 EXPORT_SYMBOL(__memset)                        << 
 33                                                << 
 34 _ASM_NOKPROBE(memset)                          << 
 35 _ASM_NOKPROBE(__memset)                        << 
 36                                                << 
 37 /*                                             << 
 38  * void *__memset_generic(void *s, int c, size << 
 39  *                                                  4  *
 40  * a0: s                                       !!   5  * This is an efficient (and small) implementation of the C library "memset()"
 41  * a1: c                                       !!   6  * function for the alpha.
 42  * a2: n                                       !!   7  *
 43  */                                            !!   8  *      (C) Copyright 1996 Linus Torvalds
 44 SYM_FUNC_START(__memset_generic)               << 
 45         move    a3, a0                         << 
 46         beqz    a2, 2f                         << 
 47                                                << 
 48 1:      st.b    a1, a0, 0                      << 
 49         addi.d  a0, a0, 1                      << 
 50         addi.d  a2, a2, -1                     << 
 51         bgt     a2, zero, 1b                   << 
 52                                                << 
 53 2:      move    a0, a3                         << 
 54         jr      ra                             << 
 55 SYM_FUNC_END(__memset_generic)                 << 
 56 _ASM_NOKPROBE(__memset_generic)                << 
 57                                                << 
 58 /*                                             << 
 59  * void *__memset_fast(void *s, int c, size_t  << 
 60  *                                                  9  *
 61  * a0: s                                       !!  10  * This routine is "moral-ware": you are free to use it any way you wish, and
 62  * a1: c                                       !!  11  * the only obligation I put on you is a moral one: if you make any improvements
 63  * a2: n                                       !!  12  * to the routine, please send me your improvements for me to use similarly.
                                                   >>  13  *
                                                   >>  14  * The scheduling comments are according to the EV5 documentation (and done by 
                                                   >>  15  * hand, so they might well be incorrect, please do tell me about it..)
 64  */                                                16  */
 65 SYM_FUNC_START(__memset_fast)                  !!  17 #include <linux/export.h>
 66         /* fill a1 to 64 bits */               !!  18         .set noat
 67         fill_to_64 a1                          !!  19         .set noreorder
 68                                                !!  20 .text
 69         sltui   t0, a2, 9                      !!  21         .globl memset
 70         bnez    t0, .Lsmall                    !!  22         .globl __memset
 71                                                !!  23         .globl ___memset
 72         add.d   a2, a0, a2                     !!  24         .globl __memset16
 73         st.d    a1, a0, 0                      !!  25         .globl __constant_c_memset
 74                                                !!  26 
 75         /* align up address */                 !!  27         .ent ___memset
 76         addi.d  a3, a0, 8                      !!  28 .align 5
 77         bstrins.d       a3, zero, 2, 0         !!  29 ___memset:
 78                                                !!  30         .frame $30,0,$26,0
 79         addi.d  a4, a2, -64                    !!  31         .prologue 0
 80         bgeu    a3, a4, .Llt64                 !!  32 
 81                                                !!  33         and $17,255,$1          /* E1 */
 82         /* set 64 bytes at a time */           !!  34         insbl $17,1,$17         /* .. E0 */
 83 .Lloop64:                                      !!  35         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
 84         st.d    a1, a3, 0                      !!  36         sll $17,16,$1           /* E1 (p-c latency, next cycle) */
 85         st.d    a1, a3, 8                      !!  37 
 86         st.d    a1, a3, 16                     !!  38         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
 87         st.d    a1, a3, 24                     !!  39         sll $17,32,$1           /* E1 (p-c latency, next cycle) */
 88         st.d    a1, a3, 32                     !!  40         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
 89         st.d    a1, a3, 40                     !!  41         ldq_u $31,0($30)        /* .. E1 */
 90         st.d    a1, a3, 48                     !!  42 
 91         st.d    a1, a3, 56                     !!  43 .align 5
 92         addi.d  a3, a3, 64                     !!  44 __constant_c_memset:
 93         bltu    a3, a4, .Lloop64               !!  45         addq $18,$16,$6         /* E0 */
 94                                                !!  46         bis $16,$16,$0          /* .. E1 */
 95         /* set the remaining bytes */          !!  47         xor $16,$6,$1           /* E0 */
 96 .Llt64:                                        !!  48         ble $18,end             /* .. E1 */
 97         addi.d  a4, a2, -32                    !!  49 
 98         bgeu    a3, a4, .Llt32                 !!  50         bic $1,7,$1             /* E0 */
 99         st.d    a1, a3, 0                      !!  51         beq $1,within_one_quad  /* .. E1 (note EV5 zero-latency forwarding) */
100         st.d    a1, a3, 8                      !!  52         and $16,7,$3            /* E0 */
101         st.d    a1, a3, 16                     !!  53         beq $3,aligned          /* .. E1 (note EV5 zero-latency forwarding) */
102         st.d    a1, a3, 24                     !!  54 
103         addi.d  a3, a3, 32                     !!  55         ldq_u $4,0($16)         /* E0 */
104                                                !!  56         bis $16,$16,$5          /* .. E1 */
105 .Llt32:                                        !!  57         insql $17,$16,$2        /* E0 */
106         addi.d  a4, a2, -16                    !!  58         subq $3,8,$3            /* .. E1 */
107         bgeu    a3, a4, .Llt16                 !!  59 
108         st.d    a1, a3, 0                      !!  60         addq $18,$3,$18         /* E0   $18 is new count ($3 is negative) */
109         st.d    a1, a3, 8                      !!  61         mskql $4,$16,$4         /* .. E1 (and possible load stall) */
110         addi.d  a3, a3, 16                     !!  62         subq $16,$3,$16         /* E0   $16 is new aligned destination */
111                                                !!  63         bis $2,$4,$1            /* .. E1 */
112 .Llt16:                                        !!  64 
113         addi.d  a4, a2, -8                     !!  65         bis $31,$31,$31         /* E0 */
114         bgeu    a3, a4, .Llt8                  !!  66         ldq_u $31,0($30)        /* .. E1 */
115         st.d    a1, a3, 0                      !!  67         stq_u $1,0($5)          /* E0 */
116                                                !!  68         bis $31,$31,$31         /* .. E1 */
117 .Llt8:                                         !!  69 
118         st.d    a1, a2, -8                     !!  70 .align 4
119                                                !!  71 aligned:
120         /* return */                           !!  72         sra $18,3,$3            /* E0 */
121         jr      ra                             !!  73         and $18,7,$18           /* .. E1 */
122                                                !!  74         bis $16,$16,$5          /* E0 */
123         .align  4                              !!  75         beq $3,no_quad          /* .. E1 */
124 .Lsmall:                                       !!  76 
125         pcaddi  t0, 4                          !!  77 .align 3
126         slli.d  a2, a2, 4                      !!  78 loop:
127         add.d   t0, t0, a2                     !!  79         stq $17,0($5)           /* E0 */
128         jr      t0                             !!  80         subq $3,1,$3            /* .. E1 */
129                                                !!  81         addq $5,8,$5            /* E0 */
130         .align  4                              !!  82         bne $3,loop             /* .. E1 */
131 0:      jr      ra                             !!  83 
132                                                !!  84 no_quad:
133         .align  4                              !!  85         bis $31,$31,$31         /* E0 */
134 1:      st.b    a1, a0, 0                      !!  86         beq $18,end             /* .. E1 */
135         jr      ra                             !!  87         ldq $7,0($5)            /* E0 */
136                                                !!  88         mskqh $7,$6,$2          /* .. E1 (and load stall) */
137         .align  4                              !!  89 
138 2:      st.h    a1, a0, 0                      !!  90         insqh $17,$6,$4         /* E0 */
139         jr      ra                             !!  91         bis $2,$4,$1            /* .. E1 */
140                                                !!  92         stq $1,0($5)            /* E0 */
141         .align  4                              !!  93         ret $31,($26),1         /* .. E1 */
142 3:      st.h    a1, a0, 0                      !!  94 
143         st.b    a1, a0, 2                      !!  95 .align 3
144         jr      ra                             !!  96 within_one_quad:
145                                                !!  97         ldq_u $1,0($16)         /* E0 */
146         .align  4                              !!  98         insql $17,$16,$2        /* E1 */
147 4:      st.w    a1, a0, 0                      !!  99         mskql $1,$16,$4         /* E0 (after load stall) */
148         jr      ra                             !! 100         bis $2,$4,$2            /* E0 */
149                                                !! 101 
150         .align  4                              !! 102         mskql $2,$6,$4          /* E0 */
151 5:      st.w    a1, a0, 0                      !! 103         mskqh $1,$6,$2          /* .. E1 */
152         st.b    a1, a0, 4                      !! 104         bis $2,$4,$1            /* E0 */
153         jr      ra                             !! 105         stq_u $1,0($16)         /* E0 */
154                                                !! 106 
155         .align  4                              !! 107 end:
156 6:      st.w    a1, a0, 0                      !! 108         ret $31,($26),1         /* E1 */
157         st.h    a1, a0, 4                      !! 109         .end ___memset
158         jr      ra                             !! 110 EXPORT_SYMBOL(___memset)
159                                                !! 111 EXPORT_SYMBOL(__constant_c_memset)
160         .align  4                              !! 112 
161 7:      st.w    a1, a0, 0                      !! 113         .align 5
162         st.w    a1, a0, 3                      !! 114         .ent __memset16
163         jr      ra                             !! 115 __memset16:
164                                                !! 116         .prologue 0
165         .align  4                              !! 117 
166 8:      st.d    a1, a0, 0                      !! 118         inswl $17,0,$1          /* E0 */
167         jr      ra                             !! 119         inswl $17,2,$2          /* E0 */
168 SYM_FUNC_END(__memset_fast)                    !! 120         inswl $17,4,$3          /* E0 */
169 _ASM_NOKPROBE(__memset_fast)                   !! 121         or $1,$2,$1             /* .. E1 */
170                                                !! 122         inswl $17,6,$4          /* E0 */
171 STACK_FRAME_NON_STANDARD __memset_fast         !! 123         or $1,$3,$1             /* .. E1 */
                                                   >> 124         or $1,$4,$17            /* E0 */
                                                   >> 125         br __constant_c_memset  /* .. E1 */
                                                   >> 126 
                                                   >> 127         .end __memset16
                                                   >> 128 EXPORT_SYMBOL(__memset16)
                                                   >> 129 
                                                   >> 130 memset = ___memset
                                                   >> 131 __memset = ___memset
                                                   >> 132         EXPORT_SYMBOL(memset)
                                                   >> 133         EXPORT_SYMBOL(__memset)
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php