~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/hexagon/lib/memset.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/hexagon/lib/memset.S (Architecture ppc) and /arch/alpha/lib/memset.S (Architecture alpha)


  1 /* SPDX-License-Identifier: GPL-2.0-only */    !!   1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*                                                  2 /*
  3  * Copyright (c) 2011, The Linux Foundation. A !!   3  * linux/arch/alpha/lib/memset.S
                                                   >>   4  *
                                                   >>   5  * This is an efficient (and small) implementation of the C library "memset()"
                                                   >>   6  * function for the alpha.
                                                   >>   7  *
                                                   >>   8  *      (C) Copyright 1996 Linus Torvalds
                                                   >>   9  *
                                                   >>  10  * This routine is "moral-ware": you are free to use it any way you wish, and
                                                   >>  11  * the only obligation I put on you is a moral one: if you make any improvements
                                                   >>  12  * to the routine, please send me your improvements for me to use similarly.
                                                   >>  13  *
                                                   >>  14  * The scheduling comments are according to the EV5 documentation (and done by 
                                                   >>  15  * hand, so they might well be incorrect, please do tell me about it..)
  4  */                                                16  */
  5                                                !!  17 #include <linux/export.h>
  6                                                !!  18         .set noat
  7 /* HEXAGON assembly optimized memset */        !!  19         .set noreorder
  8 /* Replaces the standard library function mems !!  20 .text
  9                                                !!  21         .globl memset
 10                                                !!  22         .globl __memset
 11         .macro HEXAGON_OPT_FUNC_BEGIN name     !!  23         .globl ___memset
 12         .text                                  !!  24         .globl __memset16
 13         .p2align 4                             !!  25         .globl __constant_c_memset
 14         .globl \name                           !!  26 
 15         .type  \name, @function                !!  27         .ent ___memset
 16 \name:                                         !!  28 .align 5
 17         .endm                                  !!  29 ___memset:
 18                                                !!  30         .frame $30,0,$26,0
 19         .macro HEXAGON_OPT_FUNC_FINISH name    !!  31         .prologue 0
 20         .size  \name, . - \name                !!  32 
 21         .endm                                  !!  33         and $17,255,$1          /* E1 */
 22                                                !!  34         insbl $17,1,$17         /* .. E0 */
 23 /* FUNCTION: memset (v2 version) */            !!  35         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
 24 #if __HEXAGON_ARCH__ < 3                       !!  36         sll $17,16,$1           /* E1 (p-c latency, next cycle) */
 25 HEXAGON_OPT_FUNC_BEGIN memset                  !!  37 
 26         {                                      !!  38         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
 27                 r6 = #8                        !!  39         sll $17,32,$1           /* E1 (p-c latency, next cycle) */
 28                 r7 = extractu(r0, #3 , #0)     !!  40         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
 29                 p0 = cmp.eq(r2, #0)            !!  41         ldq_u $31,0($30)        /* .. E1 */
 30                 p1 = cmp.gtu(r2, #7)           !!  42 
 31         }                                      !!  43 .align 5
 32         {                                      !!  44 __constant_c_memset:
 33                 r4 = vsplatb(r1)               !!  45         addq $18,$16,$6         /* E0 */
 34                 r8 = r0           /* leave r0  !!  46         bis $16,$16,$0          /* .. E1 */
 35                 r9 = sub(r6, r7)  /* bytes unt !!  47         xor $16,$6,$1           /* E0 */
 36                 if p0 jumpr r31   /* count ==  !!  48         ble $18,end             /* .. E1 */
 37         }                                      !!  49 
 38         {                                      !!  50         bic $1,7,$1             /* E0 */
 39                 r3 = #0                        !!  51         beq $1,within_one_quad  /* .. E1 (note EV5 zero-latency forwarding) */
 40                 r7 = #0                        !!  52         and $16,7,$3            /* E0 */
 41                 p0 = tstbit(r9, #0)            !!  53         beq $3,aligned          /* .. E1 (note EV5 zero-latency forwarding) */
 42                 if p1 jump 2f /* skip byte loo !!  54 
 43         }                                      !!  55         ldq_u $4,0($16)         /* E0 */
 44                                                !!  56         bis $16,$16,$5          /* .. E1 */
 45 /* less than 8 bytes to set, so just set a byt !!  57         insql $17,$16,$2        /* E0 */
 46                                                !!  58         subq $3,8,$3            /* .. E1 */
 47                 loop0(1f, r2) /* byte loop */  !!  59 
 48         .falign                                !!  60         addq $18,$3,$18         /* E0   $18 is new count ($3 is negative) */
 49 1: /* byte loop */                             !!  61         mskql $4,$16,$4         /* .. E1 (and possible load stall) */
 50         {                                      !!  62         subq $16,$3,$16         /* E0   $16 is new aligned destination */
 51                 memb(r8++#1) = r4              !!  63         bis $2,$4,$1            /* .. E1 */
 52         }:endloop0                             !!  64 
 53                 jumpr r31                      !!  65         bis $31,$31,$31         /* E0 */
 54         .falign                                !!  66         ldq_u $31,0($30)        /* .. E1 */
 55 2: /* skip byte loop */                        !!  67         stq_u $1,0($5)          /* E0 */
 56         {                                      !!  68         bis $31,$31,$31         /* .. E1 */
 57                 r6 = #1                        !!  69 
 58                 p0 = tstbit(r9, #1)            !!  70 .align 4
 59                 p1 = cmp.eq(r2, #1)            !!  71 aligned:
 60                 if !p0 jump 3f /* skip initial !!  72         sra $18,3,$3            /* E0 */
 61         }                                      !!  73         and $18,7,$18           /* .. E1 */
 62         {                                      !!  74         bis $16,$16,$5          /* E0 */
 63                 memb(r8++#1) = r4              !!  75         beq $3,no_quad          /* .. E1 */
 64                 r3:2 = sub(r3:2, r7:6)         !!  76 
 65                 if p1 jumpr r31                !!  77 .align 3
 66         }                                      !!  78 loop:
 67         .falign                                !!  79         stq $17,0($5)           /* E0 */
 68 3: /* skip initial byte store */               !!  80         subq $3,1,$3            /* .. E1 */
 69         {                                      !!  81         addq $5,8,$5            /* E0 */
 70                 r6 = #2                        !!  82         bne $3,loop             /* .. E1 */
 71                 p0 = tstbit(r9, #2)            !!  83 
 72                 p1 = cmp.eq(r2, #2)            !!  84 no_quad:
 73                 if !p0 jump 4f /* skip initial !!  85         bis $31,$31,$31         /* E0 */
 74         }                                      !!  86         beq $18,end             /* .. E1 */
 75         {                                      !!  87         ldq $7,0($5)            /* E0 */
 76                 memh(r8++#2) = r4              !!  88         mskqh $7,$6,$2          /* .. E1 (and load stall) */
 77                 r3:2 = sub(r3:2, r7:6)         !!  89 
 78                 if p1 jumpr r31                !!  90         insqh $17,$6,$4         /* E0 */
 79         }                                      !!  91         bis $2,$4,$1            /* .. E1 */
 80         .falign                                !!  92         stq $1,0($5)            /* E0 */
 81 4: /* skip initial half store */               !!  93         ret $31,($26),1         /* .. E1 */
 82         {                                      !!  94 
 83                 r6 = #4                        !!  95 .align 3
 84                 p0 = cmp.gtu(r2, #7)           !!  96 within_one_quad:
 85                 p1 = cmp.eq(r2, #4)            !!  97         ldq_u $1,0($16)         /* E0 */
 86                 if !p0 jump 5f /* skip initial !!  98         insql $17,$16,$2        /* E1 */
 87         }                                      !!  99         mskql $1,$16,$4         /* E0 (after load stall) */
 88         {                                      !! 100         bis $2,$4,$2            /* E0 */
 89                 memw(r8++#4) = r4              !! 101 
 90                 r3:2 = sub(r3:2, r7:6)         !! 102         mskql $2,$6,$4          /* E0 */
 91                 p0 = cmp.gtu(r2, #11)          !! 103         mskqh $1,$6,$2          /* .. E1 */
 92                 if p1 jumpr r31                !! 104         bis $2,$4,$1            /* E0 */
 93         }                                      !! 105         stq_u $1,0($16)         /* E0 */
 94         .falign                                !! 106 
 95 5: /* skip initial word store */               !! 107 end:
 96         {                                      !! 108         ret $31,($26),1         /* E1 */
 97                 r10 = lsr(r2, #3)              !! 109         .end ___memset
 98                 p1 = cmp.eq(r3, #1)            !! 110 EXPORT_SYMBOL(___memset)
 99                 if !p0 jump 7f /* skip double  !! 111 EXPORT_SYMBOL(__constant_c_memset)
100         }                                      !! 112 
101         {                                      !! 113         .align 5
102                 r5 = r4                        !! 114         .ent __memset16
103                 r6 = #8                        !! 115 __memset16:
104                 loop0(6f, r10) /* double loop  !! 116         .prologue 0
105         }                                      !! 117 
106                                                !! 118         inswl $17,0,$1          /* E0 */
107 /* set bytes a double word at a time  */       !! 119         inswl $17,2,$2          /* E0 */
108                                                !! 120         inswl $17,4,$3          /* E0 */
109         .falign                                !! 121         or $1,$2,$1             /* .. E1 */
110 6: /* double loop */                           !! 122         inswl $17,6,$4          /* E0 */
111         {                                      !! 123         or $1,$3,$1             /* .. E1 */
112                 memd(r8++#8) = r5:4            !! 124         or $1,$4,$17            /* E0 */
113                 r3:2 = sub(r3:2, r7:6)         !! 125         br __constant_c_memset  /* .. E1 */
114                 p1 = cmp.eq(r2, #8)            !! 126 
115         }:endloop0                             !! 127         .end __memset16
116         .falign                                !! 128 EXPORT_SYMBOL(__memset16)
117 7: /* skip double loop */                      !! 129 
118         {                                      !! 130 memset = ___memset
119                 p0 = tstbit(r2, #2)            !! 131 __memset = ___memset
120                 if p1 jumpr r31                !! 132         EXPORT_SYMBOL(memset)
121         }                                      !! 133         EXPORT_SYMBOL(__memset)
122         {                                      << 
123                 r6 = #4                        << 
124                 p0 = tstbit(r2, #1)            << 
125                 p1 = cmp.eq(r2, #4)            << 
126                 if !p0 jump 8f /* skip final w << 
127         }                                      << 
128         {                                      << 
129                 memw(r8++#4) = r4              << 
130                 r3:2 = sub(r3:2, r7:6)         << 
131                 if p1 jumpr r31                << 
132         }                                      << 
133         .falign                                << 
134 8: /* skip final word store */                 << 
135         {                                      << 
136                 p1 = cmp.eq(r2, #2)            << 
137                 if !p0 jump 9f /* skip final h << 
138         }                                      << 
139         {                                      << 
140                 memh(r8++#2) = r4              << 
141                 if p1 jumpr r31                << 
142         }                                      << 
143         .falign                                << 
144 9: /* skip final half store */                 << 
145         {                                      << 
146                 memb(r8++#1) = r4              << 
147                 jumpr r31                      << 
148         }                                      << 
149 HEXAGON_OPT_FUNC_FINISH memset                 << 
150 #endif                                         << 
151                                                << 
152                                                << 
153 /*  FUNCTION: memset (v3 and higher version)   << 
154 #if __HEXAGON_ARCH__ >= 3                      << 
155 HEXAGON_OPT_FUNC_BEGIN memset                  << 
156         {                                      << 
157                 r7=vsplatb(r1)                 << 
158                 r6 = r0                        << 
159                 if (r2==#0) jump:nt .L1        << 
160         }                                      << 
161         {                                      << 
162                 r5:4=combine(r7,r7)            << 
163                 p0 = cmp.gtu(r2,#8)            << 
164                 if (p0.new) jump:nt .L3        << 
165         }                                      << 
166         {                                      << 
167                 r3 = r0                        << 
168                 loop0(.L47,r2)                 << 
169         }                                      << 
170         .falign                                << 
171 .L47:                                          << 
172         {                                      << 
173                 memb(r3++#1) = r1              << 
174         }:endloop0 /* start=.L47 */            << 
175                 jumpr r31                      << 
176 .L3:                                           << 
177         {                                      << 
178                 p0 = tstbit(r0,#0)             << 
179                 if (!p0.new) jump:nt .L8       << 
180                 p1 = cmp.eq(r2, #1)            << 
181         }                                      << 
182         {                                      << 
183                 r6 = add(r0, #1)               << 
184                 r2 = add(r2,#-1)               << 
185                 memb(r0) = r1                  << 
186                 if (p1) jump .L1               << 
187         }                                      << 
188 .L8:                                           << 
189         {                                      << 
190                 p0 = tstbit(r6,#1)             << 
191                 if (!p0.new) jump:nt .L10      << 
192         }                                      << 
193         {                                      << 
194                 r2 = add(r2,#-2)               << 
195                 memh(r6++#2) = r7              << 
196                 p0 = cmp.eq(r2, #2)            << 
197                 if (p0.new) jump:nt .L1        << 
198         }                                      << 
199 .L10:                                          << 
200         {                                      << 
201                 p0 = tstbit(r6,#2)             << 
202                 if (!p0.new) jump:nt .L12      << 
203         }                                      << 
204         {                                      << 
205                 r2 = add(r2,#-4)               << 
206                 memw(r6++#4) = r7              << 
207                 p0 = cmp.eq(r2, #4)            << 
208                 if (p0.new) jump:nt .L1        << 
209         }                                      << 
210 .L12:                                          << 
211         {                                      << 
212                 p0 = cmp.gtu(r2,#127)          << 
213                 if (!p0.new) jump:nt .L14      << 
214         }                                      << 
215                 r3 = and(r6,#31)               << 
216                 if (r3==#0) jump:nt .L17       << 
217         {                                      << 
218                 memd(r6++#8) = r5:4            << 
219                 r2 = add(r2,#-8)               << 
220         }                                      << 
221                 r3 = and(r6,#31)               << 
222                 if (r3==#0) jump:nt .L17       << 
223         {                                      << 
224                 memd(r6++#8) = r5:4            << 
225                 r2 = add(r2,#-8)               << 
226         }                                      << 
227                 r3 = and(r6,#31)               << 
228                 if (r3==#0) jump:nt .L17       << 
229         {                                      << 
230                 memd(r6++#8) = r5:4            << 
231                 r2 = add(r2,#-8)               << 
232         }                                      << 
233 .L17:                                          << 
234         {                                      << 
235                 r3 = lsr(r2,#5)                << 
236                 if (r1!=#0) jump:nt .L18       << 
237         }                                      << 
238         {                                      << 
239                 r8 = r3                        << 
240                 r3 = r6                        << 
241                 loop0(.L46,r3)                 << 
242         }                                      << 
243         .falign                                << 
244 .L46:                                          << 
245         {                                      << 
246                 dczeroa(r6)                    << 
247                 r6 = add(r6,#32)               << 
248                 r2 = add(r2,#-32)              << 
249         }:endloop0 /* start=.L46 */            << 
250 .L14:                                          << 
251         {                                      << 
252                 p0 = cmp.gtu(r2,#7)            << 
253                 if (!p0.new) jump:nt .L28      << 
254                 r8 = lsr(r2,#3)                << 
255         }                                      << 
256                 loop0(.L44,r8)                 << 
257         .falign                                << 
258 .L44:                                          << 
259         {                                      << 
260                 memd(r6++#8) = r5:4            << 
261                 r2 = add(r2,#-8)               << 
262         }:endloop0 /* start=.L44 */            << 
263 .L28:                                          << 
264         {                                      << 
265                 p0 = tstbit(r2,#2)             << 
266                 if (!p0.new) jump:nt .L33      << 
267         }                                      << 
268         {                                      << 
269                 r2 = add(r2,#-4)               << 
270                 memw(r6++#4) = r7              << 
271         }                                      << 
272 .L33:                                          << 
273         {                                      << 
274                 p0 = tstbit(r2,#1)             << 
275                 if (!p0.new) jump:nt .L35      << 
276         }                                      << 
277         {                                      << 
278                 r2 = add(r2,#-2)               << 
279                 memh(r6++#2) = r7              << 
280         }                                      << 
281 .L35:                                          << 
282                 p0 = cmp.eq(r2,#1)             << 
283                 if (p0) memb(r6) = r1          << 
284 .L1:                                           << 
285                 jumpr r31                      << 
286 .L18:                                          << 
287                 loop0(.L45,r3)                 << 
288         .falign                                << 
289 .L45:                                          << 
290                 dczeroa(r6)                    << 
291         {                                      << 
292                 memd(r6++#8) = r5:4            << 
293                 r2 = add(r2,#-32)              << 
294         }                                      << 
295                 memd(r6++#8) = r5:4            << 
296                 memd(r6++#8) = r5:4            << 
297         {                                      << 
298                 memd(r6++#8) = r5:4            << 
299         }:endloop0 /* start=.L45  */           << 
300                 jump .L14                      << 
301 HEXAGON_OPT_FUNC_FINISH memset                 << 
302 #endif                                         << 
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php