~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/sparc/lib/M7memset.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/sparc/lib/M7memset.S (Architecture i386) and /arch/sparc/lib/M7memset.S (Architecture sparc)


  1 /*                                                  1 /*
  2  * M7memset.S: SPARC M7 optimized memset.           2  * M7memset.S: SPARC M7 optimized memset.
  3  *                                                  3  *
  4  * Copyright (c) 2016, Oracle and/or its affil      4  * Copyright (c) 2016, Oracle and/or its affiliates.  All rights reserved.
  5  */                                                 5  */
  6                                                     6 
  7 /*                                                  7 /*
  8  * M7memset.S: M7 optimized memset.                 8  * M7memset.S: M7 optimized memset.
  9  *                                                  9  *
 10  * char *memset(sp, c, n)                          10  * char *memset(sp, c, n)
 11  *                                                 11  *
 12  * Set an array of n chars starting at sp to t     12  * Set an array of n chars starting at sp to the character c.
 13  * Return sp.                                      13  * Return sp.
 14  *                                                 14  *
 15  * Fast assembler language version of the foll     15  * Fast assembler language version of the following C-program for memset
 16  * which represents the `standard' for the C-l     16  * which represents the `standard' for the C-library.
 17  *                                                 17  *
 18  *      void *                                     18  *      void *
 19  *      memset(void *sp1, int c, size_t n)         19  *      memset(void *sp1, int c, size_t n)
 20  *      {                                          20  *      {
 21  *          if (n != 0) {                          21  *          if (n != 0) {
 22  *              char *sp = sp1;                    22  *              char *sp = sp1;
 23  *              do {                               23  *              do {
 24  *                  *sp++ = (char)c;               24  *                  *sp++ = (char)c;
 25  *              } while (--n != 0);                25  *              } while (--n != 0);
 26  *          }                                      26  *          }
 27  *          return (sp1);                          27  *          return (sp1);
 28  *      }                                          28  *      }
 29  *                                                 29  *
 30  * The algorithm is as follows :                   30  * The algorithm is as follows :
 31  *                                                 31  *
 32  *      For small stores of 7 or fewer bytes,      32  *      For small stores of 7 or fewer bytes, bytes will be stored.
 33  *                                                 33  *
 34  *      For less than 32 bytes stores, align t     34  *      For less than 32 bytes stores, align the address on 4 byte boundary.
 35  *      Then store as many 4-byte chunks, foll     35  *      Then store as many 4-byte chunks, followed by trailing bytes.
 36  *                                                 36  *
 37  *      For sizes of 32 bytes or more, align t     37  *      For sizes of 32 bytes or more, align the address on 8 byte boundary.
 38  *      if (count >= 64) {                         38  *      if (count >= 64) {
 39  *              store 8-bytes chunks to align      39  *              store 8-bytes chunks to align the address on 64 byte boundary
 40  *              if (value to be set is zero &&     40  *              if (value to be set is zero && count >= MIN_ZERO) {
 41  *                      Using BIS stores, set      41  *                      Using BIS stores, set the first long word of each
 42  *                      64-byte cache line to      42  *                      64-byte cache line to zero which will also clear the
 43  *                      other seven long words     43  *                      other seven long words of the cache line.
 44  *              }                                  44  *              }
 45  *              else if (count >= MIN_LOOP) {      45  *              else if (count >= MIN_LOOP) {
 46  *                      Using BIS stores, set      46  *                      Using BIS stores, set the first long word of each of
 47  *                      ST_CHUNK cache lines (     47  *                      ST_CHUNK cache lines (64 bytes each) before the main
 48  *                      loop is entered.           48  *                      loop is entered.
 49  *                      In the main loop, cont     49  *                      In the main loop, continue pre-setting the first long
 50  *                      word of each cache lin     50  *                      word of each cache line ST_CHUNK lines in advance while
 51  *                      setting the other seve     51  *                      setting the other seven long words (56 bytes) of each
 52  *                      cache line until fewer     52  *                      cache line until fewer than ST_CHUNK*64 bytes remain.
 53  *                      Then set the remaining     53  *                      Then set the remaining seven long words of each cache
 54  *                      line that has already      54  *                      line that has already had its first long word set.
 55  *              }                                  55  *              }
 56  *              store remaining data in 64-byt     56  *              store remaining data in 64-byte chunks until less than
 57  *              64 bytes remain.                   57  *              64 bytes remain.
 58  *       }                                         58  *       }
 59  *       Store as many 8-byte chunks, followed     59  *       Store as many 8-byte chunks, followed by trailing bytes.
 60  *                                                 60  *
 61  * BIS = Block Init Store                          61  * BIS = Block Init Store
 62  *   Doing the advance store of the first elem     62  *   Doing the advance store of the first element of the cache line
 63  *   initiates the displacement of a cache lin     63  *   initiates the displacement of a cache line while only using a single
 64  *   instruction in the pipeline. That avoids      64  *   instruction in the pipeline. That avoids various pipeline delays,
 65  *   such as filling the miss buffer. The perf     65  *   such as filling the miss buffer. The performance effect is
 66  *   similar to prefetching for normal stores.     66  *   similar to prefetching for normal stores.
 67  *   The special case for zero fills runs fast     67  *   The special case for zero fills runs faster and uses fewer instruction
 68  *   cycles than the normal memset loop.           68  *   cycles than the normal memset loop.
 69  *                                                 69  *
 70  * We only use BIS for memset of greater than      70  * We only use BIS for memset of greater than MIN_LOOP bytes because a sequence
 71  * of BIS stores must be followed by a membar      71  * of BIS stores must be followed by a membar #StoreStore. The benefit of
 72  * the BIS store must be balanced against the      72  * the BIS store must be balanced against the cost of the membar operation.
 73  */                                                73  */
 74                                                    74 
 75 /*                                                 75 /*
 76  * ASI_STBI_P marks the cache line as "least r     76  * ASI_STBI_P marks the cache line as "least recently used"
 77  * which means if many threads are active, it      77  * which means if many threads are active, it has a high chance
 78  * of being pushed out of the cache between th     78  * of being pushed out of the cache between the first initializing
 79  * store and the final stores.                     79  * store and the final stores.
 80  * Thus, we use ASI_STBIMRU_P which marks the      80  * Thus, we use ASI_STBIMRU_P which marks the cache line as
 81  * "most recently used" for all but the last s     81  * "most recently used" for all but the last store to the cache line.
 82  */                                                82  */
 83                                                    83 
 84 #include <asm/asi.h>                               84 #include <asm/asi.h>
 85 #include <asm/page.h>                              85 #include <asm/page.h>
 86                                                    86 
 87 #define ASI_STBI_P      ASI_BLK_INIT_QUAD_LDD_     87 #define ASI_STBI_P      ASI_BLK_INIT_QUAD_LDD_P
 88 #define ASI_STBIMRU_P   ASI_ST_BLKINIT_MRU_P       88 #define ASI_STBIMRU_P   ASI_ST_BLKINIT_MRU_P
 89                                                    89 
 90                                                    90 
 91 #define ST_CHUNK        24   /* multiple of 4      91 #define ST_CHUNK        24   /* multiple of 4 due to loop unrolling */
 92 #define MIN_LOOP        16320                      92 #define MIN_LOOP        16320
 93 #define MIN_ZERO        512                        93 #define MIN_ZERO        512
 94                                                    94 
 95         .section        ".text"                    95         .section        ".text"
 96         .align          32                         96         .align          32
 97                                                    97 
 98 /*                                                 98 /*
 99  * Define clear_page(dest) as memset(dest, 0,      99  * Define clear_page(dest) as memset(dest, 0, PAGE_SIZE)
100  * (can create a more optimized version later.    100  * (can create a more optimized version later.)
101  */                                               101  */
102         .globl          M7clear_page              102         .globl          M7clear_page
103         .globl          M7clear_user_page         103         .globl          M7clear_user_page
104 M7clear_page:           /* clear_page(dest) */    104 M7clear_page:           /* clear_page(dest) */
105 M7clear_user_page:                                105 M7clear_user_page:
106         set     PAGE_SIZE, %o1                    106         set     PAGE_SIZE, %o1
107         /* fall through into bzero code */        107         /* fall through into bzero code */
108                                                   108 
109         .size           M7clear_page,.-M7clear    109         .size           M7clear_page,.-M7clear_page
110         .size           M7clear_user_page,.-M7    110         .size           M7clear_user_page,.-M7clear_user_page
111                                                   111 
112 /*                                                112 /*
113  * Define bzero(dest, n) as memset(dest, 0, n)    113  * Define bzero(dest, n) as memset(dest, 0, n)
114  * (can create a more optimized version later.    114  * (can create a more optimized version later.)
115  */                                               115  */
116         .globl          M7bzero                   116         .globl          M7bzero
117 M7bzero:                /* bzero(dest, size) *    117 M7bzero:                /* bzero(dest, size) */
118         mov     %o1, %o2                          118         mov     %o1, %o2
119         mov     0, %o1                            119         mov     0, %o1
120         /* fall through into memset code */       120         /* fall through into memset code */
121                                                   121 
122         .size           M7bzero,.-M7bzero         122         .size           M7bzero,.-M7bzero
123                                                   123 
124         .global         M7memset                  124         .global         M7memset
125         .type           M7memset, #function       125         .type           M7memset, #function
126         .register       %g3, #scratch             126         .register       %g3, #scratch
127 M7memset:                                         127 M7memset:
128         mov     %o0, %o5                ! copy    128         mov     %o0, %o5                ! copy sp1 before using it
129         cmp     %o2, 7                  ! if s    129         cmp     %o2, 7                  ! if small counts, just write bytes
130         bleu,pn %xcc, .wrchar                     130         bleu,pn %xcc, .wrchar
131          and     %o1, 0xff, %o1          ! o1     131          and     %o1, 0xff, %o1          ! o1 is (char)c
132                                                   132 
133         sll     %o1, 8, %o3                       133         sll     %o1, 8, %o3
134         or      %o1, %o3, %o1           ! now     134         or      %o1, %o3, %o1           ! now o1 has 2 bytes of c
135         sll     %o1, 16, %o3                      135         sll     %o1, 16, %o3
136         cmp     %o2, 32                           136         cmp     %o2, 32
137         blu,pn  %xcc, .wdalign                    137         blu,pn  %xcc, .wdalign
138          or      %o1, %o3, %o1           ! now    138          or      %o1, %o3, %o1           ! now o1 has 4 bytes of c
139                                                   139 
140         sllx    %o1, 32, %o3                      140         sllx    %o1, 32, %o3
141         or      %o1, %o3, %o1           ! now     141         or      %o1, %o3, %o1           ! now o1 has 8 bytes of c
142                                                   142 
143 .dbalign:                                         143 .dbalign:
144         andcc   %o5, 7, %o3             ! is s    144         andcc   %o5, 7, %o3             ! is sp1 aligned on a 8 byte bound?
145         bz,pt   %xcc, .blkalign         ! alre    145         bz,pt   %xcc, .blkalign         ! already long word aligned
146          sub     %o3, 8, %o3             ! -(b    146          sub     %o3, 8, %o3             ! -(bytes till long word aligned)
147                                                   147 
148         add     %o2, %o3, %o2           ! upda    148         add     %o2, %o3, %o2           ! update o2 with new count
149         ! Set -(%o3) bytes till sp1 long word     149         ! Set -(%o3) bytes till sp1 long word aligned
150 1:      stb     %o1, [%o5]              ! ther    150 1:      stb     %o1, [%o5]              ! there is at least 1 byte to set
151         inccc   %o3                     ! byte    151         inccc   %o3                     ! byte clearing loop
152         bl,pt   %xcc, 1b                          152         bl,pt   %xcc, 1b
153          inc     %o5                              153          inc     %o5
154                                                   154 
155         ! Now sp1 is long word aligned (sp1 is    155         ! Now sp1 is long word aligned (sp1 is found in %o5)
156 .blkalign:                                        156 .blkalign:
157         cmp     %o2, 64                 ! chec    157         cmp     %o2, 64                 ! check if there are 64 bytes to set
158         blu,pn  %xcc, .wrshort                    158         blu,pn  %xcc, .wrshort
159          mov     %o2, %o3                         159          mov     %o2, %o3
160                                                   160 
161         andcc   %o5, 63, %o3            ! is s    161         andcc   %o5, 63, %o3            ! is sp1 block aligned?
162         bz,pt   %xcc, .blkwr            ! now     162         bz,pt   %xcc, .blkwr            ! now block aligned
163          sub     %o3, 64, %o3            ! o3     163          sub     %o3, 64, %o3            ! o3 is -(bytes till block aligned)
164         add     %o2, %o3, %o2           ! o2 i    164         add     %o2, %o3, %o2           ! o2 is the remainder
165                                                   165 
166         ! Store -(%o3) bytes till dst is block    166         ! Store -(%o3) bytes till dst is block (64 byte) aligned.
167         ! Use long word stores.                   167         ! Use long word stores.
168         ! Recall that dst is already long word    168         ! Recall that dst is already long word aligned
169 1:                                                169 1:
170         addcc   %o3, 8, %o3                       170         addcc   %o3, 8, %o3
171         stx     %o1, [%o5]                        171         stx     %o1, [%o5]
172         bl,pt   %xcc, 1b                          172         bl,pt   %xcc, 1b
173          add     %o5, 8, %o5                      173          add     %o5, 8, %o5
174                                                   174 
175         ! Now sp1 is block aligned                175         ! Now sp1 is block aligned
176 .blkwr:                                           176 .blkwr:
177         andn    %o2, 63, %o4            ! calc    177         andn    %o2, 63, %o4            ! calculate size of blocks in bytes
178         brz,pn  %o1, .wrzero            ! spec    178         brz,pn  %o1, .wrzero            ! special case if c == 0
179          and     %o2, 63, %o3            ! %o3    179          and     %o2, 63, %o3            ! %o3 = bytes left after blk stores.
180                                                   180 
181         set     MIN_LOOP, %g1                     181         set     MIN_LOOP, %g1
182         cmp     %o4, %g1                ! chec    182         cmp     %o4, %g1                ! check there are enough bytes to set
183         blu,pn  %xcc, .short_set        ! to j    183         blu,pn  %xcc, .short_set        ! to justify cost of membar
184                                         ! must    184                                         ! must be > pre-cleared lines
185          nop                                      185          nop
186                                                   186 
187         ! initial cache-clearing stores           187         ! initial cache-clearing stores
188         ! get store pipeline moving               188         ! get store pipeline moving
189         rd      %asi, %g3               ! save    189         rd      %asi, %g3               ! save %asi to be restored later
190         wr     %g0, ASI_STBIMRU_P, %asi           190         wr     %g0, ASI_STBIMRU_P, %asi
191                                                   191 
192         ! Primary memset loop for large memset    192         ! Primary memset loop for large memsets
193 .wr_loop:                                         193 .wr_loop:
194         sub     %o5, 8, %o5             ! adju    194         sub     %o5, 8, %o5             ! adjust %o5 for ASI store alignment
195         mov     ST_CHUNK, %g1                     195         mov     ST_CHUNK, %g1
196 .wr_loop_start:                                   196 .wr_loop_start:
197         stxa    %o1, [%o5+8]%asi                  197         stxa    %o1, [%o5+8]%asi
198         subcc   %g1, 4, %g1                       198         subcc   %g1, 4, %g1
199         stxa    %o1, [%o5+8+64]%asi               199         stxa    %o1, [%o5+8+64]%asi
200         add     %o5, 256, %o5                     200         add     %o5, 256, %o5
201         stxa    %o1, [%o5+8-128]%asi              201         stxa    %o1, [%o5+8-128]%asi
202         bgu     %xcc, .wr_loop_start              202         bgu     %xcc, .wr_loop_start
203          stxa    %o1, [%o5+8-64]%asi              203          stxa    %o1, [%o5+8-64]%asi
204                                                   204 
205         sub     %o5, ST_CHUNK*64, %o5   ! rese    205         sub     %o5, ST_CHUNK*64, %o5   ! reset %o5
206         mov     ST_CHUNK, %g1                     206         mov     ST_CHUNK, %g1
207                                                   207 
208 .wr_loop_rest:                                    208 .wr_loop_rest:
209         stxa    %o1, [%o5+8+8]%asi                209         stxa    %o1, [%o5+8+8]%asi
210         sub     %o4, 64, %o4                      210         sub     %o4, 64, %o4
211         stxa    %o1, [%o5+16+8]%asi               211         stxa    %o1, [%o5+16+8]%asi
212         subcc   %g1, 1, %g1                       212         subcc   %g1, 1, %g1
213         stxa    %o1, [%o5+24+8]%asi               213         stxa    %o1, [%o5+24+8]%asi
214         stxa    %o1, [%o5+32+8]%asi               214         stxa    %o1, [%o5+32+8]%asi
215         stxa    %o1, [%o5+40+8]%asi               215         stxa    %o1, [%o5+40+8]%asi
216         add     %o5, 64, %o5                      216         add     %o5, 64, %o5
217         stxa    %o1, [%o5-8]%asi                  217         stxa    %o1, [%o5-8]%asi
218         bgu     %xcc, .wr_loop_rest               218         bgu     %xcc, .wr_loop_rest
219          stxa    %o1, [%o5]ASI_STBI_P             219          stxa    %o1, [%o5]ASI_STBI_P
220                                                   220 
221         ! If at least ST_CHUNK*64 bytes remain    221         ! If at least ST_CHUNK*64 bytes remain to set, continue
222         ! setting the first long word of each     222         ! setting the first long word of each cache line in advance
223         ! to keep the store pipeline moving.      223         ! to keep the store pipeline moving.
224                                                   224 
225         cmp     %o4, ST_CHUNK*64                  225         cmp     %o4, ST_CHUNK*64
226         bge,pt  %xcc, .wr_loop_start              226         bge,pt  %xcc, .wr_loop_start
227          mov     ST_CHUNK, %g1                    227          mov     ST_CHUNK, %g1
228                                                   228 
229         brz,a,pn %o4, .asi_done                   229         brz,a,pn %o4, .asi_done
230          add     %o5, 8, %o5             ! res    230          add     %o5, 8, %o5             ! restore %o5 offset
231                                                   231 
232 .wr_loop_small:                                   232 .wr_loop_small:
233         stxa    %o1, [%o5+8]%asi                  233         stxa    %o1, [%o5+8]%asi
234         stxa    %o1, [%o5+8+8]%asi                234         stxa    %o1, [%o5+8+8]%asi
235         stxa    %o1, [%o5+16+8]%asi               235         stxa    %o1, [%o5+16+8]%asi
236         stxa    %o1, [%o5+24+8]%asi               236         stxa    %o1, [%o5+24+8]%asi
237         stxa    %o1, [%o5+32+8]%asi               237         stxa    %o1, [%o5+32+8]%asi
238         subcc   %o4, 64, %o4                      238         subcc   %o4, 64, %o4
239         stxa    %o1, [%o5+40+8]%asi               239         stxa    %o1, [%o5+40+8]%asi
240         add     %o5, 64, %o5                      240         add     %o5, 64, %o5
241         stxa    %o1, [%o5-8]%asi                  241         stxa    %o1, [%o5-8]%asi
242         bgu,pt  %xcc, .wr_loop_small              242         bgu,pt  %xcc, .wr_loop_small
243          stxa    %o1, [%o5]ASI_STBI_P             243          stxa    %o1, [%o5]ASI_STBI_P
244                                                   244 
245         ba      .asi_done                         245         ba      .asi_done
246          add     %o5, 8, %o5             ! res    246          add     %o5, 8, %o5             ! restore %o5 offset
247                                                   247 
248         ! Special case loop for zero fill mems    248         ! Special case loop for zero fill memsets
249         ! For each 64 byte cache line, single     249         ! For each 64 byte cache line, single STBI to first element
250         ! clears line                             250         ! clears line
251 .wrzero:                                          251 .wrzero:
252         cmp     %o4, MIN_ZERO           ! chec    252         cmp     %o4, MIN_ZERO           ! check if enough bytes to set
253                                         ! to p    253                                         ! to pay %asi + membar cost
254         blu     %xcc, .short_set                  254         blu     %xcc, .short_set
255          nop                                      255          nop
256         sub     %o4, 256, %o4                     256         sub     %o4, 256, %o4
257                                                   257 
258 .wrzero_loop:                                     258 .wrzero_loop:
259         mov     64, %g3                           259         mov     64, %g3
260         stxa    %o1, [%o5]ASI_STBI_P              260         stxa    %o1, [%o5]ASI_STBI_P
261         subcc   %o4, 256, %o4                     261         subcc   %o4, 256, %o4
262         stxa    %o1, [%o5+%g3]ASI_STBI_P          262         stxa    %o1, [%o5+%g3]ASI_STBI_P
263         add     %o5, 256, %o5                     263         add     %o5, 256, %o5
264         sub     %g3, 192, %g3                     264         sub     %g3, 192, %g3
265         stxa    %o1, [%o5+%g3]ASI_STBI_P          265         stxa    %o1, [%o5+%g3]ASI_STBI_P
266         add %g3, 64, %g3                          266         add %g3, 64, %g3
267         bge,pt  %xcc, .wrzero_loop                267         bge,pt  %xcc, .wrzero_loop
268          stxa    %o1, [%o5+%g3]ASI_STBI_P         268          stxa    %o1, [%o5+%g3]ASI_STBI_P
269         add     %o4, 256, %o4                     269         add     %o4, 256, %o4
270                                                   270 
271         brz,pn  %o4, .bsi_done                    271         brz,pn  %o4, .bsi_done
272          nop                                      272          nop
273                                                   273 
274 .wrzero_small:                                    274 .wrzero_small:
275         stxa    %o1, [%o5]ASI_STBI_P              275         stxa    %o1, [%o5]ASI_STBI_P
276         subcc   %o4, 64, %o4                      276         subcc   %o4, 64, %o4
277         bgu,pt  %xcc, .wrzero_small               277         bgu,pt  %xcc, .wrzero_small
278          add     %o5, 64, %o5                     278          add     %o5, 64, %o5
279         ba,a    .bsi_done                         279         ba,a    .bsi_done
280                                                   280 
281 .asi_done:                                        281 .asi_done:
282         wr      %g3, 0x0, %asi          ! rest    282         wr      %g3, 0x0, %asi          ! restore saved %asi
283 .bsi_done:                                        283 .bsi_done:
284         membar  #StoreStore             ! requ    284         membar  #StoreStore             ! required by use of Block Store Init
285                                                   285 
286 .short_set:                                       286 .short_set:
287         cmp     %o4, 64                 ! chec    287         cmp     %o4, 64                 ! check if 64 bytes to set
288         blu     %xcc, 5f                          288         blu     %xcc, 5f
289          nop                                      289          nop
290 4:                                      ! set     290 4:                                      ! set final blocks of 64 bytes
291         stx     %o1, [%o5]                        291         stx     %o1, [%o5]
292         stx     %o1, [%o5+8]                      292         stx     %o1, [%o5+8]
293         stx     %o1, [%o5+16]                     293         stx     %o1, [%o5+16]
294         stx     %o1, [%o5+24]                     294         stx     %o1, [%o5+24]
295         subcc   %o4, 64, %o4                      295         subcc   %o4, 64, %o4
296         stx     %o1, [%o5+32]                     296         stx     %o1, [%o5+32]
297         stx     %o1, [%o5+40]                     297         stx     %o1, [%o5+40]
298         add     %o5, 64, %o5                      298         add     %o5, 64, %o5
299         stx     %o1, [%o5-16]                     299         stx     %o1, [%o5-16]
300         bgu,pt  %xcc, 4b                          300         bgu,pt  %xcc, 4b
301          stx     %o1, [%o5-8]                     301          stx     %o1, [%o5-8]
302                                                   302 
303 5:                                                303 5:
304         ! Set the remaining long words            304         ! Set the remaining long words
305 .wrshort:                                         305 .wrshort:
306         subcc   %o3, 8, %o3             ! Can     306         subcc   %o3, 8, %o3             ! Can we store any long words?
307         blu,pn  %xcc, .wrchars                    307         blu,pn  %xcc, .wrchars
308          and     %o2, 7, %o2             ! cal    308          and     %o2, 7, %o2             ! calc bytes left after long words
309 6:                                                309 6:
310         subcc   %o3, 8, %o3                       310         subcc   %o3, 8, %o3
311         stx     %o1, [%o5]              ! stor    311         stx     %o1, [%o5]              ! store the long words
312         bgeu,pt %xcc, 6b                          312         bgeu,pt %xcc, 6b
313          add     %o5, 8, %o5                      313          add     %o5, 8, %o5
314                                                   314 
315 .wrchars:                               ! chec    315 .wrchars:                               ! check for extra chars
316         brnz    %o2, .wrfin                       316         brnz    %o2, .wrfin
317          nop                                      317          nop
318         retl                                      318         retl
319          nop                                      319          nop
320                                                   320 
321 .wdalign:                                         321 .wdalign:
322         andcc   %o5, 3, %o3             ! is s    322         andcc   %o5, 3, %o3             ! is sp1 aligned on a word boundary
323         bz,pn   %xcc, .wrword                     323         bz,pn   %xcc, .wrword
324          andn    %o2, 3, %o3             ! cre    324          andn    %o2, 3, %o3             ! create word sized count in %o3
325                                                   325 
326         dec     %o2                     ! decr    326         dec     %o2                     ! decrement count
327         stb     %o1, [%o5]              ! clea    327         stb     %o1, [%o5]              ! clear a byte
328         b       .wdalign                          328         b       .wdalign
329          inc     %o5                     ! nex    329          inc     %o5                     ! next byte
330                                                   330 
331 .wrword:                                          331 .wrword:
332         subcc   %o3, 4, %o3                       332         subcc   %o3, 4, %o3
333         st      %o1, [%o5]              ! 4-by    333         st      %o1, [%o5]              ! 4-byte writing loop
334         bnz,pt  %xcc, .wrword                     334         bnz,pt  %xcc, .wrword
335          add     %o5, 4, %o5                      335          add     %o5, 4, %o5
336                                                   336 
337         and     %o2, 3, %o2             ! left    337         and     %o2, 3, %o2             ! leftover count, if any
338                                                   338 
339 .wrchar:                                          339 .wrchar:
340         ! Set the remaining bytes, if any         340         ! Set the remaining bytes, if any
341         brz     %o2, .exit                        341         brz     %o2, .exit
342          nop                                      342          nop
343 .wrfin:                                           343 .wrfin:
344         deccc   %o2                               344         deccc   %o2
345         stb     %o1, [%o5]                        345         stb     %o1, [%o5]
346         bgu,pt  %xcc, .wrfin                      346         bgu,pt  %xcc, .wrfin
347          inc     %o5                              347          inc     %o5
348 .exit:                                            348 .exit:
349         retl                            ! %o0     349         retl                            ! %o0 was preserved
350          nop                                      350          nop
351                                                   351 
352         .size           M7memset,.-M7memset       352         .size           M7memset,.-M7memset
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php