~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/alpha/lib/ev6-stxncpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/alpha/lib/ev6-stxncpy.S (Version linux-6.12-rc7) and /arch/i386/lib/ev6-stxncpy.S (Version linux-4.16.18)


  1 /* SPDX-License-Identifier: GPL-2.0 */            
  2 /*                                                
  3  * arch/alpha/lib/ev6-stxncpy.S                   
  4  * 21264 version contributed by Rick Gorton <ri    
  5  *                                                
  6  * Copy no more than COUNT bytes of the null-terminated string from
  7  * SRC to DST.
  8  *
  9  * This is an internal routine used by strncpy, stpncpy, and strncat.
 10  * As such, it uses special linkage conventions to make implementation
 11  * of these public functions more efficient.
 12  *                                                
 13  * On input:                                      
 14  *      t9 = return address                       
 15  *      a0 = DST                                  
 16  *      a1 = SRC                                  
 17  *      a2 = COUNT                                
 18  *                                                
 19  * Furthermore, COUNT may not be zero.            
 20  *                                                
 21  * On output:                                     
 22  *      t0  = last word written                   
 23  *      t10 = bitmask (with one bit set) indicating the byte position of
 24  *            the end of the range specified by COUNT
 25  *      t12 = bitmask (with one bit set) indicating the last byte written
 26  *      a0  = unaligned address of the last *word* written
 27  *      a2  = the number of full words left in COUNT
 28  *                                                
 29  * Furthermore, v0, a3-a5, t11, and $at are untouched.
 30  *
 31  * Much of the information about 21264 scheduling/coding comes from:
 32  *      Compiler Writer's Guide for the Alpha 21264
 33  *      abbreviated as 'CWG' in other comments here
 34  *      ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 35  * Scheduling notation:
 36  *      E       - either cluster
 37  *      U       - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 38  *      L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 39  * Try not to change the actual algorithm if possible for consistency.
 40  */                                               
 41                                                   
 42 #include <asm/regdef.h>                           
 43                                                   
 44         .set noat                                 
 45         .set noreorder                            
 46                                                   
 47         .text                                     
 48                                                   
 49 /* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
 50    doesn't like putting the entry point for a procedure somewhere in the
 51    middle of the procedure descriptor.  Work around this by putting the
 52    aligned copy in its own procedure descriptor.  */
 53                                                   
 54                                                   
  55         .ent stxncpy_aligned
  56         .align 4
  57 stxncpy_aligned:                # co-aligned copy path; branched to from __stxncpy
  58         .frame sp, 0, t9, 0
  59         .prologue 0
  60
  61         /* On entry to this basic block:
  62            t0 == the first destination word for masking back in
  63            t1 == the first source word.  */
  64
  65         /* Create the 1st output word and detect 0's in the 1st input word.  */
  66         lda     t2, -1          # E : build a mask against false zero
  67         mskqh   t2, a1, t2      # U :   detection in the src word
  68         mskqh   t1, a1, t3      # U :
  69         ornot   t1, t2, t2      # E : (stall)
  70
  71         mskql   t0, a1, t0      # U : assemble the first output word
  72         cmpbge  zero, t2, t8    # E : bits set iff null found
  73         or      t0, t3, t0      # E : (stall)
  74         beq     a2, $a_eoc      # U :
  75
  76         bne     t8, $a_eos      # U :
  77         nop
  78         nop
  79         nop
  80
  81         /* On entry to this basic block:
  82            t0 == a source word not containing a null.  */
  83
  84         /*
  85          * nops here to:
  86          *      separate store quads from load quads
  87          *      limit of 1 bcond/quad to permit training
  88          */
  89 $a_loop:
  90         stq_u   t0, 0(a0)       # L : store the completed word
  91         addq    a0, 8, a0       # E : advance dst pointer by one word
  92         subq    a2, 1, a2       # E : one fewer word left in count
  93         nop
  94
  95         ldq_u   t0, 0(a1)       # L : load the next source word
  96         addq    a1, 8, a1       # E : advance src pointer by one word
  97         cmpbge  zero, t0, t8    # E : t8 bit set for each zero byte in t0
  98         beq     a2, $a_eoc      # U : word count exhausted
  99
 100         beq     t8, $a_loop     # U : no null in this word, keep copying
 101         nop
 102         nop
 103         nop
 104
 105         /* Take care of the final (partial) word store.  At this point
 106            the end-of-count bit is set in t8 iff it applies.
 107
 108            On entry to this basic block we have:
 109            t0 == the source word containing the null
 110            t8 == the cmpbge mask that found it.  */
 111
 112 $a_eos:
 113         negq    t8, t12         # E : find low bit set
 114         and     t8, t12, t12    # E : (stall)
 115         /* For the sake of the cache, don't read a destination word
 116            if we're not going to need it.  */
 117         and     t12, 0x80, t6   # E : (stall)
 118         bne     t6, 1f          # U : (stall)
 119
 120         /* We're doing a partial word store and so need to combine
 121            our source and original destination words.  */
 122         ldq_u   t1, 0(a0)       # L :
 123         subq    t12, 1, t6      # E :
 124         or      t12, t6, t8     # E : (stall)
 125         zapnot  t0, t8, t0      # U : clear src bytes > null
 126
 127         zap     t1, t8, t1      # .. e1 : clear dst bytes <= null
 128         or      t0, t1, t0      # e1    : (stall)
 129         nop
 130         nop
 131
 132 1:      stq_u   t0, 0(a0)       # L :
 133         ret     (t9)            # L0 : Latency=3
 134         nop
 135         nop
 136
 137         /* Add the end-of-count bit to the eos detection bitmask.  */
 138 $a_eoc:
 139         or      t10, t8, t8     # E :
 140         br      $a_eos          # L0 : Latency=3
 141         nop
 142         nop
 143
 144         .end stxncpy_aligned
145                                                   
 146         .align 4
 147         .ent __stxncpy
 148         .globl __stxncpy
 149 __stxncpy:                      # in: t9 = return address, a0 = DST, a1 = SRC, a2 = COUNT (nonzero)
 150         .frame sp, 0, t9, 0
 151         .prologue 0
 152
 153         /* Are source and destination co-aligned?  */
 154         xor     a0, a1, t1      # E :
 155         and     a0, 7, t0       # E : find dest misalignment
 156         and     t1, 7, t1       # E : (stall)
 157         addq    a2, t0, a2      # E : bias count by dest misalignment
 158
 159         subq    a2, 1, a2       # E :
 160         and     a2, 7, t2       # E : (stall)
 161         srl     a2, 3, a2       # U : a2 = loop counter = (count - 1)/8
 162         addq    zero, 1, t10    # E :
 163
 164         sll     t10, t2, t10    # U : t10 = bitmask of last count byte
 165         bne     t1, $unaligned  # U :
 166         /* We are co-aligned; take care of a partial first word.  */
 167         ldq_u   t1, 0(a1)       # L : load first src word
 168         addq    a1, 8, a1       # E :
 169
 170         beq     t0, stxncpy_aligned     # U : avoid loading dest word if not needed
 171         ldq_u   t0, 0(a0)       # L :
 172         nop
 173         nop
 174
 175         br      stxncpy_aligned # .. e1 :
 176         nop
 177         nop
 178         nop
 179
 180
 181
 182 /* The source and destination are not co-aligned.  Align the destination
 183    and cope.  We have to be very careful about not reading too much and
 184    causing a SEGV.  */
 185
 186         .align 4
 187 $u_head:
 188         /* We know just enough now to be able to assemble the first
 189            full source word.  We can still find a zero at the end of it
 190            that prevents us from outputting the whole thing.
 191
 192            On entry to this basic block:
 193            t0 == the first dest word, unmasked
 194            t1 == the shifted low bits of the first source word
 195            t6 == bytemask that is -1 in dest word bytes */
 196
 197         ldq_u   t2, 8(a1)       # L : Latency=3 load second src word
 198         addq    a1, 8, a1       # E :
 199         mskql   t0, a0, t0      # U : mask trailing garbage in dst
 200         extqh   t2, a1, t4      # U : (3 cycle stall on t2)
 201
 202         or      t1, t4, t1      # E : first aligned src word complete
 203         mskqh   t1, a0, t1      # U : mask leading garbage in src
 204         or      t0, t1, t0      # E : first output word complete
 205         or      t0, t6, t6      # E : mask original data for zero test
 206
 207         cmpbge  zero, t6, t8    # E :
 208         beq     a2, $u_eocfin   # U :
 209         lda     t6, -1          # E :
 210         nop
 211
 212         bne     t8, $u_final    # U :
 213         mskql   t6, a1, t6      # U : mask out bits already seen
 214         stq_u   t0, 0(a0)       # L : store first output word
 215         or      t6, t2, t2      # E : (stall)
 216
 217         cmpbge  zero, t2, t8    # E : find nulls in second partial
 218         addq    a0, 8, a0       # E :
 219         subq    a2, 1, a2       # E :
 220         bne     t8, $u_late_head_exit   # U :
 221
 222         /* Finally, we've got all the stupid leading edge cases taken care
 223            of and we can set up to enter the main loop.  */
 224         extql   t2, a1, t1      # U : position hi-bits of lo word
 225         beq     a2, $u_eoc      # U :
 226         ldq_u   t2, 8(a1)       # L : read next high-order source word
 227         addq    a1, 8, a1       # E :
 228
 229         extqh   t2, a1, t0      # U : position lo-bits of hi word
 230         cmpbge  zero, t2, t8    # E :
 231         nop
 232         bne     t8, $u_eos      # U :
 233
 234         /* Unaligned copy main loop.  In order to avoid reading too much,
 235            the loop is structured to detect zeros in aligned source words.
 236            This has, unfortunately, effectively pulled half of a loop
 237            iteration out into the head and half into the tail, but it does
 238            prevent nastiness from accumulating in the very thing we want
 239            to run as fast as possible.
 240
 241            On entry to this basic block:
 242            t0 == the shifted low-order bits from the current source word
 243            t1 == the shifted high-order bits from the previous source word
 244            t2 == the unshifted current source word
 245
 246            We further know that t2 does not contain a null terminator.  */
 247
 248         .align 4
 249 $u_loop:
 250         or      t0, t1, t0      # E : current dst word now complete
 251         subq    a2, 1, a2       # E : decrement word count
 252         extql   t2, a1, t1      # U : extract low bits for next time
 253         addq    a0, 8, a0       # E :
 254
 255         stq_u   t0, -8(a0)      # U : save the current word
 256         beq     a2, $u_eoc      # U :
 257         ldq_u   t2, 8(a1)       # U : Latency=3 load high word for next time
 258         addq    a1, 8, a1       # E :
 259
 260         extqh   t2, a1, t0      # U : extract low bits
 261         cmpbge  zero, t2, t8    # E : test new word for eos
 262         nop
 263         beq     t8, $u_loop     # U :
 264
 265         /* We've found a zero somewhere in the source word we just read.
 266            If it resides in the lower half, we have one (probably partial)
 267            word to write out, and if it resides in the upper half, we
 268            have one full and one partial word left to write out.
 269
 270            On entry to this basic block:
 271            t0 == the shifted low-order bits from the current source word
 272            t1 == the shifted high-order bits from the previous source word
 273            t2 == the unshifted current source word.  */
 274 $u_eos:
 275         or      t0, t1, t0      # E : first (partial) source word complete
 276         nop
 277         cmpbge  zero, t0, t8    # E : is the null in this first bit?
 278         bne     t8, $u_final    # U : (stall)
 279
 280         stq_u   t0, 0(a0)       # L : the null was in the high-order bits
 281         addq    a0, 8, a0       # E :
 282         subq    a2, 1, a2       # E :
 283         nop
 284
 285 $u_late_head_exit:
 286         extql   t2, a1, t0      # U :
 287         cmpbge  zero, t0, t8    # E :
 288         or      t8, t10, t6     # E : (stall)
 289         cmoveq  a2, t6, t8      # E : Latency=2, extra map slot
 290
 291         /* Take care of a final (probably partial) result word.
 292            On entry to this basic block:
 293            t0 == assembled source word
 294            t8 == cmpbge mask that found the null.  */
 295 $u_final:
 296         negq    t8, t6          # E : isolate low bit set
 297         and     t6, t8, t12     # E : (stall)
 298         and     t12, 0x80, t6   # E : avoid dest word load if we can
 299         bne     t6, 1f          # U : (stall)
 300
 301         ldq_u   t1, 0(a0)       # L :
 302         subq    t12, 1, t6      # E :
 303         or      t6, t12, t8     # E : (stall)
 304         zapnot  t0, t8, t0      # U : kill source bytes > null
 305
 306         zap     t1, t8, t1      # U : kill dest bytes <= null
 307         or      t0, t1, t0      # E : (stall)
 308         nop
 309         nop
 310
 311 1:      stq_u   t0, 0(a0)       # L :
 312         ret     (t9)            # L0 : Latency=3
 313
 314           /* Got to end-of-count before end of string.
 315              On entry to this basic block:
 316              t1 == the shifted high-order bits from the previous source word  */
 317 $u_eoc:
 318         and     a1, 7, t6       # E : avoid final load if possible
 319         sll     t10, t6, t6     # U : (stall)
 320         and     t6, 0xff, t6    # E : (stall)
 321         bne     t6, 1f          # U : (stall)
 322
 323         ldq_u   t2, 8(a1)       # L : load final src word
 324         nop
 325         extqh   t2, a1, t0      # U : extract low bits for last word
 326         or      t1, t0, t1      # E : (stall)
 327
 328 1:      cmpbge  zero, t1, t8    # E :
 329         mov     t1, t0          # E :
 330
 331 $u_eocfin:                      # end-of-count, final word
 332         or      t10, t8, t8     # E :
 333         br      $u_final        # L0 : Latency=3
 334
 335         /* Unaligned copy entry point.  */
 336         .align 4
 337 $unaligned:
 338
 339         ldq_u   t1, 0(a1)       # L : load first source word
 340         and     a0, 7, t4       # E : find dest misalignment
 341         and     a1, 7, t5       # E : find src misalignment
 342         /* Conditionally load the first destination word and a bytemask
 343            with 0xff indicating that the destination byte is sacrosanct.  */
 344         mov     zero, t0        # E :
 345
 346         mov     zero, t6        # E :
 347         beq     t4, 1f          # U :
 348         ldq_u   t0, 0(a0)       # L :
 349         lda     t6, -1          # E :
 350
 351         mskql   t6, a0, t6      # U :
 352         nop
 353         nop
 354         subq    a1, t4, a1      # E : sub dest misalignment from src addr
 355
 356         /* If source misalignment is larger than dest misalignment, we need
 357            extra startup checks to avoid SEGV.  */
 358
 359 1:      cmplt   t4, t5, t12     # E :
 360         extql   t1, a1, t1      # U : shift src into place
 361         lda     t2, -1          # E : for creating masks later
 362         beq     t12, $u_head    # U : (stall)
 363
 364         extql   t2, a1, t2      # U :
 365         cmpbge  zero, t1, t8    # E : is there a zero?
 366         andnot  t2, t6, t2      # E : dest mask for a single word copy
 367         or      t8, t10, t5     # E : test for end-of-count too
 368
 369         cmpbge  zero, t2, t3    # E :
 370         cmoveq  a2, t5, t8      # E : Latency=2, extra map slot
 371         nop                     # E : keep with cmoveq
 372         andnot  t8, t3, t8      # E : (stall)
 373
 374         beq     t8, $u_head     # U :
 375         /* At this point we've found a zero in the first partial word of
 376            the source.  We need to isolate the valid source data and mask
 377            it into the original destination data.  (Incidentally, we know
 378            that we'll need at least one byte of that original dest word.) */
 379         ldq_u   t0, 0(a0)       # L :
 380         negq    t8, t6          # E : build bitmask of bytes <= zero
 381         mskqh   t1, t4, t1      # U :
 382
 383         and     t6, t8, t12     # E :
 384         subq    t12, 1, t6      # E : (stall)
 385         or      t6, t12, t8     # E : (stall)
 386         zapnot  t2, t8, t2      # U : prepare source word; mirror changes
 387
 388         zapnot  t1, t8, t1      # U : to source validity mask
 389         andnot  t0, t2, t0      # E : zero place for source to reside
 390         or      t0, t1, t0      # E : and put it there
 391         stq_u   t0, 0(a0)       # L : (stall)
 392
 393         ret     (t9)            # L0 : Latency=3
 394         nop
 395         nop
 396         nop
 397
 398         .end __stxncpy
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php