~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/alpha/lib/ev6-stxcpy.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/alpha/lib/ev6-stxcpy.S (Version linux-6.12-rc7) and /arch/sparc/lib/ev6-stxcpy.S (Version linux-4.9.337)


  1 /* SPDX-License-Identifier: GPL-2.0 */            
  2 /*                                                
  3  * arch/alpha/lib/ev6-stxcpy.S                    
  4  * 21264 version contributed by Rick Gorton <ri    
  5  *                                                
  6  * Copy a null-terminated string from SRC to DST.
  7  *
  8  * This is an internal routine used by strcpy, stpcpy, and strcat.
  9  * As such, it uses special linkage conventions to make implementation
 10  * of these public functions more efficient.
 11  *                                                
 12  * On input:                                      
 13  *      t9 = return address                       
 14  *      a0 = DST                                  
 15  *      a1 = SRC                                  
 16  *                                                
 17  * On output:                                     
 18  *      t12 = bitmask (with one bit set) indicating the last byte written
 19  *      a0  = unaligned address of the last *word* written
 20  *
 21  * Furthermore, v0, a3-a5, and t11 are untouched.
 22  *                                                
 23  * Much of the information about 21264 scheduling/coding comes from:
 24  *      Compiler Writer's Guide for the Alpha 21264
 25  *      abbreviated as 'CWG' in other comments here
 26  *      ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 27  * Scheduling notation:
 28  *      E       - either cluster
 29  *      U       - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 30  *      L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 31  * Try not to change the actual algorithm if possible for consistency.
 32  */                                               
 33                                                   
 34 #include <asm/regdef.h>                           
 35                                                   
 36         .set noat                                 
 37         .set noreorder                            
 38                                                   
 39         .text                                     
 40                                                   
 41 /* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
 42    doesn't like putting the entry point for a procedure somewhere in the
 43    middle of the procedure descriptor.  Work around this by putting the
 44    aligned copy in its own procedure descriptor */
 45                                                   
 46                                                   
 47         .ent stxcpy_aligned
 48         .align 4
 49 stxcpy_aligned:
 50         .frame sp, 0, t9
 51         .prologue 0
            # Copy path used when SRC and DST have the same offset within a
            # quadword.  Both branches into this routine come from __stxcpy,
            # after it has loaded the first source word into t1 and added 8
            # to a1.  Returns through t9 with t12 holding one bit that marks
            # the byte position of the null and a0 holding the address used
            # for the last store.
            # Registers written here: t0-t3, t6, t8, t12, a0, a1.
 52
 53         /* On entry to this basic block:
 54            t0 == the first destination word fo
 55            t1 == the first source word.  */
 56
 57         /* Create the 1st output word and dete
            # mskqh keeps the bytes at or above byte offset (a1 & 7) and mskql
            # keeps the bytes below it.  t2 ends up as the source word with
            # every byte that precedes the string forced to 0xff, so cmpbge
            # only flags zero bytes that belong to the string.
 58         lda     t2, -1          # E : build a mask against false zero
 59         mskqh   t2, a1, t2      # U :   detection in the src word (stall)
 60         mskqh   t1, a1, t3      # U :
 61         ornot   t1, t2, t2      # E : (stall)
 62
 63         mskql   t0, a1, t0      # U : assemble the first output word
 64         cmpbge  zero, t2, t8    # E : bits set iff null found
 65         or      t0, t3, t1      # E : (stall)
 66         bne     t8, $a_eos      # U : (stall)
 67
 68         /* On entry to this basic block:
 69            t0 == the first destination word fo
 70            t1 == a source word not containing
 71         /* Nops here to separate store quads f
 72
 73 $a_loop:
            # Store the word in t1, fetch the next source word and repeat
            # until cmpbge reports a zero byte in it.
 74         stq_u   t1, 0(a0)       # L :
 75         addq    a0, 8, a0       # E :
 76         nop
 77         nop
 78
 79         ldq_u   t1, 0(a1)       # L : Latency=3
 80         addq    a1, 8, a1       # E :
 81         cmpbge  zero, t1, t8    # E : (3 cycle stall)
 82         beq     t8, $a_loop     # U : (stall for t8)
 83
 84         /* Take care of the final (partial) wo
 85            On entry to this basic block we hav
 86            t1 == the source word containing th
 87            t8 == the cmpbge mask that found it
 88 $a_eos:
            # t8 & -t8 leaves only the lowest set bit, i.e. the first null.
 89         negq    t8, t6          # E : find low bit set
 90         and     t8, t6, t12     # E : (stall)
 91         /* For the sake of the cache, don't re
 92            if we're not going to need it.  */
 93         and     t12, 0x80, t6   # E : (stall)
 94         bne     t6, 1f          # U : (stall)
 95
 96         /* We're doing a partial word store an
 97            our source and original destination
            # Reached only when the null is not the last byte of the word.
            # t6 = t12 - 1 selects the bytes below the null: zapnot keeps
            # those source bytes, zap clears the destination bytes up to and
            # including the null, and the merge leaves a zero at the null.
 98         ldq_u   t0, 0(a0)       # L : Latency=3
 99         subq    t12, 1, t6      # E :
100         zapnot  t1, t6, t1      # U : clear src bytes >= null (stall)
101         or      t12, t6, t8     # E : (stall)
102
103         zap     t0, t8, t0      # E : clear dst bytes <= null
104         or      t0, t1, t1      # E : (stall)
105         nop
106         nop
107
108 1:      stq_u   t1, 0(a0)       # L :
109         ret     (t9)            # L0 : Latency=3
110         nop
111         nop
112
113         .end stxcpy_aligned
114                                                   
115         .align 4
116         .ent __stxcpy
117         .globl __stxcpy
118 __stxcpy:
119         .frame sp, 0, t9
120         .prologue 0
            # Entry point.  Copies the null-terminated string at a1 (SRC) to
            # a0 (DST) one quadword at a time and returns through t9.
            # On return t12 holds a single bit marking the byte position of
            # the null within the last word stored, and a0 is the address
            # that was used for that last store.
            # Dispatches on whether SRC and DST have the same offset within a
            # quadword: equal offsets go to stxcpy_aligned, all others go to
            # $unaligned further down.
121
122         /* Are source and destination co-align
123         xor     a0, a1, t0      # E :
124         unop                    # E :
125         and     t0, 7, t0       # E : (stall)
126         bne     t0, $unaligned  # U : (stall)
127
128         /* We are co-aligned; take care of a p
129         ldq_u   t1, 0(a1)               # L : load first src word
130         and     a0, 7, t0               # E : dst offset within its quadword
131         addq    a1, 8, a1               # E :
132         beq     t0, stxcpy_aligned      # U : offset 0: whole dst word is overwritten, skip its load
133
134         ldq_u   t0, 0(a0)       # L :
135         br      stxcpy_aligned  # L0 : Latency=3
136         nop
137         nop
138
139
140 /* The source and destination are not co-align
141    and cope.  We have to be very careful about
142    causing a SEGV.  */
143
144         .align 4
145 $u_head:
146         /* We know just enough now to be able
147            full source word.  We can still fin
148            that prevents us from outputting th
149
150            On entry to this basic block:
151            t0 == the first dest word, for mask
152            t1 == the low bits of the first sou
153            t6 == bytemask that is -1 in dest w
154
            # Build the first output word: combine the tail of the first
            # source word (t1) with the head of the next one (t2), shifted by
            # the offset now held in a1, then merge the original destination
            # bytes below (a0 & 7) back in from t0.
155         ldq_u   t2, 8(a1)       # L :
156         addq    a1, 8, a1       # E :
157         extql   t1, a1, t1      # U : (stall on a1)
158         extqh   t2, a1, t4      # U : (stall on a1)
159
160         mskql   t0, a0, t0      # U :
161         or      t1, t4, t1      # E :
162         mskqh   t1, a0, t1      # U : (stall on t1)
163         or      t0, t1, t1      # E : (stall on t1)
164
            # Force the preserved destination bytes to 0xff before testing, so
            # only bytes that came from the source can be seen as the null.
165         or      t1, t6, t6      # E :
166         cmpbge  zero, t6, t8    # E : (stall)
167         lda     t6, -1          # E : for masking just below
168         bne     t8, $u_final    # U : (stall)
169
170         mskql   t6, a1, t6              # U : 0xff in the low bytes of t2 already consumed
171         or      t6, t2, t2              # E : ... so they cannot read as a null
172         cmpbge  zero, t2, t8            # E : null in the remaining bytes of t2?
173         bne     t8, $u_late_head_exit   # U :
174
175         /* Finally, we've got all the stupid l
176            of and we can set up to enter the m
177
178         stq_u   t1, 0(a0)       # L : store first output word
179         addq    a0, 8, a0       # E :
180         extql   t2, a1, t0      # U : position ho-bits of lo word
181         ldq_u   t2, 8(a1)       # U : read next high-order source word
182
183         addq    a1, 8, a1       # E :
184         cmpbge  zero, t2, t8    # E : (stall for t2)
185         nop                     # E :
186         bne     t8, $u_eos      # U : (stall)
187
188         /* Unaligned copy main loop.  In order
189            the loop is structured to detect ze
190            This has, unfortunately, effectivel
191            iteration out into the head and hal
192            prevent nastiness from accumulating
193            to run as fast as possible.
194
195            On entry to this basic block:
196            t0 == the shifted high-order bits f
197            t2 == the unshifted current source
198
199            We further know that t2 does not co
200
201         .align 3
202 $u_loop:
            # Each pass completes one destination word from t0 (carried over)
            # and the low part of t2, stores it, and carries the rest of t2
            # forward in t0.  The null test is done on the newly loaded
            # source word before any of it is stored.
203         extqh   t2, a1, t1      # U : extract high bits for current word
204         addq    a1, 8, a1       # E : (stall)
205         extql   t2, a1, t3      # U : extract low bits for next time (stall)
206         addq    a0, 8, a0       # E :
207
208         or      t0, t1, t1      # E : current dst word now complete
209         ldq_u   t2, 0(a1)       # L : Latency=3 load high word for next time
210         stq_u   t1, -8(a0)      # L : save the current word (stall)
211         mov     t3, t0          # E :
212
213         cmpbge  zero, t2, t8    # E : test new word for eos
214         beq     t8, $u_loop     # U : (stall)
215         nop
216         nop
217
218         /* We've found a zero somewhere in the
219            If it resides in the lower half, we
220            word to write out, and if it reside
221            have one full and one partial word
222
223            On entry to this basic block:
224            t0 == the shifted high-order bits f
225            t2 == the unshifted current source
226 $u_eos:
227         extqh   t2, a1, t1      # U :
228         or      t0, t1, t1      # E : first (partial) source word complete (stall)
229         cmpbge  zero, t1, t8    # E : is the null in this first bit? (stall)
230         bne     t8, $u_final    # U : (stall)
231
232 $u_late_head_exit:
            # The word in t1 holds no null: store it whole, then build the
            # last word from the remaining bytes of t2 and test that one.
233         stq_u   t1, 0(a0)       # L : the null was in the high-order bits
234         addq    a0, 8, a0       # E :
235         extql   t2, a1, t1      # U :
236         cmpbge  zero, t1, t8    # E : (stall)
237
238         /* Take care of a final (probably part
239            On entry to this basic block:
240            t1 == assembled source word
241            t8 == cmpbge mask that found the nu
242 $u_final:
            # Same tail as $a_eos in stxcpy_aligned: t12 = lowest set bit of
            # t8; if that is bit 0x80 the word is stored whole, otherwise the
            # source bytes below the null are merged with the destination
            # bytes above it.
243         negq    t8, t6          # E : isolate low bit set
244         and     t6, t8, t12     # E : (stall)
245         and     t12, 0x80, t6   # E : avoid dest word load if we can (stall)
246         bne     t6, 1f          # U : (stall)
247
248         ldq_u   t0, 0(a0)       # E :
249         subq    t12, 1, t6      # E :
250         or      t6, t12, t8     # E : (stall)
251         zapnot  t1, t6, t1      # U : kill source bytes >= null (stall)
252
253         zap     t0, t8, t0      # U : kill dest bytes <= null (2 cycle data stall)
254         or      t0, t1, t1      # E : (stall)
255         nop
256         nop
257
258 1:      stq_u   t1, 0(a0)       # L :
259         ret     (t9)            # L0 : Latency=3
260         nop
261         nop
262
263         /* Unaligned copy entry point.  */
264         .align 4
265 $unaligned:
266
267         ldq_u   t1, 0(a1)       # L : load first source word
268         and     a0, 7, t4       # E : find dest misalignment
269         and     a1, 7, t5       # E : find src misalignment
270         /* Conditionally load the first destin
271            with 0xff indicating that the desti
            # t0 and t6 stay zero when DST is quadword aligned; otherwise t0
            # is the current destination word and t6 has 0xff in the bytes
            # below (a0 & 7), which are the bytes that must be preserved.
272         mov     zero, t0        # E :
273
274         mov     zero, t6        # E :
275         beq     t4, 1f          # U :
276         ldq_u   t0, 0(a0)       # L :
277         lda     t6, -1          # E :
278
279         mskql   t6, a0, t6      # U :
280         nop
281         nop
282         nop
283 1:
284         subq    a1, t4, a1      # E : sub dest misalignment from src addr
285         /* If source misalignment is larger th
286            extra startup checks to avoid SEGV.
            # When t12 is 0 (dest misalignment >= src misalignment) go
            # straight to $u_head.  Otherwise look for a null among the valid
            # bytes of the first source word before $u_head reads the next one.
287         cmplt   t4, t5, t12     # E :
288         beq     t12, $u_head    # U :
289         lda     t2, -1          # E : mask out leading garbage in source
290
291         mskqh   t2, t5, t2      # U :
292         ornot   t1, t2, t3      # E : (stall)
293         cmpbge  zero, t3, t8    # E : is there a zero? (stall)
294         beq     t8, $u_head     # U : (stall)
295
296         /* At this point we've found a zero in
297            the source.  We need to isolate the
298            it into the original destination da
299            that we'll need at least one byte o
300
            # Short-string case: keep only the source bytes up to and
            # including the null, shift data and mask down by (a1 & 7) bytes
            # into destination position, clear those bytes in the old
            # destination word and merge.  t12 is shifted by the same count
            # so it marks the null within the destination word.
301         ldq_u   t0, 0(a0)       # L :
302         negq    t8, t6          # E : build bitmask of bytes <= zero
303         and     t6, t8, t12     # E : (stall)
304         and     a1, 7, t5       # E :
305
306         subq    t12, 1, t6      # E :
307         or      t6, t12, t8     # E : (stall)
308         srl     t12, t5, t12    # U : adjust final null return value
309         zapnot  t2, t8, t2      # U : prepare source word; mirror changes (stall)
310
311         and     t1, t2, t1      # E : to source validity mask
312         extql   t2, a1, t2      # U :
313         extql   t1, a1, t1      # U : (stall)
314         andnot  t0, t2, t0      # .. e1 : zero place for source to reside (stall)
315
316         or      t0, t1, t1      # e1    : and put it there
317         stq_u   t1, 0(a0)       # .. e0 : (stall)
318         ret     (t9)            # e1    :
319         nop
320
321         .end __stxcpy
322                                                   
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php