~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm64/lib/strncmp.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/arm64/lib/strncmp.S (Version linux-6.12-rc7) and /arch/i386/lib/strncmp.S (Version linux-6.4.16)


  1 /* SPDX-License-Identifier: GPL-2.0-only */       
  2 /*                                                
  3  * Copyright (c) 2013-2022, Arm Limited.          
  4  *                                                
  5  * Adapted from the original at:                  
  6  * https://github.com/ARM-software/optimized-r    
  7  */                                               
  8                                                   
  9 #include <linux/linkage.h>                        
 10 #include <asm/assembler.h>                        
 11                                                   
 12 /* Assumptions:                                   
 13  *                                                
 14  * ARMv8-a, AArch64.                              
 15  * MTE compatible.                                
 16  */                                               
 17                                                   
 18 #define L(label) .L ## label                      
 19                                                   
 20 #define REP8_01 0x0101010101010101                
 21 #define REP8_7f 0x7f7f7f7f7f7f7f7f                
 22                                                   
 23 /* Parameters and result.  */                     
 24 #define src1            x0                        
 25 #define src2            x1                        
 26 #define limit           x2                        
 27 #define result          x0                        
 28                                                   
 29 /* Internal variables.  */                        
 30 #define data1           x3                        
 31 #define data1w          w3                        
 32 #define data2           x4                        
 33 #define data2w          w4                        
 34 #define has_nul         x5                        
 35 #define diff            x6                        
 36 #define syndrome        x7                        
 37 #define tmp1            x8                        
 38 #define tmp2            x9                        
 39 #define tmp3            x10                       
 40 #define zeroones        x11                       
 41 #define pos             x12                       
 42 #define mask            x13                       
 43 #define endloop         x14                       
 44 #define count           mask                      
 45 #define offset          pos                       
 46 #define neg_offset      x15                       
 47                                                   
 48 /* Define endian dependent shift operations.      
 49    On big-endian early bytes are at MSB and on    
 50    LS_FW means shifting towards early bytes.      
 51    LS_BK means shifting towards later bytes.      
 52    */                                             
 53 #ifdef __AARCH64EB__                              
 54 #define LS_FW lsl                                 
 55 #define LS_BK lsr                                 
 56 #else                                             
 57 #define LS_FW lsr                                 
 58 #define LS_BK lsl                                 
 59 #endif                                            
 60                                                   
 61 SYM_FUNC_START(__pi_strncmp)                      
 62         cbz     limit, L(ret0)                    
 63         eor     tmp1, src1, src2                  
 64         mov     zeroones, #REP8_01                
 65         tst     tmp1, #7                          
 66         and     count, src1, #7                   
 67         b.ne    L(misaligned8)                    
 68         cbnz    count, L(mutual_align)            
 69                                                   
 70         /* NUL detection works on the principl    
 71            (=> (X - 1) & ~(X | 0x7f)) is non-z    
 72            can be done in parallel across the     
 73         .p2align 4                                
 74 L(loop_aligned):                                  
 75         ldr     data1, [src1], #8                 
 76         ldr     data2, [src2], #8                 
 77 L(start_realigned):                               
 78         subs    limit, limit, #8                  
 79         sub     tmp1, data1, zeroones             
 80         orr     tmp2, data1, #REP8_7f             
 81         eor     diff, data1, data2      /* Non    
 82         csinv   endloop, diff, xzr, hi  /* Las    
 83         bics    has_nul, tmp1, tmp2     /* Non    
 84         ccmp    endloop, #0, #0, eq               
 85         b.eq    L(loop_aligned)                   
 86         /* End of main loop */                    
 87                                                   
 88 L(full_check):                                    
 89 #ifndef __AARCH64EB__                             
 90         orr     syndrome, diff, has_nul           
 91         add     limit, limit, 8 /* Rewind limi    
 92 L(syndrome_check):                                
 93         /* Limit was reached. Check if the NUL    
 94            is before the limit. */                
 95         rev     syndrome, syndrome                
 96         rev     data1, data1                      
 97         clz     pos, syndrome                     
 98         rev     data2, data2                      
 99         lsl     data1, data1, pos                 
100         cmp     limit, pos, lsr #3                
101         lsl     data2, data2, pos                 
102         /* But we need to zero-extend (char is    
103            perform a signed 32-bit subtraction    
104         lsr     data1, data1, #56                 
105         sub     result, data1, data2, lsr #56     
106         csel result, result, xzr, hi              
107         ret                                       
108 #else                                             
109         /* Not reached the limit, must have fo    
110         tbz     limit, #63, L(not_limit)          
111         add     tmp1, limit, 8                    
112         cbz     limit, L(not_limit)               
113                                                   
114         lsl     limit, tmp1, #3 /* Bits -> byt    
115         mov     mask, #~0                         
116         lsr     mask, mask, limit                 
117         bic     data1, data1, mask                
118         bic     data2, data2, mask                
119                                                   
120         /* Make sure that the NUL byte is mark    
121         orr     has_nul, has_nul, mask            
122                                                   
123 L(not_limit):                                     
124         /* For big-endian we cannot use the tr    
125            as carry-propagation can corrupt th    
126            bytes in the string contain 0x01.      
127         /* However, if there is no NUL byte in    
128            the result directly.  We can't just    
129            MSB might be significant.  */          
130         cbnz    has_nul, 1f                       
131         cmp     data1, data2                      
132         cset    result, ne                        
133         cneg    result, result, lo                
134         ret                                       
135 1:                                                
136         /* Re-compute the NUL-byte detection,     
137         rev     tmp3, data1                       
138         sub     tmp1, tmp3, zeroones              
139         orr     tmp2, tmp3, #REP8_7f              
140         bic     has_nul, tmp1, tmp2               
141         rev     has_nul, has_nul                  
142         orr     syndrome, diff, has_nul           
143         clz     pos, syndrome                     
144         /* The most-significant-non-zero bit o    
145            first bit that is different, or the    
146            Shifting left now will bring the cr    
147            top bits.  */                          
148 L(end_quick):                                     
149         lsl     data1, data1, pos                 
150         lsl     data2, data2, pos                 
151         /* But we need to zero-extend (char is    
152            perform a signed 32-bit subtraction    
153         lsr     data1, data1, #56                 
154         sub     result, data1, data2, lsr #56     
155         ret                                       
156 #endif                                            
157                                                   
158 L(mutual_align):                                  
159         /* Sources are mutually aligned, but a    
160            alignment boundary.  Round down the    
161            the bytes that precede the start po    
162            We also need to adjust the limit ca    
163            overflowing if the limit is near UL    
164         bic     src1, src1, #7                    
165         bic     src2, src2, #7                    
166         ldr     data1, [src1], #8                 
167         neg     tmp3, count, lsl #3     /* 64     
168         ldr     data2, [src2], #8                 
169         mov     tmp2, #~0                         
170         LS_FW   tmp2, tmp2, tmp3        /* Shi    
171         /* Adjust the limit and ensure it does    
172         adds    limit, limit, count               
173         csinv   limit, limit, xzr, lo             
174         orr     data1, data1, tmp2                
175         orr     data2, data2, tmp2                
176         b       L(start_realigned)                
177                                                   
178         .p2align 4                                
179         /* Don't bother with dwords for up to     
180 L(misaligned8):                                   
181         cmp     limit, #16                        
182         b.hs    L(try_misaligned_words)           
183                                                   
184 L(byte_loop):                                     
185         /* Perhaps we can do better than this.    
186         ldrb    data1w, [src1], #1                
187         ldrb    data2w, [src2], #1                
188         subs    limit, limit, #1                  
189         ccmp    data1w, #1, #0, hi      /* NZC    
190         ccmp    data1w, data2w, #0, cs  /* NZC    
191         b.eq    L(byte_loop)                      
192 L(done):                                          
193         sub     result, data1, data2              
194         ret                                       
195         /* Align the SRC1 to a dword by doing     
196            the dword loop.  */                    
197 L(try_misaligned_words):                          
198         cbz     count, L(src1_aligned)            
199                                                   
200         neg     count, count                      
201         and     count, count, #7                  
202         sub     limit, limit, count               
203                                                   
204 L(page_end_loop):                                 
205         ldrb    data1w, [src1], #1                
206         ldrb    data2w, [src2], #1                
207         cmp     data1w, #1                        
208         ccmp    data1w, data2w, #0, cs  /* NZC    
209         b.ne    L(done)                           
210         subs    count, count, #1                  
211         b.hi    L(page_end_loop)                  
212                                                   
213         /* The following diagram explains the     
214            The bytes are shown in natural orde    
215            reversed in the registers. The "x"     
216            The "|" separates data that is load    
217            src1     | a a a a a a a a | b b b     
218            src2     | x x x x x a a a   a a a     
219                                                   
220            After shifting in each step, the da    
221                         STEP_A              ST    
222            data1    a a a a a a a a     b b b     
223            data2    a a a a a a a a     b b b     
224                                                   
225            The bytes with "0" are eliminated f    
226                                                   
227            Align SRC2 down to 16 bytes. This w    
228            time from SRC2. The comparison happ    
229            the loop can exit, or read from SRC    
230 L(src1_aligned):                                  
231         /* Calculate offset from 8 byte alignm    
232            need to mask offset since shifts ar    
233         lsl     offset, src2, #3                  
234         bic     src2, src2, #0xf                  
235         mov     mask, -1                          
236         neg     neg_offset, offset                
237         ldr     data1, [src1], #8                 
238         ldp     tmp1, tmp2, [src2], #16           
239         LS_BK   mask, mask, neg_offset            
240         and     neg_offset, neg_offset, #63       
241         /* Skip the first compare if data in t    
242         tbnz    offset, 6, L(misaligned_mid_lo    
243                                                   
244 L(loop_misaligned):                               
245         /* STEP_A: Compare full 8 bytes when t    
246         LS_FW   data2, tmp1, offset               
247         LS_BK   tmp1, tmp2, neg_offset            
248         subs    limit, limit, #8                  
249         orr     data2, data2, tmp1      /* 8 b    
250         sub     has_nul, data1, zeroones          
251         eor     diff, data1, data2      /* Non    
252         orr     tmp3, data1, #REP8_7f             
253         csinv   endloop, diff, xzr, hi  /* If     
254         bic     has_nul, has_nul, tmp3  /* Non    
255         orr     tmp3, endloop, has_nul            
256         cbnz    tmp3, L(full_check)               
257                                                   
258         ldr     data1, [src1], #8                 
259 L(misaligned_mid_loop):                           
260         /* STEP_B: Compare first part of data1    
261         LS_FW   data2, tmp2, offset               
262 #ifdef __AARCH64EB__                              
263         /* For big-endian we do a byte reverse    
264         problem described above. This way we c    
265         next step and also use syndrome value     
266         rev     tmp3, data1                       
267         #define data1_fixed tmp3                  
268 #else                                             
269         #define data1_fixed data1                 
270 #endif                                            
271         sub     has_nul, data1_fixed, zeroones    
272         orr     tmp3, data1_fixed, #REP8_7f       
273         eor     diff, data2, data1      /* Non    
274         bic     has_nul, has_nul, tmp3  /* Non    
275 #ifdef __AARCH64EB__                              
276         rev     has_nul, has_nul                  
277 #endif                                            
278         cmp     limit, neg_offset, lsr #3         
279         orr     syndrome, diff, has_nul           
280         bic     syndrome, syndrome, mask          
281         csinv   tmp3, syndrome, xzr, hi /* If     
282         cbnz    tmp3, L(syndrome_check)           
283                                                   
284         /* STEP_C: Compare second part of data    
285         ldp     tmp1, tmp2, [src2], #16           
286         cmp     limit, #8                         
287         LS_BK   data2, tmp1, neg_offset           
288         eor     diff, data2, data1      /* Non    
289         orr     syndrome, diff, has_nul           
290         and     syndrome, syndrome, mask          
291         csinv   tmp3, syndrome, xzr, hi /* If     
292         cbnz    tmp3, L(syndrome_check)           
293                                                   
294         ldr     data1, [src1], #8                 
295         sub     limit, limit, #8                  
296         b       L(loop_misaligned)                
297                                                   
298 #ifdef  __AARCH64EB__                             
299 L(syndrome_check):                                
300         clz     pos, syndrome                     
301         cmp     pos, limit, lsl #3                
302         b.lo    L(end_quick)                      
303 #endif                                            
304                                                   
305 L(ret0):                                          
306         mov     result, #0                        
307         ret                                       
308 SYM_FUNC_END(__pi_strncmp)                        
309 SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp)        
310 EXPORT_SYMBOL_NOKASAN(strncmp)                    
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php