~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/alpha/lib/ev67-strrchr.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/alpha/lib/ev67-strrchr.S (Architecture i386) and /arch/alpha/lib/ev67-strrchr.S (Architecture alpha)


  1 /* SPDX-License-Identifier: GPL-2.0 */              1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*                                                  2 /*
  3  * arch/alpha/lib/ev67-strrchr.S                    3  * arch/alpha/lib/ev67-strrchr.S
  4  * 21264 version by Rick Gorton <rick.gorton@al      4  * 21264 version by Rick Gorton <rick.gorton@alpha-processor.com>
  5  *                                                  5  *
  6  * Finds the last occurrence of a character in      6  * Finds the last occurrence of a character in a 0-terminated string.  Optimized for the
  7  * Alpha architecture:                              7  * Alpha architecture:
  8  *                                                  8  *
  9  *      - memory accessed as aligned quadwords      9  *      - memory accessed as aligned quadwords only
 10  *      - uses cmpbge to compare 8 bytes in pa     10  *      - uses cmpbge to compare 8 bytes in parallel
 11  *                                                 11  *
 12  * Much of the information about 21264 schedul     12  * Much of the information about 21264 scheduling/coding comes from:
 13  *      Compiler Writer's Guide for the Alpha      13  *      Compiler Writer's Guide for the Alpha 21264
 14  *      abbreviated as 'CWG' in other comments     14  *      abbreviated as 'CWG' in other comments here
 15  *      ftp.digital.com/pub/Digital/info/semic     15  *      ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 16  * Scheduling notation:                            16  * Scheduling notation:
 17  *      E       - either cluster                   17  *      E       - either cluster
 18  *      U       - upper subcluster; U0 - subcl     18  *      U       - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 19  *      L       - lower subcluster; L0 - subcl     19  *      L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 20  */                                                20  */
 21                                                    21 
 22 #include <linux/export.h>                          22 #include <linux/export.h>
 23 #include <asm/regdef.h>                            23 #include <asm/regdef.h>
 24                                                    24 
 25         .set noreorder                             25         .set noreorder
 26         .set noat                                  26         .set noat
 27                                                    27 
 28         .align 4                                   28         .align 4
 29         .ent strrchr                               29         .ent strrchr
 30         .globl strrchr                             30         .globl strrchr
 31 strrchr:        # in: a0 = string, a1 = char;      31 strrchr:        # in: a0 = string, a1 = char; out: v0 = address of last match, or 0
 32         .frame sp, 0, ra                           32         .frame sp, 0, ra
 33         .prologue 0                                33         .prologue 0
 34                                                    34 
 35         and     a1, 0xff, t2    # E : 00000000     35         and     a1, 0xff, t2    # E : 00000000000000ch
 36         insbl   a1, 1, t4       # U : 00000000     36         insbl   a1, 1, t4       # U : 000000000000ch00
 37         insbl   a1, 2, t5       # U : 00000000     37         insbl   a1, 2, t5       # U : 0000000000ch0000
 38         ldq_u   t0, 0(a0)       # L : load fir     38         ldq_u   t0, 0(a0)       # L : load first quadword Latency=3
 39                                                    39 
 40         mov     zero, t6        # E : t6 is la     40         mov     zero, t6        # E : t6 is last match aligned addr
 41         or      t2, t4, a1      # E : 00000000     41         or      t2, t4, a1      # E : 000000000000chch
 42         sll     t5, 8, t3       # U : 00000000     42         sll     t5, 8, t3       # U : 00000000ch000000
 43         mov     zero, t8        # E : t8 is la     43         mov     zero, t8        # E : t8 is last match byte compare mask
 44                                                    44 
 45         andnot  a0, 7, v0       # E : align so     45         andnot  a0, 7, v0       # E : align source addr
 46         or      t5, t3, t3      # E : 00000000     46         or      t5, t3, t3      # E : 00000000chch0000
 47         sll     a1, 32, t2      # U : 0000chch     47         sll     a1, 32, t2      # U : 0000chch00000000
 48         sll     a1, 48, t4      # U : chch0000     48         sll     a1, 48, t4      # U : chch000000000000
 49                                                    49 
 50         or      t4, a1, a1      # E : chch0000     50         or      t4, a1, a1      # E : chch00000000chch
 51         or      t2, t3, t2      # E : 0000chch     51         or      t2, t3, t2      # E : 0000chchchch0000
 52         or      a1, t2, a1      # E : chchchch     52         or      a1, t2, a1      # E : chchchchchchchch
 53         lda     t5, -1          # E : build ga     53         lda     t5, -1          # E : build garbage mask
 54                                                    54 
 55         cmpbge  zero, t0, t1    # E : bits set     55         cmpbge  zero, t0, t1    # E : bits set iff byte == zero
 56         mskqh   t5, a0, t4      # E : Complete     56         mskqh   t5, a0, t4      # E : Complete garbage mask
 57         xor     t0, a1, t2      # E : make byt     57         xor     t0, a1, t2      # E : make bytes == c zero
 58         cmpbge  zero, t4, t4    # E : bits set     58         cmpbge  zero, t4, t4    # E : bits set iff byte is garbage
 59                                                    59 
 60         cmpbge  zero, t2, t3    # E : bits set     60         cmpbge  zero, t2, t3    # E : bits set iff byte == c
 61         andnot  t1, t4, t1      # E : clear ga     61         andnot  t1, t4, t1      # E : clear garbage from null test
 62         andnot  t3, t4, t3      # E : clear ga     62         andnot  t3, t4, t3      # E : clear garbage from char test
 63         bne     t1, $eos        # U : did we a     63         bne     t1, $eos        # U : did we already hit the terminator?
 64                                                    64 
 65         /* Character search main loop */           65         /* Character search main loop */
 66 $loop:                                             66 $loop:
 67         ldq     t0, 8(v0)       # L : load nex     67         ldq     t0, 8(v0)       # L : load next quadword
 68         cmovne  t3, v0, t6      # E : save pre     68         cmovne  t3, v0, t6      # E : save previous comparisons match
 69         nop                     #   : Latency=     69         nop                     #   : Latency=2, extra map slot (keep nop with cmov)
 70         nop                                        70         nop
 71                                                    71 
 72         cmovne  t3, t3, t8      # E : Latency=     72         cmovne  t3, t3, t8      # E : Latency=2, extra map slot
 73         nop                     #   : keep wit     73         nop                     #   : keep with cmovne
 74         addq    v0, 8, v0       # E : point v0     74         addq    v0, 8, v0       # E : point v0 at the quadword just loaded
 75         xor     t0, a1, t2      # E : make byt     75         xor     t0, a1, t2      # E : make bytes == c zero
 76                                                    76 
 77         cmpbge  zero, t0, t1    # E : bits set     77         cmpbge  zero, t0, t1    # E : bits set iff byte == zero
 78         cmpbge  zero, t2, t3    # E : bits set     78         cmpbge  zero, t2, t3    # E : bits set iff byte == c
 79         beq     t1, $loop       # U : if we ha     79         beq     t1, $loop       # U : if we haven't seen a null, loop
 80         nop                                        80         nop
 81                                                    81 
 82         /* Mask out character matches after te     82         /* Mask out character matches after terminator */
 83 $eos:                                              83 $eos:
 84         negq    t1, t4          # E : isolate      84         negq    t1, t4          # E : isolate first null byte match
 85         and     t1, t4, t4      # E : t4 = low     85         and     t1, t4, t4      # E : t4 = lowest set bit of t1
 86         subq    t4, 1, t5       # E : build a      86         subq    t4, 1, t5       # E : build a mask of the bytes up to...
 87         or      t4, t5, t4      # E : ... and      87         or      t4, t5, t4      # E : ... and including the null
 88                                                    88 
 89         and     t3, t4, t3      # E : mask out     89         and     t3, t4, t3      # E : mask out char matches after null
 90         cmovne  t3, t3, t8      # E : save it,     90         cmovne  t3, t3, t8      # E : save it, if match found Latency=2, extra map slot
 91         nop                     #   : Keep wit     91         nop                     #   : Keep with cmovne
 92         nop                                        92         nop
 93                                                    93 
 94         cmovne  t3, v0, t6      # E : record q     94         cmovne  t3, v0, t6      # E : record quadword addr of that match
 95         nop                     #   : Keep wit     95         nop                     #   : Keep with cmovne
 96         /* Locate the address of the last matc     96         /* Locate the address of the last matched character */
 97         ctlz    t8, t2          # U0 : Latency     97         ctlz    t8, t2          # U0 : Latency=3 (0x40 for t8=0)
 98         nop                                        98         nop
 99                                                    99 
100         cmoveq  t8, 0x3f, t2    # E : Compensa    100         cmoveq  t8, 0x3f, t2    # E : Compensate for case when no match is seen
101         nop                     # E : hide the    101         nop                     # E : hide the cmov latency (2) behind ctlz latency
102         lda     t5, 0x3f($31)   # E : t5 = 63     102         lda     t5, 0x3f($31)   # E : t5 = 63
103         subq    t5, t2, t5      # E : Normaliz    103         subq    t5, t2, t5      # E : Normalize leading zero count
104                                                   104 
105         addq    t6, t5, v0      # E : and add     105         addq    t6, t5, v0      # E : and add to quadword address
106         ret                     # L0 : Latency    106         ret                     # L0 : Latency=3
107         nop                                       107         nop
108         nop                                       108         nop
109                                                   109 
110         .end strrchr                              110         .end strrchr
111         EXPORT_SYMBOL(strrchr)                    111         EXPORT_SYMBOL(strrchr)
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php