/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Xtensa version:  Copyright (C) 2001 Tensilica, Inc.
 *		    Optimized by Joe Taylor
 */

#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum)
 *    a2 = buf
 *    a3 = len
 *    a4 = sum
 *
 * This function assumes 2- or 4-byte alignment.
 */
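
/*
 * For reference, a C sketch of the accumulation done below (illustrative
 * only, little-endian model; ones_add() is a hypothetical helper whose
 * assembly equivalent is the ONES_ADD macro that follows):
 *
 *	static unsigned int ones_add(unsigned int sum, unsigned int val)
 *	{
 *		sum += val;
 *		return sum + (sum < val);	// end-around carry
 *	}
 *
 *	unsigned int csum_partial_ref(const unsigned char *buf, int len,
 *				      unsigned int sum)
 *	{
 *		while (len >= 4) {
 *			sum = ones_add(sum, *(const unsigned int *)buf);
 *			buf += 4;
 *			len -= 4;
 *		}
 *		if (len & 2) {
 *			sum = ones_add(sum, *(const unsigned short *)buf);
 *			buf += 2;
 *		}
 *		if (len & 1)
 *			sum = ones_add(sum, *buf);
 *		return sum;
 *	}
 */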

/* ONES_ADD converts twos-complement math to ones-complement. */
#define ONES_ADD(sum, val)	  \
	add	sum, sum, val	; \
	bgeu	sum, val, 99f	; \
	addi	sum, sum, 1	; \
99:				;

.text
ENTRY(csum_partial)

	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.
	 */
	abi_entry_default
	extui	a5, a2, 0, 2
	bnez	a5, 8f		/* branch if 2-byte aligned */
	/* Fall-through on common case, 4-byte alignment */
1:
	srli	a5, a3, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 5
	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
.Loop1:
#endif
	l32i	a6, a2, 0
	l32i	a7, a2, 4
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 8
	l32i	a7, a2, 12
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 16
	l32i	a7, a2, 20
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 24
	l32i	a7, a2, 28
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	addi	a2, a2, 4*8
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop1
#endif
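
	/*
	 * After the 32-byte chunks, the tail of len is consumed bit by
	 * bit; roughly, in C (a sketch of the dispatch only):
	 *
	 *	words    = (len >> 2) & 7;	// extui a5, a3, 2, 3
	 *	halfword = len & 2;		// _bbci.l a3, 1, ...
	 *	byte     = len & 1;		// _bbci.l a3, 0, ...
	 */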
2:
	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 3f
#else
	beqz	a5, 3f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop2:
#endif
	l32i	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop2
#endif
3:
	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */
	l16ui	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 2
5:
	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */
6:	l8ui	a6, a2, 0
#ifdef __XTENSA_EB__
	slli	a6, a6, 8	/* load byte into bits 8..15 */
#endif
	ONES_ADD(a4, a6)
7:
	mov	a2, a4
	abi_ret_default

	/* uncommon case, buf is 2-byte aligned */
8:
	beqz	a3, 7b		/* branch if len == 0 */
	beqi	a3, 1, 6b	/* branch if len == 1 */

	extui	a5, a2, 0, 1
	bnez	a5, 8f		/* branch if 1-byte aligned */

	l16ui	a6, a2, 0	/* common case, len >= 2 */
	ONES_ADD(a4, a6)
	addi	a2, a2, 2	/* adjust buf */
	addi	a3, a3, -2	/* adjust len */
	j	1b		/* now buf is 4-byte aligned */
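
	/*
	 * The slow path below builds an aligned 32-bit value from an odd
	 * address with a byte load, an (even-address) halfword load, and
	 * another byte load.  One iteration as a little-endian C sketch
	 * (ones_add() as in the reference sketch near the top of this file):
	 *
	 *	unsigned int w;
	 *	w  = buf[0];				// l8ui  a6, a2, 0
	 *	w |= *(unsigned short *)(buf + 1) << 8;	// l16ui a7, a2, 1
	 *	w |= (unsigned int)buf[3] << 24;	// l8ui  a8, a2, 3
	 *	sum = ones_add(sum, w);
	 *	buf += 4;
	 */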
	/* case: odd-byte aligned, len > 1
	 * This case is dog slow, so don't give us an odd address.
	 * (I don't think this ever happens, but just in case.)
	 */
8:
	srli	a5, a3, 2	/* 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop3:
#endif
	l8ui	a6, a2, 0	/* bits 24..31 */
	l16ui	a7, a2, 1	/* bits  8..23 */
	l8ui	a8, a2, 3	/* bits  0.. 8 */
#ifdef	__XTENSA_EB__
	slli	a6, a6, 24
#else
	slli	a8, a8, 24
#endif
	slli	a7, a7, 8
	or	a7, a7, a6
	or	a7, a7, a8
	ONES_ADD(a4, a7)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop3
#endif
2:
	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk */
	l8ui	a6, a2, 0
	l8ui	a7, a2, 1
#ifdef	__XTENSA_EB__
	slli	a6, a6, 8
#else
	slli	a7, a7, 8
#endif
	or	a7, a7, a6
	ONES_ADD(a4, a7)
	addi	a2, a2, 2
3:
	j	5b		/* branch to handle the trailing byte */

ENDPROC(csum_partial)
EXPORT_SYMBOL(csum_partial)

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
	a2  = src
	a3  = dst
	a4  = len
	a5  = sum
	a8  = temp
	a9  = temp
	a10 = temp

    This function is optimized for 4-byte aligned addresses.  Other
    alignments work, but not nearly as efficiently.
 */

ENTRY(csum_partial_copy_generic)

	abi_entry_default
	movi	a5, -1
	or	a10, a2, a3

	/* We optimize the following alignment tests for the 4-byte
	aligned case.  Two bbsi.l instructions might seem more optimal
	(commented out below).  However, both labels 5: and 3: are out
	of the imm8 range, so the assembler relaxes them into
	equivalent bbci.l, j combinations, which is actually
	slower. */

	extui	a9, a10, 0, 2
	beqz	a9, 1f		/* branch if both are 4-byte aligned */
	bbsi.l	a10, 0, 5f	/* branch if one address is odd */
	j	3f		/* one address is 2-byte aligned */

/*	_bbsi.l	a10, 0, 5f */	/* branch if odd address */
/*	_bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */
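
	/*
	 * Merging src and dst with "or" lets a single low-bit test pick
	 * the path for the worst-aligned of the two pointers.  As a C
	 * sketch of the dispatch above (labels are illustrative):
	 *
	 *	switch ((src | dst) & 3) {
	 *	case 0:			// both 4-byte aligned
	 *		goto aligned_word_copy;
	 *	case 2:			// at worst 2-byte aligned
	 *		goto halfword_copy;
	 *	default:		// one pointer is odd
	 *		goto byte_copy;
	 *	}
	 */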
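	/*
	 * The copy loops below wrap each user-memory load and store in
	 * EX(10f), the fixup annotation pulled in via <asm/asmmacro.h>.
	 * As a rough model of the mechanism (a sketch, not literal code):
	 *
	 *	each EX(10f)-tagged access gets an exception-table entry
	 *		{ address_of_insn, fixup = local label 10 };
	 *	on a fault, the kernel branches to label 10 (end of this
	 *	file), which makes the whole routine return 0.
	 */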
1:
	/* src and dst are both 4-byte aligned */
	srli	a10, a4, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 2f
#else
	beqz	a10, 2f
	slli	a10, a10, 5
	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
.Loop5:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	l32i	a8, a2, 4
EX(10f)	s32i	a9, a3, 0
EX(10f)	s32i	a8, a3, 4
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 8
EX(10f)	l32i	a8, a2, 12
EX(10f)	s32i	a9, a3, 8
EX(10f)	s32i	a8, a3, 12
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 16
EX(10f)	l32i	a8, a2, 20
EX(10f)	s32i	a9, a3, 16
EX(10f)	s32i	a8, a3, 20
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 24
EX(10f)	l32i	a8, a2, 28
EX(10f)	s32i	a9, a3, 24
EX(10f)	s32i	a8, a3, 28
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
	addi	a2, a2, 32
	addi	a3, a3, 32
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop5
#endif
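
	/*
	 * Each step of these loops is a memcpy fused with the checksum:
	 * every word is loaded once, stored to dst, and folded into the
	 * running sum.  One 4-byte step as a C sketch (fault handling
	 * elided; ones_add() as sketched near the top of this file):
	 *
	 *	unsigned int w = *(const unsigned int *)src;	// EX-protected
	 *	*(unsigned int *)dst = w;			// EX-protected
	 *	sum = ones_add(sum, w);
	 *	src += 4;
	 *	dst += 4;
	 */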
2:
	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 3f
#else
	beqz	a10, 3f
	slli	a10, a10, 2
	add	a10, a10, a2	/* a10 = end of last 4-byte chunk */
.Loop6:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	s32i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 4
	addi	a3, a3, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop6
#endif
3:
	/*
	Control comes to here in two cases: (1) It branches
	to here from the 4-byte alignment case when there is at most
	one 2-byte chunk left.  (2) It branches to here when
	either src or dst is 2-byte aligned, and we process all bytes
	here, except for perhaps a trailing odd byte.  It's
	inefficient, so align your addresses to 4-byte boundaries.

	a2 = src
	a3 = dst
	a4 = len
	a5 = sum
	*/
	srli	a10, a4, 1	/* 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 4f
#else
	beqz	a10, 4f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last 2-byte chunk */
.Loop7:
#endif
EX(10f)	l16ui	a9, a2, 0
EX(10f)	s16i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop7
#endif
4:
	/* This section processes a possible trailing odd byte. */
	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
EX(10f)	l8ui	a9, a2, 0
EX(10f)	s8i	a9, a3, 0
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* shift byte to bits 8..15 */
#endif
	ONES_ADD(a5, a9)
8:
	mov	a2, a5
	abi_ret_default

5:
	/* Control branch to here when either src or dst is odd.  We
	process all bytes using 8-bit accesses.  Grossly inefficient,
	so don't feed us an odd address. */

	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 6f
#else
	beqz	a10, 6f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last 2-byte chunk */
.Loop8:
#endif
EX(10f)	l8ui	a9, a2, 0
EX(10f)	l8ui	a8, a2, 1
EX(10f)	s8i	a9, a3, 0
EX(10f)	s8i	a8, a3, 1
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* combine into a single 16-bit value */
#else				/* for checksum computation. The sum of */
	slli	a8, a8, 8	/* 2 (aligned) bytes is also the sum of */
#endif				/* the 2 bytes _reversed_ */
	or	a9, a9, a8
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop8
#endif
6:
	j	4b		/* process the possible trailing odd byte */

ENDPROC(csum_partial_copy_generic)
EXPORT_SYMBOL(csum_partial_copy_generic)


# Exception handler:
.section .fixup, "ax"
10:
	movi	a2, 0
	abi_ret_default

.previous
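
/*
 * Note: these routines produce a 32-bit partial sum.  The final 16-bit
 * Internet checksum is obtained by folding that sum with csum_fold();
 * a common C formulation of the fold (a sketch, not this file's code):
 *
 *	unsigned short csum_fold_ref(unsigned int sum)
 *	{
 *		sum = (sum & 0xffff) + (sum >> 16);	// add high half
 *		sum += sum >> 16;			// fold the carry
 *		return (unsigned short)~sum;		// one's complement
 *	}
 */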