
TOMOYO Linux Cross Reference
Linux/arch/xtensa/lib/checksum.S


Diff markup

Differences between /arch/xtensa/lib/checksum.S (Version linux-6.12-rc7) and /arch/i386/lib/checksum.S (Version linux-2.4.37.11)


  1 /* SPDX-License-Identifier: GPL-2.0-or-later */ << 
  2 /*                                                  1 /*
  3  * INET         An implementation of the TCP/I      2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  4  *              operating system.  INET is imp      3  *              operating system.  INET is implemented using the  BSD Socket
  5  *              interface as the means of comm      4  *              interface as the means of communication with the user level.
  6  *                                                  5  *
  7  *              IP/TCP/UDP checksumming routin      6  *              IP/TCP/UDP checksumming routines
  8  *                                                  7  *
  9  * Xtensa version:  Copyright (C) 2001 Tensilica, Inc. !!   8  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 10  *                  Optimized by Joe Taylor    !!   9  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
                                                   >>  10  *              Tom May, <ftom@netcom.com>
                                                   >>  11  *              Pentium Pro/II routines:
                                                   >>  12  *              Alexander Kjeldaas <astor@guardian.no>
                                                   >>  13  *              Finn Arne Gangstad <finnag@guardian.no>
                                                   >>  14  *              Lots of code moved from tcp.c and ip.c; see those files
                                                   >>  15  *              for more names.
                                                   >>  16  *
                                                   >>  17  * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
                                                   >>  18  *                           handling.
                                                   >>  19  *              Andi Kleen,  add zeroing on error
                                                   >>  20  *                   converted to pure assembler
                                                   >>  21  *
                                                   >>  22  *              This program is free software; you can redistribute it and/or
                                                   >>  23  *              modify it under the terms of the GNU General Public License
                                                   >>  24  *              as published by the Free Software Foundation; either version
                                                   >>  25  *              2 of the License, or (at your option) any later version.
 11  */                                                26  */
 12                                                    27 
 13 #include <linux/errno.h>                       !!  28 #include <linux/config.h>
 14 #include <linux/linkage.h>                     !!  29 #include <asm/errno.h>
 15 #include <asm/asmmacro.h>                      !!  30                                 
 16 #include <asm/core.h>                          << 
 17                                                << 
 18 /*                                                 31 /*
 19  * computes a partial checksum, e.g. for TCP/UDP fragments     32  * computes a partial checksum, e.g. for TCP/UDP fragments
 20  */                                                33  */
 21                                                    34 
 22 /*                                             !!  35 /*      
 23  * unsigned int csum_partial(const unsigned char *buf, int len, !!  36 unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 24  *                           unsigned int sum) << 
 25  *    a2 = buf                                 << 
 26  *    a3 = len                                 << 
 27  *    a4 = sum                                 << 
 28  *                                             << 
 29  * This function assumes 2- or 4-byte alignment.  Other alignments will fail! << 
 30  */                                                37  */
 31                                                !!  38                 
 32 /* ONES_ADD converts twos-complement math to ones-complement. */ << 
 33 #define ONES_ADD(sum, val)        \            << 
 34         add     sum, sum, val   ; \            << 
 35         bgeu    sum, val, 99f   ; \            << 
 36         addi    sum, sum, 1     ; \            << 
 37 99:                             ;              << 
 38                                                << 
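
Both implementations rest on the same arithmetic fact: the IP checksum is a
ones'-complement sum, so every carry out of bit 31 must be folded back into
bit 0.  ONES_ADD does this with a compare-and-increment; the i386 code below
gets the same effect from the carry flag via adcl.  A minimal C sketch of the
idea (the function name is illustrative, not from either file):

    /* Add with end-around carry: the ones'-complement accumulate step
     * that ONES_ADD (xtensa) and the adcl chains (i386) implement. */
    static inline unsigned int ones_add(unsigned int sum, unsigned int val)
    {
            sum += val;
            if (sum < val)          /* unsigned wraparound == carry out */
                    sum += 1;       /* fold the carry back into bit 0 */
            return sum;
    }
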
 39 .text                                              39 .text
 40 ENTRY(csum_partial)                            !!  40 .align 4
 41                                                !!  41 .globl csum_partial                                                             
 42         /*                                     !!  42                 
 43          * Experiments with Ethernet and SLIP connections show that buf !!  46            * Experiments with Ethernet and SLIP connections show that buff
 44          * is aligned on either a 2-byte or 4-byte boundary. !!  47            * is aligned on either a 2-byte or 4-byte boundary.  We get at
 45          */                                    !!  45           /*            
 46         abi_entry_default                      !!  46            * Experiments with Ethernet and SLIP connections show that buff
 47         extui   a5, a2, 0, 2                   !!  47            * is aligned on either a 2-byte or 4-byte boundary.  We get at
 48         bnez    a5, 8f          /* branch if 2-byte aligned */ !!  51            */
 49         /* Fall-through on common case, 4-byte alignment */ !!  52 csum_partial:
 50 1:                                             !!  50            * alignment for the unrolled loop.
 51         srli    a5, a3, 5       /* 32-byte chunks */ !!  54         pushl %ebx
 52 #if XCHAL_HAVE_LOOPS                           !!  52 csum_partial:   
 53         loopgtz a5, 2f                         !!  53         pushl %esi
 54 #else                                          !!  54         pushl %ebx
 55         beqz    a5, 2f                         !!  55         movl 20(%esp),%eax      # Function arg: unsigned int sum
 56         slli    a5, a5, 5                      !!  56         movl 16(%esp),%ecx      # Function arg: int len
 57         add     a5, a5, a2      /* a5 = end of last 32-byte chunk */ !!  58         testl $3, %esi          # Check alignment.
 58 .Loop1:                                        !!  58         testl $3, %esi          # Check alignment.
 59 #endif                                         !!  59         jz 2f                   # Jump if alignment is ok.
 60         l32i    a6, a2, 0                      !!  60         testl $1, %esi          # Check alignment.
 61         l32i    a7, a2, 4                      !!  61         jz 10f                  # Jump if alignment is boundary of 2bytes.
 62         ONES_ADD(a4, a6)                       !!  62 
 63         ONES_ADD(a4, a7)                       !!  63         # buf is odd
 64         l32i    a6, a2, 8                      !!  64         dec %ecx
 65         l32i    a7, a2, 12                     !!  65         jl 8f
 66         ONES_ADD(a4, a6)                       !!  66         movzbl (%esi), %ebx
 67         ONES_ADD(a4, a7)                       !!  67         adcl %ebx, %eax
 68         l32i    a6, a2, 16                     !!  68         roll $8, %eax
 69         l32i    a7, a2, 20                     !!  69         inc %esi
 70         ONES_ADD(a4, a6)                       !!  70         testl $2, %esi
 71         ONES_ADD(a4, a7)                       !!  71         jz 2f
 72         l32i    a6, a2, 24                     !!  72 10:
 73         l32i    a7, a2, 28                     !!  73         subl $2, %ecx           # Alignment uses up two bytes.
 74         ONES_ADD(a4, a6)                       !!  74         jae 1f                  # Jump if we had at least two bytes.
 75         ONES_ADD(a4, a7)                       !!  75         addl $2, %ecx           # ecx was < 2.  Deal with it.
 76         addi    a2, a2, 4*8                    !!  76         jmp 4f
 77 #if !XCHAL_HAVE_LOOPS                          !!  77 1:      movw (%esi), %bx
 78         blt     a2, a5, .Loop1                 !!  78         addl $2, %esi
 79 #endif                                         !!  79         addw %bx, %ax
                                                   >>  80         adcl $0, %eax
 80 2:                                                 81 2:
 81         extui   a5, a3, 2, 3    /* remaining 4-byte chunks */ !!  82         movl %ecx, %edx
 82 #if XCHAL_HAVE_LOOPS                           !!  83         shrl $5, %ecx
 83         loopgtz a5, 3f                         !!  84         jz 2f
                                                   >>  85         testl %esi, %esi
                                                   >>  86 1:      movl (%esi), %ebx
                                                   >>  87         adcl %ebx, %eax
                                                   >>  88         movl 4(%esi), %ebx
                                                   >>  89         adcl %ebx, %eax
                                                   >>  90         movl 8(%esi), %ebx
                                                   >>  91         adcl %ebx, %eax
                                                   >>  92         movl 12(%esi), %ebx
                                                   >>  93         adcl %ebx, %eax
                                                   >>  94         movl 16(%esi), %ebx
                                                   >>  95         adcl %ebx, %eax
                                                   >>  96         movl 20(%esi), %ebx
                                                   >>  97         adcl %ebx, %eax
                                                   >>  98         movl 24(%esi), %ebx
                                                   >>  99         adcl %ebx, %eax
                                                   >> 100         movl 28(%esi), %ebx
                                                   >> 101         adcl %ebx, %eax
                                                   >> 102         lea 32(%esi), %esi
                                                   >> 103         dec %ecx
                                                   >> 104         jne 1b
                                                   >> 105         adcl $0, %eax
                                                   >> 106 2:      movl %edx, %ecx
                                                   >> 107         andl $0x1c, %edx
                                                   >> 108         je 4f
                                                   >> 109         shrl $2, %edx           # This clears CF
                                                   >> 110 3:      adcl (%esi), %eax
                                                   >> 111         lea 4(%esi), %esi
                                                   >> 112         dec %edx
                                                   >> 113         jne 3b
                                                   >> 114         adcl $0, %eax
                                                   >> 115 4:      andl $3, %ecx
                                                   >> 116         jz 7f
                                                   >> 117         cmpl $2, %ecx
                                                   >> 118         jb 5f
                                                   >> 119         movw (%esi),%cx
                                                   >> 120         leal 2(%esi),%esi
                                                   >> 121         je 6f
                                                   >> 122         shll $16,%ecx
                                                   >> 123 5:      movb (%esi),%cl
                                                   >> 124 6:      addl %ecx,%eax
                                                   >> 125         adcl $0, %eax 
                                                   >> 126 7:      
                                                   >> 127         testl $1, 12(%esp)
                                                   >> 128         jz 8f
                                                   >> 129         roll $8, %eax
                                                   >> 130 8:
                                                   >> 131         popl %ebx
                                                   >> 132         popl %esi
                                                   >> 133         ret
                                                   >> 134 
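
Note that csum_partial returns a 32-bit partial sum; the caller folds it to
the final 16-bit checksum.  The roll $8 in the odd-buffer path above rotates
the accumulator so the remaining even-aligned words are added in matching
byte positions, and the final testl $1, 12(%esp) / roll $8 swaps the result
back when the buffer started on an odd address (a 16-bit rotation of the
accumulator is checksum-neutral after folding).  A sketch of the usual
32-to-16-bit fold, modeled on the kernel's csum_fold (illustrative):

    /* Fold a 32-bit partial sum to the 16-bit ones'-complement result. */
    static inline unsigned short csum_fold_sketch(unsigned int sum)
    {
            sum = (sum & 0xffff) + (sum >> 16);     /* add high half in */
            sum = (sum & 0xffff) + (sum >> 16);     /* absorb a possible carry */
            return (unsigned short)~sum;
    }
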
 84 #else                                             135 #else
 85         beqz    a5, 3f                         << 
 86         slli    a5, a5, 2                      << 
 87         add     a5, a5, a2      /* a5 = end of last 4-byte chunk */ << 
 88 .Loop2:                                        << 
 89 #endif                                         << 
 90         l32i    a6, a2, 0                      << 
 91         ONES_ADD(a4, a6)                       << 
 92         addi    a2, a2, 4                      << 
 93 #if !XCHAL_HAVE_LOOPS                          << 
 94         blt     a2, a5, .Loop2                 << 
 95 #endif                                         << 
 96 3:                                             << 
 97         _bbci.l a3, 1, 5f       /* remaining 2-byte chunk */ << 
 98         l16ui   a6, a2, 0                      << 
 99         ONES_ADD(a4, a6)                       << 
100         addi    a2, a2, 2                      << 
101 5:                                             << 
102         _bbci.l a3, 0, 7f       /* remaining 1-byte chunk */ << 
103 6:      l8ui    a6, a2, 0                      << 
104 #ifdef __XTENSA_EB__                           << 
105         slli    a6, a6, 8       /* load byte into bits 8..15 */ << 
106 #endif                                         << 
107         ONES_ADD(a4, a6)                       << 
108 7:                                             << 
109         mov     a2, a4                         << 
110         abi_ret_default                        << 
111                                                   136 
112         /* uncommon case, buf is 2-byte aligned */ !! 137 /* Version for PentiumII/PPro */
113 8:                                             << 
114         beqz    a3, 7b          /* branch if len == 0 */ << 
115         beqi    a3, 1, 6b       /* branch if len == 1 */ << 
116                                                   138 
117         extui   a5, a2, 0, 1                   !! 139 csum_partial:
118         bnez    a5, 8f          /* branch if 1-byte aligned */ !! 140         pushl %esi
                                                   >> 141         pushl %ebx
                                                   >> 142         movl 20(%esp),%eax      # Function arg: unsigned int sum
                                                   >> 143         movl 16(%esp),%ecx      # Function arg: int len
                                                   >> 144         movl 12(%esp),%esi      # Function arg: const unsigned char *buf
119                                                   145 
120         l16ui   a6, a2, 0       /* common case, len >= 2 */ !! 146         testl $3, %esi         
121         ONES_ADD(a4, a6)                       !! 147         jnz 25f                 
122         addi    a2, a2, 2       /* adjust buf */ !! 148 10:
123         addi    a3, a3, -2      /* adjust len */ !! 149         movl %ecx, %edx
124         j       1b              /* now buf is 4-byte aligned */ !! 150         movl %ecx, %ebx
125                                                !! 151         andl $0x7c, %ebx
126         /* case: odd-byte aligned, len > 1     !! 152         shrl $7, %ecx
127          * This case is dog slow, so don't give us an odd address. !! 153         addl %ebx,%esi
128          * (I don't think this ever happens, but just in case.) !! 154         shrl $2, %ebx  
129          */                                    !! 155         negl %ebx
130 8:                                             !! 156         lea 45f(%ebx,%ebx,2), %ebx
131         srli    a5, a3, 2       /* 4-byte chunks */ !! 157         testl %esi, %esi
132 #if XCHAL_HAVE_LOOPS                           !! 158         jmp *%ebx
133         loopgtz a5, 2f                         !! 159 
134 #else                                          !! 160         # Handle 2-byte-aligned regions
135         beqz    a5, 2f                         !! 161 20:     addw (%esi), %ax
136         slli    a5, a5, 2                      !! 162         lea 2(%esi), %esi
137         add     a5, a5, a2      /* a5 = end of last 4-byte chunk */ !! 163         adcl $0, %eax
138 .Loop3:                                        !! 164         jmp 10b
139 #endif                                         !! 165 25:
140         l8ui    a6, a2, 0       /* bits 24..31 */ !! 166         testl $1, %esi         
141         l16ui   a7, a2, 1       /* bits  8..23 */ !! 167         jz 30f                 
142         l8ui    a8, a2, 3       /* bits  0.. 7 */ !! 168         # buf is odd
143 #ifdef  __XTENSA_EB__                          !! 169         dec %ecx
144         slli    a6, a6, 24                     !! 170         jl 90f
145 #else                                          !! 171         movzbl (%esi), %ebx
146         slli    a8, a8, 24                     !! 172         addl %ebx, %eax
147 #endif                                         !! 173         adcl $0, %eax
148         slli    a7, a7, 8                      !! 174         roll $8, %eax
149         or      a7, a7, a6                     !! 175         inc %esi
150         or      a7, a7, a8                     !! 176         testl $2, %esi
151         ONES_ADD(a4, a7)                       !! 177         jz 10b
152         addi    a2, a2, 4                      !! 178 
153 #if !XCHAL_HAVE_LOOPS                          !! 179 30:     subl $2, %ecx          
154         blt     a2, a5, .Loop3                 !! 180         ja 20b                 
155 #endif                                         !! 181         je 32f
156 2:                                             !! 182         addl $2, %ecx
157         _bbci.l a3, 1, 3f       /* remaining 2-byte chunk */ !! 183         jz 80f
158         l8ui    a6, a2, 0                      !! 184         movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
159         l8ui    a7, a2, 1                      !! 185         addl %ebx, %eax
160 #ifdef  __XTENSA_EB__                          !! 186         adcl $0, %eax
161         slli    a6, a6, 8                      !! 187         jmp 80f
162 #else                                          !! 188 32:
163         slli    a7, a7, 8                      !! 189         addw (%esi), %ax        # csumming 2 bytes, 2-aligned
                                                   >> 190         adcl $0, %eax
                                                   >> 191         jmp 80f
                                                   >> 192 
                                                   >> 193 40: 
                                                   >> 194         addl -128(%esi), %eax
                                                   >> 195         adcl -124(%esi), %eax
                                                   >> 196         adcl -120(%esi), %eax
                                                   >> 197         adcl -116(%esi), %eax   
                                                   >> 198         adcl -112(%esi), %eax   
                                                   >> 199         adcl -108(%esi), %eax
                                                   >> 200         adcl -104(%esi), %eax
                                                   >> 201         adcl -100(%esi), %eax
                                                   >> 202         adcl -96(%esi), %eax
                                                   >> 203         adcl -92(%esi), %eax
                                                   >> 204         adcl -88(%esi), %eax
                                                   >> 205         adcl -84(%esi), %eax
                                                   >> 206         adcl -80(%esi), %eax
                                                   >> 207         adcl -76(%esi), %eax
                                                   >> 208         adcl -72(%esi), %eax
                                                   >> 209         adcl -68(%esi), %eax
                                                   >> 210         adcl -64(%esi), %eax     
                                                   >> 211         adcl -60(%esi), %eax     
                                                   >> 212         adcl -56(%esi), %eax     
                                                   >> 213         adcl -52(%esi), %eax   
                                                   >> 214         adcl -48(%esi), %eax   
                                                   >> 215         adcl -44(%esi), %eax
                                                   >> 216         adcl -40(%esi), %eax
                                                   >> 217         adcl -36(%esi), %eax
                                                   >> 218         adcl -32(%esi), %eax
                                                   >> 219         adcl -28(%esi), %eax
                                                   >> 220         adcl -24(%esi), %eax
                                                   >> 221         adcl -20(%esi), %eax
                                                   >> 222         adcl -16(%esi), %eax
                                                   >> 223         adcl -12(%esi), %eax
                                                   >> 224         adcl -8(%esi), %eax
                                                   >> 225         adcl -4(%esi), %eax
                                                   >> 226 45:
                                                   >> 227         lea 128(%esi), %esi
                                                   >> 228         adcl $0, %eax
                                                   >> 229         dec %ecx
                                                   >> 230         jge 40b
                                                   >> 231         movl %edx, %ecx
                                                   >> 232 50:     andl $3, %ecx
                                                   >> 233         jz 80f
                                                   >> 234 
                                                   >> 235         # Handle the last 1-3 bytes without jumping
                                                   >> 236         notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
                                                   >> 237         movl $0xffffff,%ebx     # by the shll and shrl instructions
                                                   >> 238         shll $3,%ecx
                                                   >> 239         shrl %cl,%ebx
                                                   >> 240         andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
                                                   >> 241         addl %ebx,%eax
                                                   >> 242         adcl $0,%eax
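
The branch-free 1-3 byte tail above can be written in C roughly as follows
(a sketch of the same masking idea, not code from the file): because %esi is
4-byte aligned here, a whole 32-bit word can be read safely and the bytes
beyond len masked off, avoiding a branch per trailing byte.

    /* Branch-free tail: rem = len & 3 (1..3); p is 4-byte aligned, so the
     * word-sized over-read cannot cross into an unmapped page.
     * Little-endian; illustrative only. */
    static unsigned int csum_tail(const unsigned int *p, unsigned int rem,
                                  unsigned int sum)
    {
            unsigned int mask = 0xffffffu >> ((~rem & 3) << 3);
            unsigned int val  = *p & mask;  /* keep only the rem low bytes */

            sum += val;
            if (sum < val)
                    sum += 1;               /* end-around carry */
            return sum;
    }
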
                                                   >> 243 80: 
                                                   >> 244         testl $1, 12(%esp)
                                                   >> 245         jz 90f
                                                   >> 246         roll $8, %eax
                                                   >> 247 90: 
                                                   >> 248         popl %ebx
                                                   >> 249         popl %esi
                                                   >> 250         ret
                                                   >> 251                                 
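
The computed jump above (lea 45f(%ebx,%ebx,2) followed by jmp *%ebx) is a
Duff's-device-style entry into the middle of the unrolled run: each
adcl x(%esi) encodes in 3 bytes, so scaling the leftover word count by 3
yields the address of the right entry instruction.  The same idea in C
(illustrative; the real code computes instruction addresses rather than
using a switch):

    /* Duff's-device sketch: enter an unrolled ones'-complement sum
     * mid-loop to absorb the leftover words, then run full groups of 4. */
    static unsigned int csum_words(const unsigned int *p, int words,
                                   unsigned int sum)
    {
    #define ADD(expr) do { unsigned int v_ = (expr); \
                           sum += v_; if (sum < v_) sum++; } while (0)
            int n = (words + 3) / 4;

            if (words <= 0)
                    return sum;
            switch (words & 3) {
            case 0: do {    ADD(*p++);
            case 3:         ADD(*p++);
            case 2:         ADD(*p++);
            case 1:         ADD(*p++);
                    } while (--n > 0);
            }
    #undef ADD
            return sum;
    }
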
164 #endif                                            252 #endif
165         or      a7, a7, a6                     << 
166         ONES_ADD(a4, a7)                       << 
167         addi    a2, a2, 2                      << 
168 3:                                             << 
169         j       5b              /* branch to handle the trailing byte */ << 
170                                                   253 
171 ENDPROC(csum_partial)                          !! 254 /*
172 EXPORT_SYMBOL(csum_partial)                    !! 255 unsigned int csum_partial_copy_generic (const char *src, char *dst,
                                                   >> 256                                   int len, int sum, int *src_err_ptr, int *dst_err_ptr)
                                                   >> 257  */ 
173                                                   258 
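
Before moving on to the copy variant, the shape both csum_partial
implementations share (an unrolled 32-byte loop, then 4-, 2- and 1-byte
tails) can be summarized in C.  An illustrative sketch for the 4-byte
aligned, little-endian case, reusing ones_add from the sketch above; it is
not either file's code verbatim:

    static unsigned int csum_partial_sketch(const unsigned char *buf,
                                            int len, unsigned int sum)
    {
            const unsigned int *p = (const unsigned int *)buf;

            for (; len >= 32; len -= 32)    /* unrolled 32-byte chunks */
                    for (int i = 0; i < 8; i++)
                            sum = ones_add(sum, *p++);
            for (; len >= 4; len -= 4)      /* remaining 4-byte chunks */
                    sum = ones_add(sum, *p++);
            if (len & 2) {                  /* remaining 2-byte chunk */
                    sum = ones_add(sum, *(const unsigned short *)p);
                    p = (const unsigned int *)((const unsigned char *)p + 2);
            }
            if (len & 1)                    /* trailing byte: low bits on LE */
                    sum = ones_add(sum, *(const unsigned char *)p);
            return sum;
    }
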
174 /*                                                259 /*
175  * Copy from ds while checksumming, otherwise like csum_partial     260  * Copy from ds while checksumming, otherwise like csum_partial
                                                   >> 261  *
                                                   >> 262  * The macros SRC and DST specify the type of access for the instruction.
                                                   >> 263  * thus we can call a custom exception handler for all access types.
                                                   >> 264  *
                                                   >> 265  * FIXME: could someone double-check whether I haven't mixed up some SRC and
                                                   >> 266  *        DST definitions? It's damn hard to trigger all cases.  I hope I got
                                                   >> 267  *        them all but there's no guarantee.
176  */                                               268  */
177                                                   269 
178 /*                                             !! 270 #define SRC(y...)                       \
179 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len) !! 255 unsigned int csum_partial_copy_generic (const char *src, char *dst,
180         a2  = src                              !! 272         .section __ex_table, "a";       \
181         a3  = dst                              !! 273         .long 9999b, 6001f      ;       \
182         a4  = len                              !! 274         .previous
183         a5  = sum                              !! 275 
184         a8  = temp                             !! 276 #define DST(y...)                       \
185         a9  = temp                             !! 277         9999: y;                        \
186         a10 = temp                             !! 278         .section __ex_table, "a";       \
                                                   >> 279         .long 9999b, 6002f      ;       \
                                                   >> 280         .previous
                                                   >> 281 
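
Each SRC/DST wrapper emits, next to the instruction itself, an entry in the
__ex_table section pairing the instruction's address (label 9999:) with a
fixup label (6001: for source faults, 6002: for destination faults).  On a
page fault the trap handler searches this table and resumes at the fixup.
Conceptually each entry looks like this (modeled on the kernel's
exception_table_entry of that era; illustrative):

    struct exception_table_entry {
            unsigned long insn;     /* address of the 9999: instruction */
            unsigned long fixup;    /* where to resume after the fault */
    };
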
                                                   >> 282 .align 4
                                                   >> 283 .globl csum_partial_copy_generic
                                                   >> 284                                 
                                                   >> 285 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
                                                   >> 286 
                                                   >> 287 #define ARGBASE 16              
                                                   >> 288 #define FP              12
                                                   >> 289                 
                                                   >> 290 csum_partial_copy_generic:
                                                   >> 291         subl  $4,%esp   
                                                   >> 292         pushl %edi
                                                   >> 293         pushl %esi
                                                   >> 294         pushl %ebx
                                                   >> 295         movl ARGBASE+16(%esp),%eax      # sum
                                                   >> 296         movl ARGBASE+12(%esp),%ecx      # len
                                                   >> 297         movl ARGBASE+4(%esp),%esi       # src
                                                   >> 298         movl ARGBASE+8(%esp),%edi       # dst
                                                   >> 299 
                                                   >> 300         testl $2, %edi                  # Check alignment. 
                                                   >> 301         jz 2f                           # Jump if alignment is ok.
                                                   >> 302         subl $2, %ecx                   # Alignment uses up two bytes.
                                                   >> 303         jae 1f                          # Jump if we had at least two bytes.
                                                   >> 304         addl $2, %ecx                   # ecx was < 2.  Deal with it.
                                                   >> 305         jmp 4f
                                                   >> 306 SRC(1:  movw (%esi), %bx        )
                                                   >> 307         addl $2, %esi
                                                   >> 308 DST(    movw %bx, (%edi)        )
                                                   >> 309         addl $2, %edi
                                                   >> 310         addw %bx, %ax   
                                                   >> 311         adcl $0, %eax
                                                   >> 312 2:
                                                   >> 313         movl %ecx, FP(%esp)
                                                   >> 314         shrl $5, %ecx
                                                   >> 315         jz 2f
                                                   >> 316         testl %esi, %esi
                                                   >> 317 SRC(1:  movl (%esi), %ebx       )
                                                   >> 318 SRC(    movl 4(%esi), %edx      )
                                                   >> 319         adcl %ebx, %eax
                                                   >> 320 DST(    movl %ebx, (%edi)       )
                                                   >> 321         adcl %edx, %eax
                                                   >> 322 DST(    movl %edx, 4(%edi)      )
                                                   >> 323 
                                                   >> 324 SRC(    movl 8(%esi), %ebx      )
                                                   >> 325 SRC(    movl 12(%esi), %edx     )
                                                   >> 326         adcl %ebx, %eax
                                                   >> 327 DST(    movl %ebx, 8(%edi)      )
                                                   >> 328         adcl %edx, %eax
                                                   >> 329 DST(    movl %edx, 12(%edi)     )
                                                   >> 330 
                                                   >> 331 SRC(    movl 16(%esi), %ebx     )
                                                   >> 332 SRC(    movl 20(%esi), %edx     )
                                                   >> 333         adcl %ebx, %eax
                                                   >> 334 DST(    movl %ebx, 16(%edi)     )
                                                   >> 335         adcl %edx, %eax
                                                   >> 336 DST(    movl %edx, 20(%edi)     )
                                                   >> 337 
                                                   >> 338 SRC(    movl 24(%esi), %ebx     )
                                                   >> 339 SRC(    movl 28(%esi), %edx     )
                                                   >> 340         adcl %ebx, %eax
                                                   >> 341 DST(    movl %ebx, 24(%edi)     )
                                                   >> 342         adcl %edx, %eax
                                                   >> 343 DST(    movl %edx, 28(%edi)     )
                                                   >> 344 
                                                   >> 345         lea 32(%esi), %esi
                                                   >> 346         lea 32(%edi), %edi
                                                   >> 347         dec %ecx
                                                   >> 348         jne 1b
                                                   >> 349         adcl $0, %eax
                                                   >> 350 2:      movl FP(%esp), %edx
                                                   >> 351         movl %edx, %ecx
                                                   >> 352         andl $0x1c, %edx
                                                   >> 353         je 4f
                                                   >> 354         shrl $2, %edx                   # This clears CF
                                                   >> 355 SRC(3:  movl (%esi), %ebx       )
                                                   >> 356         adcl %ebx, %eax
                                                   >> 357 DST(    movl %ebx, (%edi)       )
                                                   >> 358         lea 4(%esi), %esi
                                                   >> 359         lea 4(%edi), %edi
                                                   >> 360         dec %edx
                                                   >> 361         jne 3b
                                                   >> 362         adcl $0, %eax
                                                   >> 363 4:      andl $3, %ecx
                                                   >> 364         jz 7f
                                                   >> 365         cmpl $2, %ecx
                                                   >> 366         jb 5f
                                                   >> 367 SRC(    movw (%esi), %cx        )
                                                   >> 368         leal 2(%esi), %esi
                                                   >> 369 DST(    movw %cx, (%edi)        )
                                                   >> 370         leal 2(%edi), %edi
                                                   >> 371         je 6f
                                                   >> 372         shll $16,%ecx
                                                   >> 373 SRC(5:  movb (%esi), %cl        )
                                                   >> 374 DST(    movb %cl, (%edi)        )
                                                   >> 375 6:      addl %ecx, %eax
                                                   >> 376         adcl $0, %eax
                                                   >> 377 7:
                                                   >> 378 5000:
187                                                   379 
188     This function is optimized for 4-byte aligned addresses.  Other !! 380 # Exception handler:
189     alignments work, but not nearly as efficiently. !! 381 .section .fixup, "ax"
190  */                                            << 
191                                                   382 
192 ENTRY(csum_partial_copy_generic)               !! 383 6001:
                                                   >> 384         movl ARGBASE+20(%esp), %ebx     # src_err_ptr
                                                   >> 385         movl $-EFAULT, (%ebx)
                                                   >> 386 
                                                   >> 387         # zero the complete destination - computing the rest
                                                   >> 388         # is too much work 
                                                   >> 389         movl ARGBASE+8(%esp), %edi      # dst
                                                   >> 390         movl ARGBASE+12(%esp), %ecx     # len
                                                   >> 391         xorl %eax,%eax
                                                   >> 392         rep ; stosb
                                                   >> 393 
                                                   >> 394         jmp 5000b
                                                   >> 395 
                                                   >> 396 6002:
                                                   >> 397         movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
                                                   >> 398         movl $-EFAULT,(%ebx)
                                                   >> 399         jmp 5000b
193                                                   400 
194         abi_entry_default                      !! 401 .previous
195         movi    a5, -1                         << 
196         or      a10, a2, a3                    << 
197                                                << 
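
The i386 fixup code above also documents the 2.4-era error contract: a fault
while reading src stores -EFAULT through src_err_ptr and zeroes the entire
destination rather than tracking how much was copied ("add zeroing on error"
in the header changelog); a fault while writing dst stores -EFAULT through
dst_err_ptr.  A C sketch of that contract, reusing ones_add from above
(read_ok/write_ok are hypothetical stand-ins for the accesses patched via
__ex_table):

    #include <errno.h>              /* EFAULT */
    #include <string.h>             /* memset */

    /* Hypothetical fault-checking accessors; each returns 0 on fault. */
    extern int read_ok(const unsigned char *p, unsigned char *out);
    extern int write_ok(unsigned char *p, unsigned char val);

    static unsigned int csum_copy_sketch(const unsigned char *src,
                                         unsigned char *dst, int len,
                                         unsigned int sum,
                                         int *src_err_ptr, int *dst_err_ptr)
    {
            for (int i = 0; i < len; i++) {
                    unsigned char c;

                    if (!read_ok(src + i, &c)) {
                            *src_err_ptr = -EFAULT;
                            memset(dst, 0, len);    /* zero the whole dst */
                            return sum;
                    }
                    if (!write_ok(dst + i, c)) {
                            *dst_err_ptr = -EFAULT;
                            return sum;
                    }
                    /* little-endian ones'-complement accumulation */
                    sum = ones_add(sum, (i & 1) ? (unsigned int)c << 8 : c);
            }
            return sum;
    }
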
198         /* We optimize the following alignment tests for the 4-byte << 
199         aligned case.  Two bbsi.l instructions might seem more optimal << 
200         (commented out below).  However, both labels 5: and 3: are out << 
201         of the imm8 range, so the assembler relaxes them into << 
202         equivalent bbci.l, j combinations, which is actually << 
203         slower. */                             << 
204                                                << 
205         extui   a9, a10, 0, 2                  << 
206         beqz    a9, 1f          /* branch if both are 4-byte aligned */ << 
207         bbsi.l  a10, 0, 5f      /* branch if one address is odd */ << 
208         j       3f              /* one address is 2-byte aligned */ << 
209                                                << 
210 /*      _bbsi.l a10, 0, 5f */   /* branch if odd address */ << 
211 /*      _bbsi.l a10, 1, 3f */   /* branch if 2-byte-aligned address */ << 
212                                                << 
213 1:                                             << 
214         /* src and dst are both 4-byte aligned << 
215         srli    a10, a4, 5      /* 32-byte chunks */ << 
216 #if XCHAL_HAVE_LOOPS                           << 
217         loopgtz a10, 2f                        << 
218 #else                                          << 
219         beqz    a10, 2f                        << 
220         slli    a10, a10, 5                    << 
221         add     a10, a10, a2    /* a10 = end of last 32-byte chunk */ << 
222 .Loop5:                                        << 
223 #endif                                         << 
224 EX(10f) l32i    a9, a2, 0                      << 
225 EX(10f) l32i    a8, a2, 4                      << 
226 EX(10f) s32i    a9, a3, 0                      << 
227 EX(10f) s32i    a8, a3, 4                      << 
228         ONES_ADD(a5, a9)                       << 
229         ONES_ADD(a5, a8)                       << 
230 EX(10f) l32i    a9, a2, 8                      << 
231 EX(10f) l32i    a8, a2, 12                     << 
232 EX(10f) s32i    a9, a3, 8                      << 
233 EX(10f) s32i    a8, a3, 12                     << 
234         ONES_ADD(a5, a9)                       << 
235         ONES_ADD(a5, a8)                       << 
236 EX(10f) l32i    a9, a2, 16                     << 
237 EX(10f) l32i    a8, a2, 20                     << 
238 EX(10f) s32i    a9, a3, 16                     << 
239 EX(10f) s32i    a8, a3, 20                     << 
240         ONES_ADD(a5, a9)                       << 
241         ONES_ADD(a5, a8)                       << 
242 EX(10f) l32i    a9, a2, 24                     << 
243 EX(10f) l32i    a8, a2, 28                     << 
244 EX(10f) s32i    a9, a3, 24                     << 
245 EX(10f) s32i    a8, a3, 28                     << 
246         ONES_ADD(a5, a9)                       << 
247         ONES_ADD(a5, a8)                       << 
248         addi    a2, a2, 32                     << 
249         addi    a3, a3, 32                     << 
250 #if !XCHAL_HAVE_LOOPS                          << 
251         blt     a2, a10, .Loop5                << 
252 #endif                                         << 
253 2:                                             << 
254         extui   a10, a4, 2, 3   /* remaining 4-byte chunks */ << 
255         extui   a4, a4, 0, 2    /* reset len for general-case, 2-byte chunks */ << 
256 #if XCHAL_HAVE_LOOPS                           << 
257         loopgtz a10, 3f                        << 
258 #else                                          << 
259         beqz    a10, 3f                        << 
260         slli    a10, a10, 2                    << 
261         add     a10, a10, a2    /* a10 = end of last 4-byte chunk */ << 
262 .Loop6:                                        << 
263 #endif                                         << 
264 EX(10f) l32i    a9, a2, 0                      << 
265 EX(10f) s32i    a9, a3, 0                      << 
266         ONES_ADD(a5, a9)                       << 
267         addi    a2, a2, 4                      << 
268         addi    a3, a3, 4                      << 
269 #if !XCHAL_HAVE_LOOPS                          << 
270         blt     a2, a10, .Loop6                << 
271 #endif                                         << 
272 3:                                             << 
273         /*                                     << 
274         Control comes to here in two cases: (1) It may fall through << 
275         to here from the 4-byte alignment case to process, at most, << 
276         one 2-byte chunk.  (2) It branches to here from above if << 
277         either src or dst is 2-byte aligned, and we process all bytes << 
278         here, except for perhaps a trailing odd byte.  It's << 
279         inefficient, so align your addresses to 4-byte boundaries. << 
280                                                << 
281         a2 = src                               << 
282         a3 = dst                               << 
283         a4 = len                               << 
284         a5 = sum                               << 
285         */                                     << 
286         srli    a10, a4, 1      /* 2-byte chunks */ << 
287 #if XCHAL_HAVE_LOOPS                           << 
288         loopgtz a10, 4f                        << 
289 #else                                          << 
290         beqz    a10, 4f                        << 
291         slli    a10, a10, 1                    << 
292         add     a10, a10, a2    /* a10 = end of last 2-byte chunk */ << 
293 .Loop7:                                        << 
294 #endif                                         << 
295 EX(10f) l16ui   a9, a2, 0                      << 
296 EX(10f) s16i    a9, a3, 0                      << 
297         ONES_ADD(a5, a9)                       << 
298         addi    a2, a2, 2                      << 
299         addi    a3, a3, 2                      << 
300 #if !XCHAL_HAVE_LOOPS                          << 
301         blt     a2, a10, .Loop7                << 
302 #endif                                         << 
303 4:                                             << 
304         /* This section processes a possible trailing odd byte. */ << 
305         _bbci.l a4, 0, 8f       /* 1-byte chunk */ << 
306 EX(10f) l8ui    a9, a2, 0                      << 
307 EX(10f) s8i     a9, a3, 0                      << 
308 #ifdef __XTENSA_EB__                           << 
309         slli    a9, a9, 8       /* shift byte to bits 8..15 */ << 
310 #endif                                         << 
311         ONES_ADD(a5, a9)                       << 
312 8:                                             << 
313         mov     a2, a5                         << 
314         abi_ret_default                        << 
315                                                   402 
316 5:                                             !! 403         popl %ebx
317         /* Control branch to here when either src or dst is odd.  We !! 404         popl %esi
318         process all bytes using 8-bit accesses.  Grossly inefficient, !! 405         popl %edi
319         so don't feed us an odd address. */    !! 406         popl %ecx                       # equivalent to addl $4,%esp
320                                                !! 407         ret     
321         srli    a10, a4, 1      /* handle in pairs for 16-bit csum */ << 
322 #if XCHAL_HAVE_LOOPS                           << 
323         loopgtz a10, 6f                        << 
324 #else                                          << 
325         beqz    a10, 6f                        << 
326         slli    a10, a10, 1                    << 
327         add     a10, a10, a2    /* a10 = end of last 2-byte chunk */ << 
328 .Loop8:                                        << 
329 #endif                                         << 
330 EX(10f) l8ui    a9, a2, 0                      << 
331 EX(10f) l8ui    a8, a2, 1                      << 
332 EX(10f) s8i     a9, a3, 0                      << 
333 EX(10f) s8i     a8, a3, 1                      << 
334 #ifdef __XTENSA_EB__                           << 
335         slli    a9, a9, 8       /* combine into a single 16-bit value */ << 
336 #else                           /* for checksum computation little endian */ << 
337         slli    a8, a8, 8                      << 
338 #endif                                         << 
339         or      a9, a9, a8                     << 
340         ONES_ADD(a5, a9)                       << 
341         addi    a2, a2, 2                      << 
342         addi    a3, a3, 2                      << 
343 #if !XCHAL_HAVE_LOOPS                          << 
344         blt     a2, a10, .Loop8                << 
345 #endif                                         << 
346 6:                                             << 
347         j       4b              /* process the possible trailing odd byte */ << 
348                                                   408 
349 ENDPROC(csum_partial_copy_generic)             !! 409 #else
350 EXPORT_SYMBOL(csum_partial_copy_generic)       << 
351                                                   410 
                                                   >> 411 /* Version for PentiumII/PPro */
352                                                   412 
353 # Exception handler:                           !! 413 #define ROUND1(x) \
                                                   >> 414         SRC(movl x(%esi), %ebx  )       ;       \
                                                   >> 415         addl %ebx, %eax                 ;       \
                                                   >> 416         DST(movl %ebx, x(%edi)  )       ; 
                                                   >> 417 
                                                   >> 418 #define ROUND(x) \
                                                   >> 419         SRC(movl x(%esi), %ebx  )       ;       \
                                                   >> 420         adcl %ebx, %eax                 ;       \
                                                   >> 421         DST(movl %ebx, x(%edi)  )       ;
                                                   >> 422 
                                                   >> 423 #define ARGBASE 12
                                                   >> 424                 
                                                   >> 425 csum_partial_copy_generic:
                                                   >> 426         pushl %ebx
                                                   >> 427         pushl %edi
                                                   >> 428         pushl %esi
                                                   >> 429         movl ARGBASE+4(%esp),%esi       #src
                                                   >> 430         movl ARGBASE+8(%esp),%edi       #dst    
                                                   >> 431         movl ARGBASE+12(%esp),%ecx      #len
                                                   >> 432         movl ARGBASE+16(%esp),%eax      #sum
                                                   >> 433 #       movl %ecx, %edx  
                                                   >> 434         movl %ecx, %ebx  
                                                   >> 435         movl %esi, %edx
                                                   >> 436         shrl $6, %ecx     
                                                   >> 437         andl $0x3c, %ebx  
                                                   >> 438         negl %ebx
                                                   >> 439         subl %ebx, %esi  
                                                   >> 440         subl %ebx, %edi  
                                                   >> 441         lea  -1(%esi),%edx
                                                   >> 442         andl $-32,%edx
                                                   >> 443         lea 3f(%ebx,%ebx), %ebx
                                                   >> 444         testl %esi, %esi 
                                                   >> 445         jmp *%ebx
                                                   >> 446 1:      addl $64,%esi
                                                   >> 447         addl $64,%edi 
                                                   >> 448         SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
                                                   >> 449         ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)    
                                                   >> 450         ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)    
                                                   >> 451         ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)    
                                                   >> 452         ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)     
                                                   >> 453 3:      adcl $0,%eax
                                                   >> 454         addl $64, %edx
                                                   >> 455         dec %ecx
                                                   >> 456         jge 1b
                                                   >> 457 4:      movl ARGBASE+12(%esp),%edx      #len
                                                   >> 458         andl $3, %edx
                                                   >> 459         jz 7f
                                                   >> 460         cmpl $2, %edx
                                                   >> 461         jb 5f
                                                   >> 462 SRC(    movw (%esi), %dx         )
                                                   >> 463         leal 2(%esi), %esi
                                                   >> 464 DST(    movw %dx, (%edi)         )
                                                   >> 465         leal 2(%edi), %edi
                                                   >> 466         je 6f
                                                   >> 467         shll $16,%edx
                                                   >> 468 5:
                                                   >> 469 SRC(    movb (%esi), %dl         )
                                                   >> 470 DST(    movb %dl, (%edi)         )
                                                   >> 471 6:      addl %edx, %eax
                                                   >> 472         adcl $0, %eax
                                                   >> 473 7:
354 .section .fixup, "ax"                             474 .section .fixup, "ax"
355 10:                                            !! 475 6001:   movl    ARGBASE+20(%esp), %ebx  # src_err_ptr   
356         movi    a2, 0                          !! 476         movl $-EFAULT, (%ebx)
357         abi_ret_default                        !! 477         # zero the complete destination (computing the rest is too much work)
358                                                !! 478         movl ARGBASE+8(%esp),%edi       # dst
359 .previous                                      !! 479         movl ARGBASE+12(%esp),%ecx      # len
                                                   >> 480         xorl %eax,%eax
                                                   >> 481         rep; stosb
                                                   >> 482         jmp 7b
                                                   >> 483 6002:   movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
                                                   >> 484         movl $-EFAULT, (%ebx)
                                                   >> 485         jmp  7b                 
                                                   >> 486 .previous                               
                                                   >> 487 
                                                   >> 488         popl %esi
                                                   >> 489         popl %edi
                                                   >> 490         popl %ebx
                                                   >> 491         ret
                                                   >> 492                                 
                                                   >> 493 #undef ROUND
                                                   >> 494 #undef ROUND1           
                                                   >> 495                 
                                                   >> 496 #endif
                                                      
