~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/mips/lib/csum_partial.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/mips/lib/csum_partial.S (Architecture m68k) and /arch/mips/lib/csum_partial.S (Architecture mips)


  1 /*                                                  1 /*
  2  * This file is subject to the terms and condi      2  * This file is subject to the terms and conditions of the GNU General Public
  3  * License.  See the file "COPYING" in the mai      3  * License.  See the file "COPYING" in the main directory of this archive
  4  * for more details.                                4  * for more details.
  5  *                                                  5  *
  6  * Quick'n'dirty IP checksum ...                    6  * Quick'n'dirty IP checksum ...
  7  *                                                  7  *
  8  * Copyright (C) 1998, 1999 Ralf Baechle            8  * Copyright (C) 1998, 1999 Ralf Baechle
  9  * Copyright (C) 1999 Silicon Graphics, Inc.        9  * Copyright (C) 1999 Silicon Graphics, Inc.
 10  * Copyright (C) 2007  Maciej W. Rozycki           10  * Copyright (C) 2007  Maciej W. Rozycki
 11  * Copyright (C) 2014 Imagination Technologies     11  * Copyright (C) 2014 Imagination Technologies Ltd.
 12  */                                                12  */
 13 #include <linux/errno.h>                           13 #include <linux/errno.h>
 14 #include <linux/export.h>                          14 #include <linux/export.h>
 15 #include <asm/asm.h>                               15 #include <asm/asm.h>
 16 #include <asm/asm-offsets.h>                       16 #include <asm/asm-offsets.h>
 17 #include <asm/regdef.h>                            17 #include <asm/regdef.h>
 18                                                    18 
 19 #ifdef CONFIG_64BIT                                19 #ifdef CONFIG_64BIT
 20 /*                                                 20 /*
 21  * As we are sharing code base with the mips32     21  * As we are sharing code base with the mips32 tree (which use the o32 ABI
 22  * register definitions). We need to redefine      22  * register definitions). We need to redefine the register definitions from
 23  * the n64 ABI register naming to the o32 ABI      23  * the n64 ABI register naming to the o32 ABI register naming.
 24  */                                                24  */
 25 #undef t0                                          25 #undef t0
 26 #undef t1                                          26 #undef t1
 27 #undef t2                                          27 #undef t2
 28 #undef t3                                          28 #undef t3
 29 #define t0      $8                                 29 #define t0      $8
 30 #define t1      $9                                 30 #define t1      $9
 31 #define t2      $10                                31 #define t2      $10
 32 #define t3      $11                                32 #define t3      $11
 33 #define t4      $12                                33 #define t4      $12
 34 #define t5      $13                                34 #define t5      $13
 35 #define t6      $14                                35 #define t6      $14
 36 #define t7      $15                                36 #define t7      $15
 37                                                    37 
 38 #define USE_DOUBLE                                 38 #define USE_DOUBLE
 39 #endif                                             39 #endif
 40                                                    40 
 41 #ifdef USE_DOUBLE                                  41 #ifdef USE_DOUBLE
 42                                                    42 
 43 #define LOAD   ld                                  43 #define LOAD   ld
 44 #define LOAD32 lwu                                 44 #define LOAD32 lwu
 45 #define ADD    daddu                               45 #define ADD    daddu
 46 #define NBYTES 8                                   46 #define NBYTES 8
 47                                                    47 
 48 #else                                              48 #else
 49                                                    49 
 50 #define LOAD   lw                                  50 #define LOAD   lw
 51 #define LOAD32 lw                                  51 #define LOAD32 lw
 52 #define ADD    addu                                52 #define ADD    addu
 53 #define NBYTES 4                                   53 #define NBYTES 4
 54                                                    54 
 55 #endif /* USE_DOUBLE */                            55 #endif /* USE_DOUBLE */
 56                                                    56 
 57 #define UNIT(unit)  ((unit)*NBYTES)                57 #define UNIT(unit)  ((unit)*NBYTES)
 58                                                    58 
 59 #define ADDC(sum,reg)                              59 #define ADDC(sum,reg)                                           \
 60         .set    push;                              60         .set    push;                                           \
 61         .set    noat;                              61         .set    noat;                                           \
 62         ADD     sum, reg;                          62         ADD     sum, reg;                                       \
 63         sltu    v1, sum, reg;                      63         sltu    v1, sum, reg;                                   \
 64         ADD     sum, v1;                           64         ADD     sum, v1;                                        \
 65         .set    pop                                65         .set    pop
 66                                                    66 
 67 #define ADDC32(sum,reg)                            67 #define ADDC32(sum,reg)                                         \
 68         .set    push;                              68         .set    push;                                           \
 69         .set    noat;                              69         .set    noat;                                           \
 70         addu    sum, reg;                          70         addu    sum, reg;                                       \
 71         sltu    v1, sum, reg;                      71         sltu    v1, sum, reg;                                   \
 72         addu    sum, v1;                           72         addu    sum, v1;                                        \
 73         .set    pop                                73         .set    pop
 74                                                    74 
 75 #define CSUM_BIGCHUNK1(src, offset, sum, _t0,      75 #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)    \
 76         LOAD    _t0, (offset + UNIT(0))(src);      76         LOAD    _t0, (offset + UNIT(0))(src);                   \
 77         LOAD    _t1, (offset + UNIT(1))(src);      77         LOAD    _t1, (offset + UNIT(1))(src);                   \
 78         LOAD    _t2, (offset + UNIT(2))(src);      78         LOAD    _t2, (offset + UNIT(2))(src);                   \
 79         LOAD    _t3, (offset + UNIT(3))(src);      79         LOAD    _t3, (offset + UNIT(3))(src);                   \
 80         ADDC(_t0, _t1);                            80         ADDC(_t0, _t1);                                         \
 81         ADDC(_t2, _t3);                            81         ADDC(_t2, _t3);                                         \
 82         ADDC(sum, _t0);                            82         ADDC(sum, _t0);                                         \
 83         ADDC(sum, _t2)                             83         ADDC(sum, _t2)
 84                                                    84 
 85 #ifdef USE_DOUBLE                                  85 #ifdef USE_DOUBLE
 86 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _     86 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
 87         CSUM_BIGCHUNK1(src, offset, sum, _t0,      87         CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
 88 #else                                              88 #else
 89 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _     89 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
 90         CSUM_BIGCHUNK1(src, offset, sum, _t0,      90         CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);   \
 91         CSUM_BIGCHUNK1(src, offset + 0x10, sum     91         CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
 92 #endif                                             92 #endif
 93                                                    93 
 94 /*                                                 94 /*
 95  * a0: source address                              95  * a0: source address
 96  * a1: length of the area to checksum              96  * a1: length of the area to checksum
 97  * a2: partial checksum                            97  * a2: partial checksum
 98  */                                                98  */
 99                                                    99 
100 #define src a0                                    100 #define src a0
101 #define sum v0                                    101 #define sum v0
102                                                   102 
103         .text                                     103         .text
104         .set    noreorder                         104         .set    noreorder
105         .align  5                                 105         .align  5
106 LEAF(csum_partial)                                106 LEAF(csum_partial)
107 EXPORT_SYMBOL(csum_partial)                       107 EXPORT_SYMBOL(csum_partial)
108         move    sum, zero                         108         move    sum, zero
109         move    t7, zero                          109         move    t7, zero
110                                                   110 
111         sltiu   t8, a1, 0x8                       111         sltiu   t8, a1, 0x8
112         bnez    t8, .Lsmall_csumcpy               112         bnez    t8, .Lsmall_csumcpy             /* < 8 bytes to copy */
113          move   t2, a1                            113          move   t2, a1
114                                                   114 
115         andi    t7, src, 0x1                      115         andi    t7, src, 0x1                    /* odd buffer? */
116                                                   116 
117 .Lhword_align:                                    117 .Lhword_align:
118         beqz    t7, .Lword_align                  118         beqz    t7, .Lword_align
119          andi   t8, src, 0x2                      119          andi   t8, src, 0x2
120                                                   120 
121         lbu     t0, (src)                         121         lbu     t0, (src)
122         LONG_SUBU       a1, a1, 0x1               122         LONG_SUBU       a1, a1, 0x1
123 #ifdef __MIPSEL__                                 123 #ifdef __MIPSEL__
124         sll     t0, t0, 8                         124         sll     t0, t0, 8
125 #endif                                            125 #endif
126         ADDC(sum, t0)                             126         ADDC(sum, t0)
127         PTR_ADDU        src, src, 0x1             127         PTR_ADDU        src, src, 0x1
128         andi    t8, src, 0x2                      128         andi    t8, src, 0x2
129                                                   129 
130 .Lword_align:                                     130 .Lword_align:
131         beqz    t8, .Ldword_align                 131         beqz    t8, .Ldword_align
132          sltiu  t8, a1, 56                        132          sltiu  t8, a1, 56
133                                                   133 
134         lhu     t0, (src)                         134         lhu     t0, (src)
135         LONG_SUBU       a1, a1, 0x2               135         LONG_SUBU       a1, a1, 0x2
136         ADDC(sum, t0)                             136         ADDC(sum, t0)
137         sltiu   t8, a1, 56                        137         sltiu   t8, a1, 56
138         PTR_ADDU        src, src, 0x2             138         PTR_ADDU        src, src, 0x2
139                                                   139 
140 .Ldword_align:                                    140 .Ldword_align:
141         bnez    t8, .Ldo_end_words                141         bnez    t8, .Ldo_end_words
142          move   t8, a1                            142          move   t8, a1
143                                                   143 
144         andi    t8, src, 0x4                      144         andi    t8, src, 0x4
145         beqz    t8, .Lqword_align                 145         beqz    t8, .Lqword_align
146          andi   t8, src, 0x8                      146          andi   t8, src, 0x8
147                                                   147 
148         LOAD32  t0, 0x00(src)                     148         LOAD32  t0, 0x00(src)
149         LONG_SUBU       a1, a1, 0x4               149         LONG_SUBU       a1, a1, 0x4
150         ADDC(sum, t0)                             150         ADDC(sum, t0)
151         PTR_ADDU        src, src, 0x4             151         PTR_ADDU        src, src, 0x4
152         andi    t8, src, 0x8                      152         andi    t8, src, 0x8
153                                                   153 
154 .Lqword_align:                                    154 .Lqword_align:
155         beqz    t8, .Loword_align                 155         beqz    t8, .Loword_align
156          andi   t8, src, 0x10                     156          andi   t8, src, 0x10
157                                                   157 
158 #ifdef USE_DOUBLE                                 158 #ifdef USE_DOUBLE
159         ld      t0, 0x00(src)                     159         ld      t0, 0x00(src)
160         LONG_SUBU       a1, a1, 0x8               160         LONG_SUBU       a1, a1, 0x8
161         ADDC(sum, t0)                             161         ADDC(sum, t0)
162 #else                                             162 #else
163         lw      t0, 0x00(src)                     163         lw      t0, 0x00(src)
164         lw      t1, 0x04(src)                     164         lw      t1, 0x04(src)
165         LONG_SUBU       a1, a1, 0x8               165         LONG_SUBU       a1, a1, 0x8
166         ADDC(sum, t0)                             166         ADDC(sum, t0)
167         ADDC(sum, t1)                             167         ADDC(sum, t1)
168 #endif                                            168 #endif
169         PTR_ADDU        src, src, 0x8             169         PTR_ADDU        src, src, 0x8
170         andi    t8, src, 0x10                     170         andi    t8, src, 0x10
171                                                   171 
172 .Loword_align:                                    172 .Loword_align:
173         beqz    t8, .Lbegin_movement              173         beqz    t8, .Lbegin_movement
174          LONG_SRL       t8, a1, 0x7               174          LONG_SRL       t8, a1, 0x7
175                                                   175 
176 #ifdef USE_DOUBLE                                 176 #ifdef USE_DOUBLE
177         ld      t0, 0x00(src)                     177         ld      t0, 0x00(src)
178         ld      t1, 0x08(src)                     178         ld      t1, 0x08(src)
179         ADDC(sum, t0)                             179         ADDC(sum, t0)
180         ADDC(sum, t1)                             180         ADDC(sum, t1)
181 #else                                             181 #else
182         CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1,    182         CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
183 #endif                                            183 #endif
184         LONG_SUBU       a1, a1, 0x10              184         LONG_SUBU       a1, a1, 0x10
185         PTR_ADDU        src, src, 0x10            185         PTR_ADDU        src, src, 0x10
186         LONG_SRL        t8, a1, 0x7               186         LONG_SRL        t8, a1, 0x7
187                                                   187 
188 .Lbegin_movement:                                 188 .Lbegin_movement:
189         beqz    t8, 1f                            189         beqz    t8, 1f
190          andi   t2, a1, 0x40                      190          andi   t2, a1, 0x40
191                                                   191 
192 .Lmove_128bytes:                                  192 .Lmove_128bytes:
193         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1,     193         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
194         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1,     194         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
195         CSUM_BIGCHUNK(src, 0x40, sum, t0, t1,     195         CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
196         CSUM_BIGCHUNK(src, 0x60, sum, t0, t1,     196         CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
197         LONG_SUBU       t8, t8, 0x01              197         LONG_SUBU       t8, t8, 0x01
198         .set    reorder                           198         .set    reorder                         /* DADDI_WAR */
199         PTR_ADDU        src, src, 0x80            199         PTR_ADDU        src, src, 0x80
200         bnez    t8, .Lmove_128bytes               200         bnez    t8, .Lmove_128bytes
201         .set    noreorder                         201         .set    noreorder
202                                                   202 
203 1:                                                203 1:
204         beqz    t2, 1f                            204         beqz    t2, 1f
205          andi   t2, a1, 0x20                      205          andi   t2, a1, 0x20
206                                                   206 
207 .Lmove_64bytes:                                   207 .Lmove_64bytes:
208         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1,     208         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
209         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1,     209         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
210         PTR_ADDU        src, src, 0x40            210         PTR_ADDU        src, src, 0x40
211                                                   211 
212 1:                                                212 1:
213         beqz    t2, .Ldo_end_words                213         beqz    t2, .Ldo_end_words
214          andi   t8, a1, 0x1c                      214          andi   t8, a1, 0x1c
215                                                   215 
216 .Lmove_32bytes:                                   216 .Lmove_32bytes:
217         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1,     217         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
218         andi    t8, a1, 0x1c                      218         andi    t8, a1, 0x1c
219         PTR_ADDU        src, src, 0x20            219         PTR_ADDU        src, src, 0x20
220                                                   220 
221 .Ldo_end_words:                                   221 .Ldo_end_words:
222         beqz    t8, .Lsmall_csumcpy               222         beqz    t8, .Lsmall_csumcpy
223          andi   t2, a1, 0x3                       223          andi   t2, a1, 0x3
224         LONG_SRL        t8, t8, 0x2               224         LONG_SRL        t8, t8, 0x2
225                                                   225 
226 .Lend_words:                                      226 .Lend_words:
227         LOAD32  t0, (src)                         227         LOAD32  t0, (src)
228         LONG_SUBU       t8, t8, 0x1               228         LONG_SUBU       t8, t8, 0x1
229         ADDC(sum, t0)                             229         ADDC(sum, t0)
230         .set    reorder                           230         .set    reorder                         /* DADDI_WAR */
231         PTR_ADDU        src, src, 0x4             231         PTR_ADDU        src, src, 0x4
232         bnez    t8, .Lend_words                   232         bnez    t8, .Lend_words
233         .set    noreorder                         233         .set    noreorder
234                                                   234 
235 /* unknown src alignment and < 8 bytes to go      235 /* unknown src alignment and < 8 bytes to go  */
236 .Lsmall_csumcpy:                                  236 .Lsmall_csumcpy:
237         move    a1, t2                            237         move    a1, t2
238                                                   238 
239         andi    t0, a1, 4                         239         andi    t0, a1, 4
240         beqz    t0, 1f                            240         beqz    t0, 1f
241          andi   t0, a1, 2                         241          andi   t0, a1, 2
242                                                   242 
243         /* Still a full word to go  */            243         /* Still a full word to go  */
244         ulw     t1, (src)                         244         ulw     t1, (src)
245         PTR_ADDIU       src, 4                    245         PTR_ADDIU       src, 4
246 #ifdef USE_DOUBLE                                 246 #ifdef USE_DOUBLE
247         dsll    t1, t1, 32                        247         dsll    t1, t1, 32                      /* clear lower 32bit */
248 #endif                                            248 #endif
249         ADDC(sum, t1)                             249         ADDC(sum, t1)
250                                                   250 
251 1:      move    t1, zero                          251 1:      move    t1, zero
252         beqz    t0, 1f                            252         beqz    t0, 1f
253          andi   t0, a1, 1                         253          andi   t0, a1, 1
254                                                   254 
255         /* Still a halfword to go  */             255         /* Still a halfword to go  */
256         ulhu    t1, (src)                         256         ulhu    t1, (src)
257         PTR_ADDIU       src, 2                    257         PTR_ADDIU       src, 2
258                                                   258 
259 1:      beqz    t0, 1f                            259 1:      beqz    t0, 1f
260          sll    t1, t1, 16                        260          sll    t1, t1, 16
261                                                   261 
262         lbu     t2, (src)                         262         lbu     t2, (src)
263          nop                                      263          nop
264                                                   264 
265 #ifdef __MIPSEB__                                 265 #ifdef __MIPSEB__
266         sll     t2, t2, 8                         266         sll     t2, t2, 8
267 #endif                                            267 #endif
268         or      t1, t2                            268         or      t1, t2
269                                                   269 
270 1:      ADDC(sum, t1)                             270 1:      ADDC(sum, t1)
271                                                   271 
272         /* fold checksum */                       272         /* fold checksum */
273 #ifdef USE_DOUBLE                                 273 #ifdef USE_DOUBLE
274         dsll32  v1, sum, 0                        274         dsll32  v1, sum, 0
275         daddu   sum, v1                           275         daddu   sum, v1
276         sltu    v1, sum, v1                       276         sltu    v1, sum, v1
277         dsra32  sum, sum, 0                       277         dsra32  sum, sum, 0
278         addu    sum, v1                           278         addu    sum, v1
279 #endif                                            279 #endif
280                                                   280 
281         /* odd buffer alignment? */               281         /* odd buffer alignment? */
282 #if defined(CONFIG_CPU_MIPSR2) || defined(CONF    282 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
283     defined(CONFIG_CPU_LOONGSON64)                283     defined(CONFIG_CPU_LOONGSON64)
284         .set    push                              284         .set    push
285         .set    arch=mips32r2                     285         .set    arch=mips32r2
286         wsbh    v1, sum                           286         wsbh    v1, sum
287         movn    sum, v1, t7                       287         movn    sum, v1, t7
288         .set    pop                               288         .set    pop
289 #else                                             289 #else
290         beqz    t7, 1f                  /* odd    290         beqz    t7, 1f                  /* odd buffer alignment? */
291          lui    v1, 0x00ff                        291          lui    v1, 0x00ff
292         addu    v1, 0x00ff                        292         addu    v1, 0x00ff
293         and     t0, sum, v1                       293         and     t0, sum, v1
294         sll     t0, t0, 8                         294         sll     t0, t0, 8
295         srl     sum, sum, 8                       295         srl     sum, sum, 8
296         and     sum, sum, v1                      296         and     sum, sum, v1
297         or      sum, sum, t0                      297         or      sum, sum, t0
298 1:                                                298 1:
299 #endif                                            299 #endif
300         .set    reorder                           300         .set    reorder
301         /* Add the passed partial csum.  */       301         /* Add the passed partial csum.  */
302         ADDC32(sum, a2)                           302         ADDC32(sum, a2)
303         jr      ra                                303         jr      ra
304         .set    noreorder                         304         .set    noreorder
305         END(csum_partial)                         305         END(csum_partial)
306                                                   306 
307                                                   307 
308 /*                                                308 /*
309  * checksum and copy routines based on memcpy.    309  * checksum and copy routines based on memcpy.S
310  *                                                310  *
311  *      csum_partial_copy_nocheck(src, dst, le    311  *      csum_partial_copy_nocheck(src, dst, len)
312  *      __csum_partial_copy_kernel(src, dst, l    312  *      __csum_partial_copy_kernel(src, dst, len)
313  *                                                313  *
314  * See "Spec" in memcpy.S for details.  Unlike    314  * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
315  * function in this file use the standard call    315  * function in this file use the standard calling convention.
316  */                                               316  */
317                                                   317 
318 #define src a0                                    318 #define src a0
319 #define dst a1                                    319 #define dst a1
320 #define len a2                                    320 #define len a2
321 #define sum v0                                    321 #define sum v0
322 #define odd t8                                    322 #define odd t8
323                                                   323 
324 /*                                                324 /*
325  * All exception handlers simply return 0.        325  * All exception handlers simply return 0.
326  */                                               326  */
327                                                   327 
328 /* Instruction type */                            328 /* Instruction type */
329 #define LD_INSN 1                                 329 #define LD_INSN 1
330 #define ST_INSN 2                                 330 #define ST_INSN 2
331 #define LEGACY_MODE 1                             331 #define LEGACY_MODE 1
332 #define EVA_MODE    2                             332 #define EVA_MODE    2
333 #define USEROP   1                                333 #define USEROP   1
334 #define KERNELOP 2                                334 #define KERNELOP 2
335                                                   335 
336 /*                                                336 /*
337  * Wrapper to add an entry in the exception ta    337  * Wrapper to add an entry in the exception table
338  * in case the insn causes a memory exception.    338  * in case the insn causes a memory exception.
339  * Arguments:                                     339  * Arguments:
340  * insn    : Load/store instruction               340  * insn    : Load/store instruction
341  * type    : Instruction type                     341  * type    : Instruction type
342  * reg     : Register                             342  * reg     : Register
343  * addr    : Address                              343  * addr    : Address
344  * handler : Exception handler                    344  * handler : Exception handler
345  */                                               345  */
/*
 * EXC(insn, type, reg, addr)
 *
 * Emit a memory access that may fault on a user address.
 * In LEGACY_MODE the plain instruction is used and its address is
 * recorded in __ex_table so a fault is redirected to .L_exc.
 * In EVA mode the EVA variant (insn##e) is substituted, but only when
 * the access really targets user space ((\from/\to == USEROP) matching
 * the insn type LD_INSN/ST_INSN); kernel-side accesses need no fixup.
 * \mode, \from, \to are arguments of the enclosing
 * __BUILD_CSUM_PARTIAL_COPY_USER macro.
 */
#define EXC(insn, type, reg, addr)		\
	.if \mode == LEGACY_MODE;		\
9:		insn reg, addr;			\
		.section __ex_table,"a";	\
		PTR_WD	9b, .L_exc;		\
		.previous;			\
	/* This is enabled in EVA mode */	\
	.else;					\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";		\
			PTR_WD	9b, .L_exc;			\
			.previous;				\
		.else;						\
			/* EVA without exception */		\
			insn reg, addr;				\
		.endif;						\
	.endif
366                                                   366 
#undef LOAD

/*
 * Word-size abstraction: with USE_DOUBLE the copy/checksum loop moves
 * 8-byte doublewords (ld/sd + daddu arithmetic), otherwise 4-byte words
 * (lw/sw + addu).  All faulting accesses go through EXC() so they get
 * an exception-table entry; LOADK is only ever used on kernel addresses
 * and needs no fixup.
 * LOADL/LOADR and STOREL/STORER are the unaligned left/right partial
 * access instructions (ldl/ldr, sdl/sdr resp. lwl/lwr, swl/swr).
 */
#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr)		EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sd, ST_INSN, reg, addr)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr)		EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sw, ST_INSN, reg, addr)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */
410                                                   410 
/*
 * Endianness mapping for unaligned accesses: LDFIRST/STFIRST touch the
 * first (lowest-addressed) bytes of an unaligned word, LDREST/STREST
 * the remaining bytes.  On little-endian the low-addressed bytes are
 * the "right" part (lwr/ldr); on big-endian the "left" part (lwl/ldl).
 * SHIFT_DISCARD drops the bytes that were NOT copied from a partially
 * read word (so they are not folded into the checksum);
 * SHIFT_DISCARD_REVERT shifts the kept bytes back to their original
 * byte positions.
 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

/* Byte offsets of unit N's first and last byte, for LDFIRST/LDREST */
#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)
431                                                   431 
/*
 * Normally let us use $at freely (.set noat silences the assembler's
 * warning about it).  When the DADDI errata workarounds are enabled the
 * assembler itself needs a scratch register to expand macros, so point
 * its temporary at v1 instead of $at.
 */
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif
437                                                   437 
/*
 * __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to
 *
 * Instantiate one copy-with-checksum routine: copy len bytes from src
 * to dst while accumulating a one's-complement sum, handling arbitrary
 * alignment of both pointers.  \mode selects LEGACY_MODE vs EVA
 * instruction encodings, \from/\to mark whether source/destination are
 * user addresses (see EXC()).
 *
 * NOTE(review): src, dst, len, sum, odd and t0..t7 are symbolic
 * register aliases presumably #define'd earlier in this file (outside
 * this view) — confirm against the full source.
 */
	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

	li	sum, -1
	move	odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	bnez	t2, .Lcopy_bytes_checklen\@
	 and	t0, src, ADDRMASK
	andi	odd, dst, 0x1			/* odd buffer? */
	bnez	t1, .Ldst_unaligned\@
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	 SRL	t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
	 nop
	SUB	len, 8*NBYTES		# subtract here for bgez loop
	.align	4
	/* Main unrolled loop: 8 aligned units per iteration. */
1:
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	LOAD(t4, UNIT(4)(src))
	LOAD(t5, UNIT(5)(src))
	LOAD(t6, UNIT(6)(src))
	LOAD(t7, UNIT(7)(src))
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	STORE(t4, UNIT(4)(dst))
	ADDC(t4, t5)
	STORE(t5, UNIT(5)(dst))
	ADDC(sum, t4)
	STORE(t6, UNIT(6)(dst))
	ADDC(t6, t7)
	STORE(t7, UNIT(7)(dst))
	ADDC(sum, t6)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b
	.set	noreorder
	ADD	len, 8*NBYTES		# revert len (see above)

	/*
	 * len == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	 sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	 nop
	/* One aligned unit per iteration until only rem bytes remain. */
1:
	LOAD(t0, 0(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone\@
	 ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src))
	SUB	bits, bits, rem # bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1))
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set noreorder
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src))
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src))
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	STFIRST(t3, FIRST(0)(dst))
	SLL	t4, t1, 3		# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	 SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	 ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
	beqz	t0, .Lcleanup_src_unaligned\@
	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
	LDFIRST(t0, FIRST(0)(src))
	LDFIRST(t1, FIRST(1)(src))
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src))
	LDREST(t1, REST(1)(src))
	LDFIRST(t2, FIRST(2)(src))
	LDFIRST(t3, FIRST(3)(src))
	LDREST(t2, REST(2)(src))
	LDREST(t3, REST(3)(src))
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	 and	rem, len, NBYTES-1  # rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	 nop
	/* One unaligned unit per iteration until only rem bytes remain. */
1:
	LDFIRST(t0, FIRST(0)(src))
	LDREST(t0, REST(0)(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	 nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES  */
	/*
	 * Assemble the trailing bytes into partial word t2, each byte
	 * shifted to the position it would occupy in a full unit so the
	 * one's-complement sum stays byte-order correct.
	 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
	move	t2, zero	# partial word
	li	t3, SHIFT_START # shift
#define COPY_BYTE(N)			\
	LOADBU(t0, N(src));		\
	SUB	len, len, 1;		\
	STOREB(t0, N(dst));		\
	SLLV	t0, t0, t3;		\
	addu	t3, SHIFT_INC;		\
	beqz	len, .Lcopy_bytes_done\@; \
	 or	t2, t0

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	/* Last possible byte (offset NBYTES-2): len is known 1 here. */
	LOADBU(t0, NBYTES-2(src))
	SUB	len, len, 1
	STOREB(t0, NBYTES-2(dst))
	SLLV	t0, t0, t3
	or	t2, t0
.Lcopy_bytes_done\@:
	ADDC(sum, t2)
.Ldone\@:
	/* fold checksum */
	.set	push
	.set	noat
#ifdef USE_DOUBLE
	/* Fold the 64-bit sum down to 32 bits with end-around carry. */
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

	/*
	 * If dst started on an odd byte, the accumulated sum is
	 * byte-swapped relative to the destination; swap the two bytes
	 * of each halfword back (wsbh where available, shifts/masks
	 * otherwise).
	 */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, odd
	.set	pop
#else
	beqz	odd, 1f			/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	pop
	.set reorder
	jr	ra
	.set noreorder
	.endm
730                                                   730 
	.set	noreorder
/*
 * Shared exception fixup target: a faulting load/store in any of the
 * copy-and-checksum bodies lands here.  Report failure by returning 0.
 */
.L_exc:
	jr	ra
	 li	v0, 0			/* delay slot: return value = 0 */
735                                                   735 
/*
 * Non-EVA configuration: kernel and user addresses are reached with the
 * same load/store instructions, so a single LEGACY_MODE expansion of the
 * macro serves __csum_partial_copy_nocheck and (when EVA is off) both
 * user-copy entry points, which are simply aliased onto it.
 */
FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP
745                                                   745 
#ifdef CONFIG_EVA
/*
 * EVA configuration: user-space accesses need the EVA instruction
 * variants, so separate bodies are generated per copy direction.
 * NOTE(review): the two trailing macro arguments appear to select the
 * access type for source and destination respectively (to_user reads
 * kernel / writes user; from_user is the reverse) — confirm against
 * the macro definition earlier in this file.
 */
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)
#endif
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php