/* SPDX-License-Identifier: GPL-2.0 */
/* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy.
 *
 * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
 * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
 */

#ifdef __KERNEL__
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	g7
#else
#define GLOBAL_SPARE	g5
#define ASI_BLK_P 0xf0
#define FPRS_FEF  0x04
#ifdef MEMCPY_DEBUG
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
		 clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
#endif

#ifndef EX_LD
#define EX_LD(x,y)	x
#endif
#ifndef EX_LD_FP
#define EX_LD_FP(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif
#ifndef EX_ST_FP
#define EX_ST_FP(x,y)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef LOAD_BLK
#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

#ifndef STORE_BLK
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	memcpy
#endif

#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)	\
	faligndata		%f1, %f2, %f48;		\
	faligndata		%f2, %f3, %f50;		\
	faligndata		%f3, %f4, %f52;		\
	faligndata		%f4, %f5, %f54;		\
	faligndata		%f5, %f6, %f56;		\
	faligndata		%f6, %f7, %f58;		\
	faligndata		%f7, %f8, %f60;		\
	faligndata		%f8, %f9, %f62;
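/* One main loop step: MAIN_LOOP_CHUNK block-loads the next 64 source
 * bytes, block-stores the 64 bytes that FREG_FROB (above) shifted
 * into %f48-%f62, advances both pointers, and exits to 'jmptgt' once
 * the block count in %GLOBAL_SPARE is used up.
 */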
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt)	\
	EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp);	\
	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);	\
	add			%src, 0x40, %src;	\
	subcc			%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
	be,pn			%xcc, jmptgt;		\
	 add			%dest, 0x40, %dest;	\

#define LOOP_CHUNK1(src, dest, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f0,  f48, branch_dest)
#define LOOP_CHUNK2(src, dest, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
#define LOOP_CHUNK3(src, dest, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)

#define DO_SYNC			membar	#Sync;
#define STORE_SYNC(dest, fsrc)				\
	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);	\
	add			%dest, 0x40, %dest;	\
	DO_SYNC

#define STORE_JUMP(dest, fsrc, target)			\
	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp);	\
	add			%dest, 0x40, %dest;	\
	ba,pt			%xcc, target;		\
	 nop;

#define FINISH_VISCHUNK(dest, f0, f1)			\
	subcc			%g3, 8, %g3;		\
	bl,pn			%xcc, 95f;		\
	 faligndata		%f0, %f1, %f48;		\
	EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp);	\
	add			%dest, 8, %dest;

#define UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
	subcc			%g3, 8, %g3;	\
	bl,pn			%xcc, 95f;	\
	 fsrc2			%f0, %f1;

#define UNEVEN_VISCHUNK(dest, f0, f1)		\
	UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
	ba,a,pt			%xcc, 93f;

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
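/* Exception fixup stubs.  When this file is built into a user-copy
 * routine, each EX_LD/EX_ST site names one of these handlers; a
 * handler's job is to return, in %o0, the number of bytes that had
 * not yet been copied when the fault hit.  The _fp variants also
 * wind down VIS state via VISExitHalf first.
 */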
#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
ENTRY(U1_g1_1_fp)
	VISExitHalf
	add		%g1, 1, %g1
	add		%g1, %g2, %g1
	retl
	 add		%g1, %o2, %o0
ENDPROC(U1_g1_1_fp)
ENTRY(U1_g2_0_fp)
	VISExitHalf
	retl
	 add		%g2, %o2, %o0
ENDPROC(U1_g2_0_fp)
ENTRY(U1_g2_8_fp)
	VISExitHalf
	add		%g2, 8, %g2
	retl
	 add		%g2, %o2, %o0
ENDPROC(U1_g2_8_fp)
ENTRY(U1_gs_0_fp)
	VISExitHalf
	add		%GLOBAL_SPARE, %g3, %o0
	retl
	 add		%o0, %o2, %o0
ENDPROC(U1_gs_0_fp)
ENTRY(U1_gs_80_fp)
	VISExitHalf
	add		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
	add		%GLOBAL_SPARE, %g3, %o0
	retl
	 add		%o0, %o2, %o0
ENDPROC(U1_gs_80_fp)
ENTRY(U1_gs_40_fp)
	VISExitHalf
	add		%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
	add		%GLOBAL_SPARE, %g3, %o0
	retl
	 add		%o0, %o2, %o0
ENDPROC(U1_gs_40_fp)
ENTRY(U1_g3_0_fp)
	VISExitHalf
	retl
	 add		%g3, %o2, %o0
ENDPROC(U1_g3_0_fp)
ENTRY(U1_g3_8_fp)
	VISExitHalf
	add		%g3, 8, %g3
	retl
	 add		%g3, %o2, %o0
ENDPROC(U1_g3_8_fp)
ENTRY(U1_o2_0_fp)
	VISExitHalf
	retl
	 mov		%o2, %o0
ENDPROC(U1_o2_0_fp)
ENTRY(U1_o2_1_fp)
	VISExitHalf
	retl
	 add		%o2, 1, %o0
ENDPROC(U1_o2_1_fp)
ENTRY(U1_gs_0)
	VISExitHalf
	retl
	 add		%GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_0)
ENTRY(U1_gs_8)
	VISExitHalf
	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
	retl
	 add		%GLOBAL_SPARE, 0x8, %o0
ENDPROC(U1_gs_8)
ENTRY(U1_gs_10)
	VISExitHalf
	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
	retl
	 add		%GLOBAL_SPARE, 0x10, %o0
ENDPROC(U1_gs_10)
ENTRY(U1_o2_0)
	retl
	 mov		%o2, %o0
ENDPROC(U1_o2_0)
ENTRY(U1_o2_8)
	retl
	 add		%o2, 8, %o0
ENDPROC(U1_o2_8)
ENTRY(U1_o2_4)
	retl
	 add		%o2, 4, %o0
ENDPROC(U1_o2_4)
ENTRY(U1_o2_1)
	retl
	 add		%o2, 1, %o0
ENDPROC(U1_o2_1)
ENTRY(U1_g1_0)
	retl
	 add		%g1, %o2, %o0
ENDPROC(U1_g1_0)
ENTRY(U1_g1_1)
	add		%g1, 1, %g1
	retl
	 add		%g1, %o2, %o0
ENDPROC(U1_g1_1)
ENTRY(U1_gs_0_o2_adj)
	and		%o2, 7, %o2
	retl
	 add		%GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_0_o2_adj)
ENTRY(U1_gs_8_o2_adj)
	and		%o2, 7, %o2
	add		%GLOBAL_SPARE, 8, %GLOBAL_SPARE
	retl
	 add		%GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_8_o2_adj)
#endif

	.align		64

	.globl		FUNC_NAME
	.type		FUNC_NAME,#function
FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
	srlx		%o2, 31, %g2
	cmp		%g2, 0
	tne		%xcc, 5
	PREAMBLE
	mov		%o0, %o4
	cmp		%o2, 0
	be,pn		%XCC, 85f
	 or		%o0, %o1, %o3
	cmp		%o2, 16
	blu,a,pn	%XCC, 80f
	 or		%o3, %o2, %o3

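	/* len is at least 16 here.  Copies shorter than five 64-byte
	 * blocks (320 bytes) take the integer path at 70f; only larger
	 * copies are worth the VIS block-transfer setup below.
	 */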
	cmp		%o2, (5 * 64)
	blu,pt		%XCC, 70f
	 andcc		%o3, 0x7, %g0

	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  */
	VISEntry

	/* Is 'dst' already aligned on an 64-byte boundary? */
	andcc		%o0, 0x3f, %g2
	be,pt		%XCC, 2f

	/* Compute abs((dst & 0x3f) - 0x40) into %g2.  This is the number
	 * of bytes to copy to make 'dst' 64-byte aligned.  We pre-
	 * subtract this from 'len'.
	 */
	 sub		%o0, %o1, %GLOBAL_SPARE
	sub		%g2, 0x40, %g2
	sub		%g0, %g2, %g2
	sub		%o2, %g2, %o2
	andcc		%g2, 0x7, %g1
	be,pt		%icc, 2f
	 and		%g2, 0x38, %g2

1:	subcc		%g1, 0x1, %g1
	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
	EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
	bgu,pt		%XCC, 1b
	 add		%o1, 0x1, %o1

	add		%o1, %GLOBAL_SPARE, %o0

2:	cmp		%g2, 0x0
	and		%o1, 0x7, %g1
	be,pt		%icc, 3f
	 alignaddr	%o1, %g0, %o1

	EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
	add		%o1, 0x8, %o1
	subcc		%g2, 0x8, %g2
	faligndata	%f4, %f6, %f0
	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
	be,pn		%icc, 3f
	 add		%o0, 0x8, %o0

	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
	add		%o1, 0x8, %o1
	subcc		%g2, 0x8, %g2
	faligndata	%f6, %f4, %f0
	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
	bne,pt		%icc, 1b
	 add		%o0, 0x8, %o0

	/* Destination is 64-byte aligned.  */
3:
	membar		  #LoadStore | #StoreStore | #StoreLoad

	subcc		%o2, 0x40, %GLOBAL_SPARE
	add		%o1, %g1, %g1
	andncc		%GLOBAL_SPARE, (0x40 - 1), %GLOBAL_SPARE
	srl		%g1, 3, %g2
	sub		%o2, %GLOBAL_SPARE, %g3
	andn		%o1, (0x40 - 1), %o1
	and		%g2, 7, %g2
	andncc		%g3, 0x7, %g3
	fsrc2		%f0, %f2
	sub		%g3, 0x8, %g3
	sub		%o2, %GLOBAL_SPARE, %o2

	add		%g1, %GLOBAL_SPARE, %g1
	subcc		%o2, %g3, %o2

	EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
	add		%o1, 0x40, %o1
	add		%g1, %g3, %g1
	EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
	add		%o1, 0x40, %o1
	sub		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
	EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
	add		%o1, 0x40, %o1

	/* There are 8 instances of the unrolled loop,
	 * one for each possible alignment of the
	 * source buffer.  Each loop instance is 452
	 * bytes.
	 */
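	/* The next five instructions compute %g2 = %g2 * 452 using
	 * shifts and adds: ((%g2 * 8 - %g2) * 16 + %g2) * 4.  The
	 * result is the byte offset of the loop instance matching
	 * the source alignment selector in %g2 (0-7).
	 */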
	sll		%g2, 3, %o3
	sub		%o3, %g2, %o3
	sllx		%o3, 4, %o3
	add		%o3, %g2, %o3
	sllx		%o3, 2, %g2
1:	rd		%pc, %o3
	add		%o3, %lo(1f - 1b), %o3
	jmpl		%o3 + %g2, %g0
	 nop

	.align		64
1:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f0, %f2, %f48
1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_SYNC(o0, f48)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_JUMP(o0, f48, 40f)
2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_SYNC(o0, f48)
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_JUMP(o0, f48, 48f)
3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_SYNC(o0, f48)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_JUMP(o0, f48, 56f)

1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f2, %f4, %f48
1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_SYNC(o0, f48)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_JUMP(o0, f48, 41f)
2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_SYNC(o0, f48)
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_JUMP(o0, f48, 49f)
3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_SYNC(o0, f48)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_JUMP(o0, f48, 57f)

1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f4, %f6, %f48
1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_SYNC(o0, f48)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_JUMP(o0, f48, 42f)
2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_SYNC(o0, f48)
	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_JUMP(o0, f48, 50f)
3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_SYNC(o0, f48)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_JUMP(o0, f48, 58f)

1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f6, %f8, %f48
1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_SYNC(o0, f48)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_JUMP(o0, f48, 43f)
2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_SYNC(o0, f48)
	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_JUMP(o0, f48, 51f)
3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_SYNC(o0, f48)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_JUMP(o0, f48, 59f)

1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f8, %f10, %f48
1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_SYNC(o0, f48)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_JUMP(o0, f48, 44f)
2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_SYNC(o0, f48)
	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_JUMP(o0, f48, 52f)
3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_SYNC(o0, f48)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_JUMP(o0, f48, 60f)

1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f10, %f12, %f48
1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_SYNC(o0, f48)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_JUMP(o0, f48, 45f)
2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_SYNC(o0, f48)
	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_JUMP(o0, f48, 53f)
3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_SYNC(o0, f48)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_JUMP(o0, f48, 61f)

1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f12, %f14, %f48
1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_SYNC(o0, f48)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_JUMP(o0, f48, 46f)
2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_SYNC(o0, f48)
	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_JUMP(o0, f48, 54f)
3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_SYNC(o0, f48)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_JUMP(o0, f48, 62f)

1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	LOOP_CHUNK1(o1, o0, 1f)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	LOOP_CHUNK2(o1, o0, 2f)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	LOOP_CHUNK3(o1, o0, 3f)
	ba,pt		%xcc, 1b+4
	 faligndata	%f14, %f16, %f48
1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_SYNC(o0, f48)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_JUMP(o0, f48, 47f)
2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_SYNC(o0, f48)
	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_JUMP(o0, f48, 55f)
3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_SYNC(o0, f48)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_JUMP(o0, f48, 63f)

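	/* Main loop exit stubs, one per unrolled loop instance.  Each
	 * FINISH_VISCHUNK stores one more faligndata'd doubleword and
	 * falls through to the next, branching to 95f when fewer than
	 * 8 bytes remain; the UNEVEN entries move the last live
	 * register to %f0 and continue at 93f instead.
	 */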
40:	FINISH_VISCHUNK(o0, f0,  f2)
41:	FINISH_VISCHUNK(o0, f2,  f4)
42:	FINISH_VISCHUNK(o0, f4,  f6)
43:	FINISH_VISCHUNK(o0, f6,  f8)
44:	FINISH_VISCHUNK(o0, f8,  f10)
45:	FINISH_VISCHUNK(o0, f10, f12)
46:	FINISH_VISCHUNK(o0, f12, f14)
47:	UNEVEN_VISCHUNK(o0, f14, f0)
48:	FINISH_VISCHUNK(o0, f16, f18)
49:	FINISH_VISCHUNK(o0, f18, f20)
50:	FINISH_VISCHUNK(o0, f20, f22)
51:	FINISH_VISCHUNK(o0, f22, f24)
52:	FINISH_VISCHUNK(o0, f24, f26)
53:	FINISH_VISCHUNK(o0, f26, f28)
54:	FINISH_VISCHUNK(o0, f28, f30)
55:	UNEVEN_VISCHUNK(o0, f30, f0)
56:	FINISH_VISCHUNK(o0, f32, f34)
57:	FINISH_VISCHUNK(o0, f34, f36)
58:	FINISH_VISCHUNK(o0, f36, f38)
59:	FINISH_VISCHUNK(o0, f38, f40)
60:	FINISH_VISCHUNK(o0, f40, f42)
61:	FINISH_VISCHUNK(o0, f42, f44)
62:	FINISH_VISCHUNK(o0, f44, f46)
63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0)

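	/* 93f drains the remaining whole doublewords through
	 * faligndata; 95f then copies the final 0-7 bytes one at a
	 * time.
	 */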
93:	EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
	add		%o1, 8, %o1
	subcc		%g3, 8, %g3
	faligndata	%f0, %f2, %f8
	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
	bl,pn		%xcc, 95f
	 add		%o0, 8, %o0
	EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
	add		%o1, 8, %o1
	subcc		%g3, 8, %g3
	faligndata	%f2, %f0, %f8
	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
	bge,pt		%xcc, 93b
	 add		%o0, 8, %o0

95:	brz,pt		%o2, 2f
	 mov		%g1, %o1

1:	EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
	add		%o1, 1, %o1
	subcc		%o2, 1, %o2
	EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
	bne,pt		%xcc, 1b
	 add		%o0, 1, %o0

2:	membar		#StoreLoad | #StoreStore
	VISExit
	retl
	 mov		EX_RETVAL(%o4), %o0

	.align		64
70: /* 16 < len <= (5 * 64) */
	bne,pn		%XCC, 75f
	 sub		%o0, %o1, %o3

72:	andn		%o2, 0xf, %GLOBAL_SPARE
	and		%o2, 0xf, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
	EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
	subcc		%GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
	add		%o1, 0x8, %o1
	EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
	bgu,pt		%XCC, 1b
	 add		%o1, 0x8, %o1
73:	andcc		%o2, 0x8, %g0
	be,pt		%XCC, 1f
	 nop
	EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
	sub		%o2, 0x8, %o2
	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
	add		%o1, 0x8, %o1
1:	andcc		%o2, 0x4, %g0
	be,pt		%XCC, 1f
	 nop
	EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
	sub		%o2, 0x4, %o2
	EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
	add		%o1, 0x4, %o1
1:	cmp		%o2, 0
	be,pt		%XCC, 85f
	 nop
	ba,pt		%xcc, 90f
	 nop

75:	andcc		%o0, 0x7, %g1
	sub		%g1, 0x8, %g1
	be,pn		%icc, 2f
	 sub		%g0, %g1, %g1
	sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
	subcc		%g1, 1, %g1
	EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
	bgu,pt		%icc, 1b
	 add		%o1, 1, %o1

2:	add		%o1, %o3, %o0
	andcc		%o1, 0x7, %g1
	bne,pt		%icc, 8f
	 sll		%g1, 3, %g1

	cmp		%o2, 16
	bgeu,pt		%icc, 72b
	 nop
	ba,a,pt		%xcc, 73b

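	/* The destination is now 8-byte aligned but the source is
	 * not: read aligned doublewords and merge each adjacent pair
	 * with sllx/srlx/or to synthesize the unaligned stream.
	 */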
8:	mov		64, %o3
	andn		%o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
	sub		%o3, %g1, %o3
	andn		%o2, 0x7, %GLOBAL_SPARE
	sllx		%g2, %g1, %g2
1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
	subcc		%GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
	add		%o1, 0x8, %o1
	srlx		%g3, %o3, %o5
	or		%o5, %g2, %o5
	EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
	add		%o0, 0x8, %o0
	bgu,pt		%icc, 1b
	 sllx		%g3, %g1, %g2

	srl		%g1, 3, %g1
	andcc		%o2, 0x7, %o2
	be,pn		%icc, 85f
	 add		%o1, %g1, %o1
	ba,pt		%xcc, 90f
	 sub		%o0, %o1, %o3

	.align		64
80: /* 0 < len <= 16 */
	andcc		%o3, 0x3, %g0
	bne,pn		%XCC, 90f
	 sub		%o0, %o1, %o3

1:	EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
	subcc		%o2, 4, %o2
	EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
	bgu,pt		%XCC, 1b
	 add		%o1, 4, %o1

85:	retl
	 mov		EX_RETVAL(%o4), %o0

	.align		32
90:	EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
	subcc		%o2, 1, %o2
	EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
	bgu,pt		%XCC, 90b
	 add		%o1, 1, %o1
	retl
	 mov		EX_RETVAL(%o4), %o0

	.size		FUNC_NAME, .-FUNC_NAME
EXPORT_SYMBOL(FUNC_NAME)