/* SPDX-License-Identifier: GPL-2.0 */
/* NG4memcpy.S: Niagara-4 optimized memcpy.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

/* Overview
 *
 * This file is a template: LOAD/STORE/STORE_INIT, EX_LD/EX_ST (and their
 * _FP variants), EX_RETVAL, FUNC_NAME, PREAMBLE and XCC may all be
 * pre-defined by an including file; the defaults below are used otherwise.
 *
 * Entry:   %o0 = dst, %o1 = src, %o2 = len
 * Return:  %o0 = EX_RETVAL(original dst), the original dst being kept
 *          in %o3 for the whole routine.
 * Scratch: %g1, %g2, %g3, GLOBAL_SPARE, %o4, %o5, and %f0-%f30 on the
 *          source-unaligned large path.
 *
 * Dispatch on len:
 *   len == 0     -> .Lexit
 *   len <= 3     -> .Ltiny    (byte at a time, unrolled)
 *   len <= 19    -> .Lsmall   (word loop if dst|src is 4-byte aligned)
 *   len <  128   -> .Lmedium  (8-byte loops, shift/merge if src unaligned)
 *   otherwise    -> .Llarge   (64 bytes per iteration)
 *
 * The second argument of every EX_LD/EX_ST names the fixup to use if that
 * access faults.  Fixup names spell out the value they return in %o0,
 * e.g. NG4_retl_o2_plus_o5_plus_8 returns %o2 + %o5 + 8.
 */

#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF  0x04

/* On T4 it is very expensive to access ASRs like %fprs and
 * %asi, avoiding a read or a write can save ~50 cycles.
 */
#define FPU_ENTER			\
	rd	%fprs, %o5;		\
	andcc	%o5, FPRS_FEF, %g0;	\
	be,a,pn	%icc, 999f;		\
	 wr	%g0, FPRS_FEF, %fprs;	\
	999:

#ifdef MEMCPY_DEBUG
#define VISEntryHalf FPU_ENTER; \
		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf FPU_ENTER
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif

#define GLOBAL_SPARE	%g5
#endif

#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI	0x80		/* ASI_P */
#endif
#endif

#if !defined(EX_LD) && !defined(EX_ST)
#define NON_USER_COPY
#endif

#ifndef EX_LD
#define EX_LD(x,y)	x
#endif
#ifndef EX_LD_FP
#define EX_LD_FP(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif
#ifndef EX_ST_FP
#define EX_ST_FP(x,y)	x
#endif


#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr)	type src, [addr]
#else
#define STORE(type,src,addr)	type##a src, [addr] %asi
#endif
#endif

#ifndef STORE_INIT
#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	NG4memcpy
#endif
#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
#ifndef EX_RETVAL
#define EX_RETVAL(x)	x

/* Fault fixups.  These are only assembled when no includer has supplied
 * its own EX_RETVAL.  Every fixup computes its result into %o0 in the
 * delay slot of a branch to one of the two common tails below.
 */

/* Tail for the *_fp fixups: run VISExitHalf, then fall through. */
__restore_asi_fp:
	VISExitHalf
/* Common tail: return to the caller, writing ASI_AIUS into %asi in the
 * delay slot of the retl.
 */
__restore_asi:
	retl
	 wr	%g0, ASI_AIUS, %asi

ENTRY(NG4_retl_o2)
	ba,pt	%xcc, __restore_asi
	 mov	%o2, %o0
ENDPROC(NG4_retl_o2)
ENTRY(NG4_retl_o2_plus_1)
	ba,pt	%xcc, __restore_asi
	 add	%o2, 1, %o0
ENDPROC(NG4_retl_o2_plus_1)
ENTRY(NG4_retl_o2_plus_4)
	ba,pt	%xcc, __restore_asi
	 add	%o2, 4, %o0
ENDPROC(NG4_retl_o2_plus_4)
ENTRY(NG4_retl_o2_plus_o5)
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5)
ENTRY(NG4_retl_o2_plus_o5_plus_4)
	add	%o5, 4, %o5
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_4)
ENTRY(NG4_retl_o2_plus_o5_plus_8)
	add	%o5, 8, %o5
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_8)
ENTRY(NG4_retl_o2_plus_o5_plus_16)
	add	%o5, 16, %o5
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_16)
ENTRY(NG4_retl_o2_plus_o5_plus_24)
	add	%o5, 24, %o5
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_24)
ENTRY(NG4_retl_o2_plus_o5_plus_32)
	add	%o5, 32, %o5
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_32)
ENTRY(NG4_retl_o2_plus_g1)
	ba,pt	%xcc, __restore_asi
	 add	%o2, %g1, %o0
ENDPROC(NG4_retl_o2_plus_g1)
ENTRY(NG4_retl_o2_plus_g1_plus_1)
	add	%g1, 1, %g1
	ba,pt	%xcc, __restore_asi
	 add	%o2, %g1, %o0
ENDPROC(NG4_retl_o2_plus_g1_plus_1)
ENTRY(NG4_retl_o2_plus_g1_plus_8)
	add	%g1, 8, %g1
	ba,pt	%xcc, __restore_asi
	 add	%o2, %g1, %o0
ENDPROC(NG4_retl_o2_plus_g1_plus_8)
ENTRY(NG4_retl_o2_plus_o4)
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4)
ENTRY(NG4_retl_o2_plus_o4_plus_8)
	add	%o4, 8, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_8)
ENTRY(NG4_retl_o2_plus_o4_plus_16)
	add	%o4, 16, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_16)
ENTRY(NG4_retl_o2_plus_o4_plus_24)
	add	%o4, 24, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_24)
ENTRY(NG4_retl_o2_plus_o4_plus_32)
	add	%o4, 32, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_32)
ENTRY(NG4_retl_o2_plus_o4_plus_40)
	add	%o4, 40, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_40)
ENTRY(NG4_retl_o2_plus_o4_plus_48)
	add	%o4, 48, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_48)
ENTRY(NG4_retl_o2_plus_o4_plus_56)
	add	%o4, 56, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_56)
ENTRY(NG4_retl_o2_plus_o4_plus_64)
	add	%o4, 64, %o4
	ba,pt	%xcc, __restore_asi
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_64)
ENTRY(NG4_retl_o2_plus_o4_fp)
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
	add	%o4, 8, %o4
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
	add	%o4, 16, %o4
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
	add	%o4, 24, %o4
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
	add	%o4, 32, %o4
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
	add	%o4, 40, %o4
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
	add	%o4, 48, %o4
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
	add	%o4, 56, %o4
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
	add	%o4, 64, %o4
	ba,pt	%xcc, __restore_asi_fp
	 add	%o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
#endif
	.align		64

	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
#ifdef MEMCPY_DEBUG
	wr		%g0, 0x80, %asi
#endif
	/* Trap (software trap 5) if any bit at or above bit 31 of len is set. */
	srlx		%o2, 31, %g2
	cmp		%g2, 0
	tne		%XCC, 5
	PREAMBLE
	mov		%o0, %o3		/* keep original dst for the return value */
	brz,pn		%o2, .Lexit
	 cmp		%o2, 3
	ble,pn		%icc, .Ltiny
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall
	 or		%o0, %o1, %g2		/* %g2 = dst | src, for alignment tests */
	cmp		%o2, 128
	bl,pn		%icc, .Lmedium
	 nop

.Llarge:/* len >= 0x80 */
	/* First get dest 8 byte aligned.  */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, 51f
	 sub		%o2, %g1, %o2


1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)

51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	/* Check if we can use the straight fully aligned
	 * loop, or we require the alignaddr/faligndata variant.
	 */
	andcc		%o1, 0x7, %o5
	bne,pn		%icc, .Llarge_src_unaligned
	 sub		%g0, %o0, %g1

	/* Legitimize the use of initializing stores by getting dest
	 * to be 64-byte aligned.
	 */
	and		%g1, 0x3f, %g1
	brz,pt		%g1, .Llarge_aligned
	 sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
	add		%o1, 8, %o1
	subcc		%g1, 8, %g1
	add		%o0, 8, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)

.Llarge_aligned:
	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
	/* %o4 = bytes handled by the 64-byte loop, %o2 = tail (< 64). */
	andn		%o2, 0x3f, %o4
	sub		%o2, %o4, %o2

	/* %o4 is decremented early (subcc below), so every fixup after that
	 * point adds back the part of the current 64-byte group that has
	 * not been stored yet.
	 */
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
	add		%o1, 0x40, %o1
	EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
	subcc		%o4, 0x40, %o4
	EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
	EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
	EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
	EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
	EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	membar		#StoreLoad | #StoreStore

	brz,pn		%o2, .Lexit
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	 nop
	ba,a,pt		%icc, .Lmedium_noprefetch

.Lexit:	retl
	 mov		EX_RETVAL(%o3), %o0

.Llarge_src_unaligned:
#ifdef NON_USER_COPY
	VISEntryHalfFast(.Lmedium_vis_entry_fail)
#else
	VISEntryHalf
#endif
	/* %o4 = bytes handled by the 64-byte loop, %o2 = tail (< 64).
	 * %g1 walks the 8-byte-aligned source; %o1 is advanced past the
	 * whole loop region up front.
	 */
	andn		%o2, 0x3f, %o4
	sub		%o2, %o4, %o2
	alignaddr	%o1, %g0, %g1
	add		%o1, %o4, %o1
	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
	subcc		%o4, 0x40, %o4
	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
	faligndata	%f0, %f2, %f16
	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
	faligndata	%f2, %f4, %f18
	add		%g1, 0x40, %g1
	faligndata	%f4, %f6, %f20
	faligndata	%f6, %f8, %f22
	faligndata	%f8, %f10, %f24
	faligndata	%f10, %f12, %f26
	faligndata	%f12, %f14, %f28
	faligndata	%f14, %f0, %f30
	EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
	EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
	EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
	EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
	EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
	EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
	EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
	EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
#ifdef NON_USER_COPY
	VISExitHalfFast
#else
	VISExitHalf
#endif
	brz,pn		%o2, .Lexit
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	 nop
	ba,a,pt		%icc, .Lmedium_unaligned

#ifdef NON_USER_COPY
.Lmedium_vis_entry_fail:
	/* Recompute dst | src; %g2 was reused as a scratch register above. */
	 or		%o0, %o1, %g2
#endif
.Lmedium:
	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
	andcc		%g2, 0x7, %g0
	bne,pn		%icc, .Lmedium_unaligned
	 nop
.Lmedium_noprefetch:
	/* %o5 = bytes handled 32 at a time, %o2 = what is left after that. */
	andncc		%o2, 0x20 - 1, %o5
	be,pn		%icc, 2f
	 sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
	add		%o1, 0x20, %o1
	subcc		%o5, 0x20, %o5
	/* %o5 no longer counts this 32-byte group, so each store's fixup
	 * adds back the bytes of the group not yet stored: 32, 24, 16, 8.
	 */
	EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
	EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_16)
	EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
	bne,pt		%icc, 1b
	 add		%o0, 0x20, %o0
2:	andcc		%o2, 0x18, %o5
	be,pt		%icc, 3f
	 sub		%o2, %o5, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
	add		%o1, 0x08, %o1
	add		%o0, 0x08, %o0
	subcc		%o5, 0x08, %o5
	bne,pt		%icc, 1b
	 EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
3:	brz,pt		%o2, .Lexit
	 cmp		%o2, 0x04
	bl,pn		%icc, .Ltiny
	 nop
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
	add		%o1, 0x04, %o1
	add		%o0, 0x04, %o0
	subcc		%o2, 0x04, %o2
	bne,pn		%icc, .Ltiny
	 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
	ba,a,pt		%icc, .Lexit
.Lmedium_unaligned:
	/* First get dest 8 byte aligned.  */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, 2f
	 sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
2:
	/* dst is now 8-byte aligned.  If src is too, use the aligned loops;
	 * otherwise read aligned doublewords from src and merge neighbours
	 * with shifts: %g1 = src misalignment in bits, %g2 = 64 - %g1.
	 */
	and		%o1, 0x7, %g1
	brz,pn		%g1, .Lmedium_noprefetch
	 sll		%g1, 3, %g1
	mov		64, %g2
	sub		%g2, %g1, %g2
	andn		%o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
	sllx		%o4, %g1, %o4
	andn		%o2, 0x08 - 1, %o5
	sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
	add		%o1, 0x08, %o1
	subcc		%o5, 0x08, %o5
	srlx		%g3, %g2, GLOBAL_SPARE
	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	 sllx		%g3, %g1, %o4
	/* Turn %g1 back into a byte offset and restore the true src pointer. */
	srl		%g1, 3, %g1
	add		%o1, %g1, %o1
	brz,pn		%o2, .Lexit
	 nop
	/* Annulled so the first load of .Ltiny below is not executed in
	 * the delay slot of this branch.
	 */
	ba,a,pt		%icc, .Lsmall_unaligned

.Ltiny:
	/* 1 to 3 bytes, fully unrolled. */
	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
	EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
	EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
	ba,pt		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)

.Lsmall:
	/* Word loop when dst | src is 4-byte aligned, else byte loop.
	 * %o5 = bytes handled as words, %o2 = trailing 0..3 bytes.
	 */
	andcc		%g2, 0x3, %g0
	bne,pn		%icc, .Lsmall_unaligned
	 andn		%o2, 0x4 - 1, %o5
	sub		%o2, %o5, %o2
1:
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
	add		%o1, 0x04, %o1
	subcc		%o5, 0x04, %o5
	add		%o0, 0x04, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
	brz,pt		%o2, .Lexit
	 nop
	ba,a,pt		%icc, .Ltiny

.Lsmall_unaligned:
1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
	add		%o1, 1, %o1
	add		%o0, 1, %o0
	subcc		%o2, 1, %o2
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
	ba,a,pt		%icc, .Lexit
	.size		FUNC_NAME, .-FUNC_NAME
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.