/* SPDX-License-Identifier: GPL-2.0 */
/* NG4memcpy.S: Niagara-4 optimized memcpy.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF		0x04

/* On T4 it is very expensive to access ASRs like %fprs and
 * %asi, avoiding a read or a write can save ~50 cycles.
 */
#define FPU_ENTER			\
	rd	%fprs, %o5;		\
	andcc	%o5, FPRS_FEF, %g0;	\
	be,a,pn	%icc, 999f;		\
	 wr	%g0, FPRS_FEF, %fprs;	\
	999:

#ifdef MEMCPY_DEBUG
#define VISEntryHalf FPU_ENTER; \
		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf FPU_ENTER
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif

#define GLOBAL_SPARE	%g5
#endif

#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI	0x80		/* ASI_P */
#endif
#endif

/* When neither EX_LD nor EX_ST is provided we are building the plain
 * (non-copy_{to,from}_user) memcpy, with no fault handling.
 */
#if !defined(EX_LD) && !defined(EX_ST)
#define NON_USER_COPY
#endif

#ifndef EX_LD
#define EX_LD(x,y)	x
#endif
#ifndef EX_LD_FP
#define EX_LD_FP(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif
#ifndef EX_ST_FP
#define EX_ST_FP(x,y)	x
#endif


#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr)	type src, [addr]
#else
#define STORE(type,src,addr)	type##a src, [addr] %asi
#endif
#endif

#ifndef STORE_INIT
#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	NG4memcpy
#endif
#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif
	.align		64

	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
#ifdef MEMCPY_DEBUG
	wr		%g0, 0x80, %asi
#endif
	/* Trap if len has bit 31+ set (debug sanity check on huge lengths). */
	srlx		%o2, 31, %g2
	cmp		%g2, 0
	tne		%XCC, 5
	PREAMBLE
	mov		%o0, %o3	/* preserve original dst for the return value */
	brz,pn		%o2, .Lexit
	 cmp		%o2, 3
	ble,pn		%icc, .Ltiny
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall
	 or		%o0, %o1, %g2
	cmp		%o2, 128
	bl,pn		%icc, .Lmedium
	 nop

.Llarge:/* len >= 0x80 */
	/* First get dest 8 byte aligned.  */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, 51f
	 sub		%o2, %g1, %o2


1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)

51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	/* Check if we can use the straight fully aligned
	 * loop, or we require the alignaddr/faligndata variant.
	 */
	andcc		%o1, 0x7, %o5
	bne,pn		%icc, .Llarge_src_unaligned
	 sub		%g0, %o0, %g1

	/* Legitimize the use of initializing stores by getting dest
	 * to be 64-byte aligned.
	 */
	and		%g1, 0x3f, %g1
	brz,pt		%g1, .Llarge_aligned
	 sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
	add		%o1, 8, %o1
	subcc		%g1, 8, %g1
	add		%o0, 8, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stx, %g2, %o0 - 0x08), memcpy_retl_o2_plus_g1_plus_8)

.Llarge_aligned:
	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
	andn		%o2, 0x3f, %o4
	sub		%o2, %o4, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o4)
	add		%o1, 0x40, %o1
	EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memcpy_retl_o2_plus_o4)
	subcc		%o4, 0x40, %o4
	EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memcpy_retl_o2_plus_o4_plus_64)
	EX_ST(STORE_INIT(%g1, %o0), memcpy_retl_o2_plus_o4_plus_64)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_56)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memcpy_retl_o2_plus_o4_plus_48)
	EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_48)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memcpy_retl_o2_plus_o4_plus_40)
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_40)
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_32)
	EX_ST(STORE_INIT(%o5, %o0), memcpy_retl_o2_plus_o4_plus_32)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_24)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_16)
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_8)
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	/* Order the initializing stores against later accesses. */
	membar		#StoreLoad | #StoreStore

	brz,pn		%o2, .Lexit
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	 nop
	ba,a,pt		%icc, .Lmedium_noprefetch

.Lexit:	retl
	 mov		EX_RETVAL(%o3), %o0

.Llarge_src_unaligned:
#ifdef NON_USER_COPY
	VISEntryHalfFast(.Lmedium_vis_entry_fail)
#else
	VISEntryHalf
#endif
	andn		%o2, 0x3f, %o4
	sub		%o2, %o4, %o2
	alignaddr	%o1, %g0, %g1
	add		%o1, %o4, %o1
	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), memcpy_retl_o2_plus_o4)
1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), memcpy_retl_o2_plus_o4)
	subcc		%o4, 0x40, %o4
	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), memcpy_retl_o2_plus_o4_plus_64)
	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), memcpy_retl_o2_plus_o4_plus_64)
	faligndata	%f0, %f2, %f16
	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), memcpy_retl_o2_plus_o4_plus_64)
	faligndata	%f2, %f4, %f18
	add		%g1, 0x40, %g1
	faligndata	%f4, %f6, %f20
	faligndata	%f6, %f8, %f22
	faligndata	%f8, %f10, %f24
	faligndata	%f10, %f12, %f26
	faligndata	%f12, %f14, %f28
	faligndata	%f14, %f0, %f30
	EX_ST_FP(STORE(std, %f16, %o0 + 0x00), memcpy_retl_o2_plus_o4_plus_64)
	EX_ST_FP(STORE(std, %f18, %o0 + 0x08), memcpy_retl_o2_plus_o4_plus_56)
	EX_ST_FP(STORE(std, %f20, %o0 + 0x10), memcpy_retl_o2_plus_o4_plus_48)
	EX_ST_FP(STORE(std, %f22, %o0 + 0x18), memcpy_retl_o2_plus_o4_plus_40)
	EX_ST_FP(STORE(std, %f24, %o0 + 0x20), memcpy_retl_o2_plus_o4_plus_32)
	EX_ST_FP(STORE(std, %f26, %o0 + 0x28), memcpy_retl_o2_plus_o4_plus_24)
	EX_ST_FP(STORE(std, %f28, %o0 + 0x30), memcpy_retl_o2_plus_o4_plus_16)
	EX_ST_FP(STORE(std, %f30, %o0 + 0x38), memcpy_retl_o2_plus_o4_plus_8)
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
#ifdef NON_USER_COPY
	VISExitHalfFast
#else
	VISExitHalf
#endif
	brz,pn		%o2, .Lexit
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	 nop
	ba,a,pt		%icc, .Lmedium_unaligned

#ifdef NON_USER_COPY
.Lmedium_vis_entry_fail:
	 or		%o0, %o1, %g2
#endif
.Lmedium:
	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
	andcc		%g2, 0x7, %g0
	bne,pn		%icc, .Lmedium_unaligned
	 nop
.Lmedium_noprefetch:
	andncc		%o2, 0x20 - 1, %o5
	be,pn		%icc, 2f
	 sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5)
	add		%o1, 0x20, %o1
	subcc		%o5, 0x20, %o5
	EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32)
	EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24)
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24)
	EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8)
	bne,pt		%icc, 1b
	 add		%o0, 0x20, %o0
2:	andcc		%o2, 0x18, %o5
	be,pt		%icc, 3f
	 sub		%o2, %o5, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
	add		%o1, 0x08, %o1
	add		%o0, 0x08, %o0
	subcc		%o5, 0x08, %o5
	bne,pt		%icc, 1b
	 EX_ST(STORE(stx, %g1, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
3:	brz,pt		%o2, .Lexit
	 cmp		%o2, 0x04
	bl,pn		%icc, .Ltiny
	 nop
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2)
	add		%o1, 0x04, %o1
	add		%o0, 0x04, %o0
	subcc		%o2, 0x04, %o2
	bne,pn		%icc, .Ltiny
	 EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_4)
	ba,a,pt		%icc, .Lexit
.Lmedium_unaligned:
	/* First get dest 8 byte aligned.  */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, 2f
	 sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
2:
	and		%o1, 0x7, %g1
	brz,pn		%g1, .Lmedium_noprefetch
	 sll		%g1, 3, %g1
	mov		64, %g2
	sub		%g2, %g1, %g2
	andn		%o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2)
	sllx		%o4, %g1, %o4
	andn		%o2, 0x08 - 1, %o5
	sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5)
	add		%o1, 0x08, %o1
	subcc		%o5, 0x08, %o5
	srlx		%g3, %g2, GLOBAL_SPARE
	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8)
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	 sllx		%g3, %g1, %o4
	srl		%g1, 3, %g1
	add		%o1, %g1, %o1
	brz,pn		%o2, .Lexit
	 nop
	ba,pt		%icc, .Lsmall_unaligned

.Ltiny:
	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x00), memcpy_retl_o2_plus_1)
	EX_LD(LOAD(ldub, %o1 + 0x01, %g1), memcpy_retl_o2)
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x01), memcpy_retl_o2_plus_1)
	EX_LD(LOAD(ldub, %o1 + 0x02, %g1), memcpy_retl_o2)
	ba,pt		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x02), memcpy_retl_o2)

.Lsmall:
	andcc		%g2, 0x3, %g0
	bne,pn		%icc, .Lsmall_unaligned
	 andn		%o2, 0x4 - 1, %o5
	sub		%o2, %o5, %o2
1:
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
	add		%o1, 0x04, %o1
	subcc		%o5, 0x04, %o5
	add		%o0, 0x04, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
	brz,pt		%o2, .Lexit
	 nop
	ba,a,pt		%icc, .Ltiny

.Lsmall_unaligned:
1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
	add		%o1, 1, %o1
	add		%o0, 1, %o0
	subcc		%o2, 1, %o2
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g1, %o0 - 0x01), memcpy_retl_o2_plus_1)
	ba,a,pt		%icc, .Lexit
	 nop
	.size		FUNC_NAME, .-FUNC_NAME
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.