/* NG4memcpy.S: Niagara-4 optimized memcpy.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 *
 * Entry:   %o0 = dst, %o1 = src, %o2 = len
 * Returns: %o0 = original dst (saved in %o3 for the duration)
 *
 * The copy is dispatched by length: .Ltiny (len <= 3), .Lsmall
 * (len <= 19), .Lmedium (len < 128) and .Llarge (len >= 128).
 *
 * NOTE: an instruction written with one extra leading space sits in
 * the delay slot of the branch immediately above it.
 */

#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF  0x04

/* On T4 it is very expensive to access ASRs like %fprs and
 * %asi, avoiding a read or a write can save ~50 cycles.
 */
#define FPU_ENTER			\
	rd	%fprs, %o5;		\
	andcc	%o5, FPRS_FEF, %g0;	\
	be,a,pn	%icc, 999f;		\
	 wr	%g0, FPRS_FEF, %fprs;	\
	999:

#ifdef MEMCPY_DEBUG
#define VISEntryHalf FPU_ENTER; \
		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf FPU_ENTER
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif

#define GLOBAL_SPARE	%g5
#endif

#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI	0x80		/* ASI_P */
#endif
#endif

/* If neither EX_LD nor EX_ST is overridden by an including wrapper,
 * this is a plain (non user-copy) memcpy and the EX_* macros below
 * default to the bare memory access.
 */
#if !defined(EX_LD) && !defined(EX_ST)
#define NON_USER_COPY
#endif

#ifndef EX_LD
#define EX_LD(x)	x
#endif

#ifndef EX_ST
#define EX_ST(x)	x
#endif

#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr)	type src, [addr]
#else
#define STORE(type,src,addr)	type##a src, [addr] %asi
#endif
#endif

/* Block-initializing store via the STORE_ASI address space. */
#ifndef STORE_INIT
#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	NG4memcpy
#endif
#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
	.align		64

	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
#ifdef MEMCPY_DEBUG
	wr		%g0, 0x80, %asi
#endif
	/* Sanity check: trap if len has any bit >= 31 set. */
	srlx		%o2, 31, %g2
	cmp		%g2, 0
	tne		%XCC, 5
	PREAMBLE
	mov		%o0, %o3	/* preserve return value */
	brz,pn		%o2, .Lexit
	 cmp		%o2, 3
	ble,pn		%icc, .Ltiny
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall
	 or		%o0, %o1, %g2	/* combined alignment bits of dst|src */
	cmp		%o2, 128
	bl,pn		%icc, .Lmedium
	 nop

.Llarge:/* len >= 0x80 */
	/* First get dest 8 byte aligned.  */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, 51f
	 sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g2, %o0 - 0x01))

51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	/* Check if we can use the straight fully aligned
	 * loop, or we require the alignaddr/faligndata variant.
	 */
	andcc		%o1, 0x7, %o5
	bne,pn		%icc, .Llarge_src_unaligned
	 sub		%g0, %o0, %g1

	/* Legitimize the use of initializing stores by getting dest
	 * to be 64-byte aligned.
	 */
	and		%g1, 0x3f, %g1
	brz,pt		%g1, .Llarge_aligned
	 sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
	add		%o1, 8, %o1
	subcc		%g1, 8, %g1
	add		%o0, 8, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stx, %g2, %o0 - 0x08))

.Llarge_aligned:
	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
	andn		%o2, 0x3f, %o4	/* %o4 = bytes done by 64-byte loop */
	sub		%o2, %o4, %o2	/* %o2 = remaining tail */

	/* 64 bytes per iteration: 8 ldx interleaved with 8
	 * initializing stxa stores.
	 */
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
	add		%o1, 0x40, %o1
	EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
	subcc		%o4, 0x40, %o4
	EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
	EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
	EX_ST(STORE_INIT(%g1, %o0))
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0))
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
	EX_ST(STORE_INIT(%g3, %o0))
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
	EX_ST(STORE_INIT(%o5, %o0))
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0))
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g3, %o0))
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	/* Order the initializing stores before any later accesses. */
	membar		#StoreLoad | #StoreStore

	brz,pn		%o2, .Lexit
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	 nop
	ba,a,pt		%icc, .Lmedium_noprefetch

.Lexit:	retl
	 mov		EX_RETVAL(%o3), %o0

.Llarge_src_unaligned:
#ifdef NON_USER_COPY
	VISEntryHalfFast(.Lmedium_vis_entry_fail)
#else
	VISEntryHalf
#endif
	andn		%o2, 0x3f, %o4
	sub		%o2, %o4, %o2
	alignaddr	%o1, %g0, %g1	/* %g1 = src rounded down to 8 bytes */
	add		%o1, %o4, %o1
	EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
1:	EX_LD(LOAD(ldd, %g1 + 0x08, %f2))
	subcc		%o4, 0x40, %o4
	EX_LD(LOAD(ldd, %g1 + 0x10, %f4))
	EX_LD(LOAD(ldd, %g1 + 0x18, %f6))
	EX_LD(LOAD(ldd, %g1 + 0x20, %f8))
	EX_LD(LOAD(ldd, %g1 + 0x28, %f10))
	EX_LD(LOAD(ldd, %g1 + 0x30, %f12))
	EX_LD(LOAD(ldd, %g1 + 0x38, %f14))
	faligndata	%f0, %f2, %f16
	EX_LD(LOAD(ldd, %g1 + 0x40, %f0))
	faligndata	%f2, %f4, %f18
	add		%g1, 0x40, %g1
	faligndata	%f4, %f6, %f20
	faligndata	%f6, %f8, %f22
	faligndata	%f8, %f10, %f24
	faligndata	%f10, %f12, %f26
	faligndata	%f12, %f14, %f28
	faligndata	%f14, %f0, %f30
	EX_ST(STORE(std, %f16, %o0 + 0x00))
	EX_ST(STORE(std, %f18, %o0 + 0x08))
	EX_ST(STORE(std, %f20, %o0 + 0x10))
	EX_ST(STORE(std, %f22, %o0 + 0x18))
	EX_ST(STORE(std, %f24, %o0 + 0x20))
	EX_ST(STORE(std, %f26, %o0 + 0x28))
	EX_ST(STORE(std, %f28, %o0 + 0x30))
	EX_ST(STORE(std, %f30, %o0 + 0x38))
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
#ifdef NON_USER_COPY
	VISExitHalfFast
#else
	VISExitHalf
#endif
	brz,pn		%o2, .Lexit
	 cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	 nop
	ba,a,pt		%icc, .Lmedium_unaligned

#ifdef NON_USER_COPY
.Lmedium_vis_entry_fail:
	or		%o0, %o1, %g2
#endif
.Lmedium:
	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
	andcc		%g2, 0x7, %g0
	bne,pn		%icc, .Lmedium_unaligned
	 nop
.Lmedium_noprefetch:
	andncc		%o2, 0x20 - 1, %o5	/* %o5 = 32-byte chunks */
	be,pn		%icc, 2f
	 sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
	EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
	EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
	add		%o1, 0x20, %o1
	subcc		%o5, 0x20, %o5
	EX_ST(STORE(stx, %g1, %o0 + 0x00))
	EX_ST(STORE(stx, %g2, %o0 + 0x08))
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
	EX_ST(STORE(stx, %o4, %o0 + 0x18))
	bne,pt		%icc, 1b
	 add		%o0, 0x20, %o0
2:	andcc		%o2, 0x18, %o5		/* remaining 8-byte words */
	be,pt		%icc, 3f
	 sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
	add		%o1, 0x08, %o1
	add		%o0, 0x08, %o0
	subcc		%o5, 0x08, %o5
	bne,pt		%icc, 1b
	 EX_ST(STORE(stx, %g1, %o0 - 0x08))
3:	brz,pt		%o2, .Lexit
	 cmp		%o2, 0x04
	bl,pn		%icc, .Ltiny
	 nop
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
	add		%o1, 0x04, %o1
	add		%o0, 0x04, %o0
	subcc		%o2, 0x04, %o2
	bne,pn		%icc, .Ltiny
	 EX_ST(STORE(stw, %g1, %o0 - 0x04))
	ba,a,pt		%icc, .Lexit
.Lmedium_unaligned:
	/* First get dest 8 byte aligned.  */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, 2f
	 sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g2, %o0 - 0x01))
2:
	/* Shift/merge copy: %g1 = src misalignment in bits,
	 * %g2 = 64 - %g1.  Each aligned ldx contributes the low part
	 * of one store and the high part of the next.
	 */
	and		%o1, 0x7, %g1
	brz,pn		%g1, .Lmedium_noprefetch
	 sll		%g1, 3, %g1
	mov		64, %g2
	sub		%g2, %g1, %g2
	andn		%o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
	sllx		%o4, %g1, %o4
	andn		%o2, 0x08 - 1, %o5
	sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
	add		%o1, 0x08, %o1
	subcc		%o5, 0x08, %o5
	srlx		%g3, %g2, GLOBAL_SPARE
	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	 sllx		%g3, %g1, %o4
	srl		%g1, 3, %g1
	add		%o1, %g1, %o1	/* restore byte-exact src pointer */
	brz,pn		%o2, .Lexit
	 nop
	ba,pt		%icc, .Lsmall_unaligned

.Ltiny:
	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x00))
	EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x01))
	EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
	ba,pt		%icc, .Lexit
	 EX_ST(STORE(stb, %g1, %o0 + 0x02))

.Lsmall:
	andcc		%g2, 0x3, %g0
	bne,pn		%icc, .Lsmall_unaligned
	 andn		%o2, 0x4 - 1, %o5
	sub		%o2, %o5, %o2
1:
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
	add		%o1, 0x04, %o1
	subcc		%o5, 0x04, %o5
	add		%o0, 0x04, %o0
	bne,pt		%icc, 1b
	 EX_ST(STORE(stw, %g1, %o0 - 0x04))
	brz,pt		%o2, .Lexit
	 nop
	ba,a,pt		%icc, .Ltiny

.Lsmall_unaligned:
1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
	add		%o1, 1, %o1
	add		%o0, 1, %o0
	subcc		%o2, 1, %o2
	bne,pt		%icc, 1b
	 EX_ST(STORE(stb, %g1, %o0 - 0x01))
	ba,a,pt		%icc, .Lexit
	.size		FUNC_NAME, .-FUNC_NAME
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.