/* SPDX-License-Identifier: GPL-2.0 */
/* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 *
 * Calls to memset return the initial %o0. Calls to bzero return 0 on
 * success, or the number of bytes not yet set if an exception occurs and
 * we were called as clear_user.
 */

#include <asm/ptrace.h>
#include <asm/export.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba 30f;					\
	 a, b, %o0;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EXT(start,end,handler) 			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4

/* Please don't change these macros unless you change the logic
 * in the .fixup section below as well.
 * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
#define ZERO_BIG_BLOCK(base, offset, source)	\
	std	source, [base + offset + 0x00]; \
	std	source, [base + offset + 0x08]; \
	std	source, [base + offset + 0x10]; \
	std	source, [base + offset + 0x18]; \
	std	source, [base + offset + 0x20]; \
	std	source, [base + offset + 0x28]; \
	std	source, [base + offset + 0x30]; \
	std	source, [base + offset + 0x38];

#define ZERO_LAST_BLOCKS(base, offset, source)	\
	std	source, [base - offset - 0x38]; \
	std	source, [base - offset - 0x30]; \
	std	source, [base - offset - 0x28]; \
	std	source, [base - offset - 0x20]; \
	std	source, [base - offset - 0x18]; \
	std	source, [base - offset - 0x10]; \
	std	source, [base - offset - 0x08]; \
	std	source, [base - offset - 0x00];

	.text
	.align 4

	.globl	__bzero_begin
__bzero_begin:

	.globl	__bzero
	.type	__bzero,#function
	.globl	memset
	EXPORT_SYMBOL(__bzero)
	EXPORT_SYMBOL(memset)
	.globl	__memset_start, __memset_end
__memset_start:
memset:
	mov	%o0, %g1
	mov	1, %g4
	and	%o1, 0xff, %g3
	sll	%g3, 8, %g2
	or	%g3, %g2, %g3
	sll	%g3, 16, %g2
	or	%g3, %g2, %g3
	b	1f
	 mov	%o2, %o1
3:
	cmp	%o2, 3
	be	2f
	 EX(stb	%g3, [%o0], sub %o1, 0)

	cmp	%o2, 2
	be	2f
	 EX(stb	%g3, [%o0 + 0x01], sub %o1, 1)
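
	/* Falling through here means %o0 was 1 byte past a word boundary
	 * (%o2 == 1), so three pad bytes are needed: the two stores above
	 * wrote the first two, this one writes the third before the
	 * pointer/length fixup at 2: below.
	 */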
	EX(stb	%g3, [%o0 + 0x02], sub %o1, 2)
2:
	sub	%o2, 4, %o2
	add	%o1, %o2, %o1
	b	4f
	 sub	%o0, %o2, %o0

__bzero:
	clr	%g4
	mov	%g0, %g3
1:
	cmp	%o1, 7
	bleu	7f
	 andcc	%o0, 3, %o2

	bne	3b
4:
	 andcc	%o0, 4, %g0

	be	2f
	 mov	%g3, %g2

	EX(st	%g3, [%o0], sub %o1, 0)
	sub	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%o1, 0xffffff80, %o3	! Now everything is 8 aligned and o1 is len to run
	be	9f
	 andcc	%o1, 0x78, %o2
10:
	ZERO_BIG_BLOCK(%o0, 0x00, %g2)
	subcc	%o3, 128, %o3
	ZERO_BIG_BLOCK(%o0, 0x40, %g2)
11:
	EXT(10b, 11b, 20f)
	bne	10b
	 add	%o0, 128, %o0

	orcc	%o2, %g0, %g0
9:
	be	13f
	 andcc	%o1, 7, %o1

	srl	%o2, 1, %o3
	set	13f, %o4
	sub	%o4, %o3, %o4
	jmp	%o4
	 add	%o0, %o2, %o0

12:
	ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
	ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
13:
	be	8f
	 andcc	%o1, 4, %g0

	be	1f
	 andcc	%o1, 2, %g0

	EX(st	%g3, [%o0], and %o1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%o1, 1, %g0
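
	/* Bit 1 of the remaining length was set: store one halfword of the
	 * tail.  The andcc in the delay slot above has already set up the
	 * test for a final odd byte, used at the next 1: label.
	 */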

	EX(sth	%g3, [%o0], and %o1, 3)
	add	%o0, 2, %o0
1:
	bne,a	8f
	 EX(stb	%g3, [%o0], and %o1, 1)
8:
	b	0f
	 nop
7:
	be	13b
	 orcc	%o1, 0, %g0

	be	0f
8:
	 add	%o0, 1, %o0
	subcc	%o1, 1, %o1
	bne	8b
	 EX(stb	%g3, [%o0 - 1], add %o1, 1)
0:
	andcc	%g4, 1, %g0
	be	5f
	 nop
	retl
	 mov	%g1, %o0
5:
	retl
	 clr	%o0
__memset_end:

	.section .fixup,#alloc,#execinstr
	.align	4
20:
	cmp	%g2, 8
	bleu	1f
	 and	%o1, 0x7f, %o1
	sub	%g2, 9, %g2
	add	%o3, 64, %o3
1:
	sll	%g2, 3, %g2
	add	%o3, %o1, %o0
	b	30f
	 sub	%o0, %g2, %o0
21:
	mov	8, %o0
	and	%o1, 7, %o1
	sub	%o0, %g2, %o0
	sll	%o0, 3, %o0
	b	30f
	 add	%o0, %o1, %o0
30:
/* %o4 is faulting address, %o5 is %pc where fault occurred */
	save	%sp, -104, %sp
	mov	%i5, %o0
	mov	%i7, %o1
	call	lookup_fault
	 mov	%i4, %o2
	ret
	restore

	.globl __bzero_end
__bzero_end:
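
/*
 * Illustrative C sketch of the calling convention documented in the header
 * comment above; a minimal model only, and the wrapper name
 * clear_user_sketch is hypothetical (only memset and __bzero are defined
 * in this file).
 *
 *	void *memset(void *s, int c, size_t n);		 returns s
 *	unsigned long __bzero(void *s, unsigned long n);
 *
 *	// Returns 0 on success; after a fault the .fixup code above makes
 *	// __bzero return the number of bytes that were not yet cleared.
 *	static unsigned long clear_user_sketch(void *dst, unsigned long n)
 *	{
 *		return __bzero(dst, n);
 *	}
 */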