1 /* SPDX-License-Identifier: GPL-2.0-only */ !! 1 /* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code 2 /* !! 2 * Copyright (C) 1991,1996 Free Software Foundation 3 * Copyright (C) 2013 ARM Ltd. !! 3 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 4 * Copyright (C) 2013 Linaro. !! 4 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 5 * 5 * 6 * This code is based on glibc cortex strings !! 6 * Returns 0, if ok, and number of bytes not yet set if exception 7 * be found @ !! 7 * occurs and we were called as clear_user. 8 * << 9 * http://bazaar.launchpad.net/~linaro-toolcha << 10 * files/head:/src/aarch64/ << 11 */ 8 */ 12 9 13 #include <linux/linkage.h> !! 10 #include <asm/ptrace.h> 14 #include <asm/assembler.h> << 15 #include <asm/cache.h> << 16 11 17 /* !! 12 /* Work around cpp -rob */ 18 * Fill in the buffer with character c (alignm !! 13 #define ALLOC #alloc 19 * !! 14 #define EXECINSTR #execinstr 20 * Parameters: !! 15 #define EX(x,y,a,b) \ 21 * x0 - buf !! 16 98: x,y; \ 22 * x1 - c !! 17 .section .fixup,ALLOC,EXECINSTR; \ 23 * x2 - n !! 18 .align 4; \ 24 * Returns: !! 19 99: ba 30f; \ 25 * x0 - buf !! 20 a, b, %o0; \ 26 */ !! 21 .section __ex_table,ALLOC; \ >> 22 .align 4; \ >> 23 .word 98b, 99b; \ >> 24 .text; \ >> 25 .align 4 >> 26 >> 27 #define EXT(start,end,handler) \ >> 28 .section __ex_table,ALLOC; \ >> 29 .align 4; \ >> 30 .word start, 0, end, handler; \ >> 31 .text; \ >> 32 .align 4 >> 33 >> 34 /* Please don't change these macros, unless you change the logic >> 35 * in the .fixup section below as well. >> 36 * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */ >> 37 #define ZERO_BIG_BLOCK(base, offset, source) \ >> 38 std source, [base + offset + 0x00]; \ >> 39 std source, [base + offset + 0x08]; \ >> 40 std source, [base + offset + 0x10]; \ >> 41 std source, [base + offset + 0x18]; \ >> 42 std source, [base + offset + 0x20]; \ >> 43 std source, [base + offset + 0x28]; \ >> 44 std source, [base + offset + 0x30]; \ >> 45 std source, [base + offset + 0x38]; >> 46 >> 47 #define ZERO_LAST_BLOCKS(base, offset, source) \ >> 48 std source, [base - offset - 0x38]; \ >> 49 std source, [base - offset - 0x30]; \ >> 50 std source, [base - offset - 0x28]; \ >> 51 std source, [base - offset - 0x20]; \ >> 52 std source, [base - offset - 0x18]; \ >> 53 std source, [base - offset - 0x10]; \ >> 54 std source, [base - offset - 0x08]; \ >> 55 std source, [base - offset - 0x00]; >> 56 >> 57 .text >> 58 .align 4 >> 59 >> 60 .globl __bzero_begin >> 61 __bzero_begin: >> 62 >> 63 .globl __bzero >> 64 .globl memset >> 65 .globl __memset_start, __memset_end >> 66 __memset_start: >> 67 memset: >> 68 and %o1, 0xff, %g3 >> 69 sll %g3, 8, %g2 >> 70 or %g3, %g2, %g3 >> 71 sll %g3, 16, %g2 >> 72 or %g3, %g2, %g3 >> 73 b 1f >> 74 mov %o2, %o1 >> 75 3: >> 76 cmp %o2, 3 >> 77 be 2f >> 78 EX(stb %g3, [%o0], sub %o1, 0) >> 79 >> 80 cmp %o2, 2 >> 81 be 2f >> 82 EX(stb %g3, [%o0 + 0x01], sub %o1, 1) 27 83 28 dstin .req x0 !! 84 EX(stb %g3, [%o0 + 0x02], sub %o1, 2) 29 val .req w1 << 30 count .req x2 << 31 tmp1 .req x3 << 32 tmp1w .req w3 << 33 tmp2 .req x4 << 34 tmp2w .req w4 << 35 zva_len_x .req x5 << 36 zva_len .req w5 << 37 zva_bits_x .req x6 << 38 << 39 A_l .req x7 << 40 A_lw .req w7 << 41 dst .req x8 << 42 tmp3w .req w9 << 43 tmp3 .req x9 << 44 << 45 SYM_FUNC_START(__pi_memset) << 46 mov dst, dstin /* Preserve re << 47 and A_lw, val, #255 << 48 orr A_lw, A_lw, A_lw, lsl #8 << 49 orr A_lw, A_lw, A_lw, lsl #16 << 50 orr A_l, A_l, A_l, lsl #32 << 51 << 52 cmp count, #15 << 53 b.hi .Lover16_proc << 54 /*All store maybe are non-aligned..*/ << 55 tbz count, #3, 1f << 56 str A_l, [dst], #8 << 57 1: << 58 tbz count, #2, 2f << 59 str A_lw, [dst], #4 << 60 2: 85 2: 61 tbz count, #1, 3f !! 86 sub %o2, 4, %o2 62 strh A_lw, [dst], #2 !! 87 add %o1, %o2, %o1 63 3: !! 88 b 4f 64 tbz count, #0, 4f !! 89 sub %o0, %o2, %o0 65 strb A_lw, [dst] << 66 4: << 67 ret << 68 90 69 .Lover16_proc: !! 91 __bzero: 70 /*Whether the start address is aligne !! 92 mov %g0, %g3 71 neg tmp2, dst << 72 ands tmp2, tmp2, #15 << 73 b.eq .Laligned << 74 /* << 75 * The count is not less than 16, we can use st << 76 * then adjust the dst aligned with 16.This pro << 77 * memory address at alignment boundary. << 78 */ << 79 stp A_l, A_l, [dst] /*non-aligned << 80 /*make the dst aligned..*/ << 81 sub count, count, tmp2 << 82 add dst, dst, tmp2 << 83 << 84 .Laligned: << 85 cbz A_l, .Lzero_mem << 86 << 87 .Ltail_maybe_long: << 88 cmp count, #64 << 89 b.ge .Lnot_short << 90 .Ltail63: << 91 ands tmp1, count, #0x30 << 92 b.eq 3f << 93 cmp tmp1w, #0x20 << 94 b.eq 1f << 95 b.lt 2f << 96 stp A_l, A_l, [dst], #16 << 97 1: 93 1: 98 stp A_l, A_l, [dst], #16 !! 94 cmp %o1, 7 99 2: !! 95 bleu 7f 100 stp A_l, A_l, [dst], #16 !! 96 andcc %o0, 3, %o2 101 /* !! 97 102 * The last store length is less than 16,use st !! 98 bne 3b 103 * It will lead some bytes written twice and th << 104 */ << 105 3: << 106 ands count, count, #15 << 107 cbz count, 4f << 108 add dst, dst, count << 109 stp A_l, A_l, [dst, #-16] /* Rep << 110 4: 99 4: 111 ret !! 100 andcc %o0, 4, %g0 >> 101 >> 102 be 2f >> 103 mov %g3, %g2 >> 104 >> 105 EX(st %g3, [%o0], sub %o1, 0) >> 106 sub %o1, 4, %o1 >> 107 add %o0, 4, %o0 >> 108 2: >> 109 andcc %o1, 0xffffff80, %o3 ! Now everything is 8 aligned and o1 is len to run >> 110 be 9f >> 111 andcc %o1, 0x78, %o2 >> 112 10: >> 113 ZERO_BIG_BLOCK(%o0, 0x00, %g2) >> 114 subcc %o3, 128, %o3 >> 115 ZERO_BIG_BLOCK(%o0, 0x40, %g2) >> 116 11: >> 117 EXT(10b, 11b, 20f) >> 118 bne 10b >> 119 add %o0, 128, %o0 >> 120 >> 121 orcc %o2, %g0, %g0 >> 122 9: >> 123 be 13f >> 124 andcc %o1, 7, %o1 >> 125 >> 126 srl %o2, 1, %o3 >> 127 set 13f, %o4 >> 128 sub %o4, %o3, %o4 >> 129 jmp %o4 >> 130 add %o0, %o2, %o0 >> 131 >> 132 12: >> 133 ZERO_LAST_BLOCKS(%o0, 0x48, %g2) >> 134 ZERO_LAST_BLOCKS(%o0, 0x08, %g2) >> 135 13: >> 136 be 8f >> 137 andcc %o1, 4, %g0 >> 138 >> 139 be 1f >> 140 andcc %o1, 2, %g0 112 141 113 /* !! 142 EX(st %g3, [%o0], and %o1, 7) 114 * Critical loop. Start at a new cache !! 143 add %o0, 4, %o0 115 * 64 bytes per line, this ensures the << 116 */ << 117 .p2align L1_CACHE_SHIFT << 118 .Lnot_short: << 119 sub dst, dst, #16/* Pre-bias. */ << 120 sub count, count, #64 << 121 1: 144 1: 122 stp A_l, A_l, [dst, #16] !! 145 be 1f 123 stp A_l, A_l, [dst, #32] !! 146 andcc %o1, 1, %g0 124 stp A_l, A_l, [dst, #48] << 125 stp A_l, A_l, [dst, #64]! << 126 subs count, count, #64 << 127 b.ge 1b << 128 tst count, #0x3f << 129 add dst, dst, #16 << 130 b.ne .Ltail63 << 131 .Lexitfunc: << 132 ret << 133 147 134 /* !! 148 EX(sth %g3, [%o0], and %o1, 3) 135 * For zeroing memory, check to see if !! 149 add %o0, 2, %o0 136 * zero entire 'cache' lines. << 137 */ << 138 .Lzero_mem: << 139 cmp count, #63 << 140 b.le .Ltail63 << 141 /* << 142 * For zeroing small amounts of memory, << 143 * the line-clear code. << 144 */ << 145 cmp count, #128 << 146 b.lt .Lnot_short /*count is at leas << 147 << 148 mrs tmp1, dczid_el0 << 149 tbnz tmp1, #4, .Lnot_short << 150 mov tmp3w, #4 << 151 and zva_len, tmp1w, #15 /* Saf << 152 lsl zva_len, tmp3w, zva_len << 153 << 154 ands tmp3w, zva_len, #63 << 155 /* << 156 * ensure the zva_len is not less than << 157 * It is not meaningful to use ZVA if t << 158 */ << 159 b.ne .Lnot_short << 160 .Lzero_by_line: << 161 /* << 162 * Compute how far we need to go to bec << 163 * already at quad-word alignment. << 164 */ << 165 cmp count, zva_len_x << 166 b.lt .Lnot_short /* Not << 167 sub zva_bits_x, zva_len_x, #1 << 168 neg tmp2, dst << 169 ands tmp2, tmp2, zva_bits_x << 170 b.eq 2f /* Alr << 171 /* Not aligned, check that there's eno << 172 sub tmp1, count, tmp2 << 173 /* << 174 * grantee the remain length to be ZVA << 175 * avoid to make the 2f's process over << 176 cmp tmp1, #64 << 177 ccmp tmp1, zva_len_x, #8, ge /* NZC << 178 b.lt .Lnot_short << 179 /* << 180 * We know that there's at least 64 byt << 181 * to overrun by 64 bytes. << 182 */ << 183 mov count, tmp1 << 184 1: 150 1: 185 stp A_l, A_l, [dst] !! 151 bne,a 8f 186 stp A_l, A_l, [dst, #16] !! 152 EX(stb %g3, [%o0], and %o1, 1) 187 stp A_l, A_l, [dst, #32] !! 153 8: 188 subs tmp2, tmp2, #64 !! 154 retl 189 stp A_l, A_l, [dst, #48] !! 155 clr %o0 190 add dst, dst, #64 !! 156 7: 191 b.ge 1b !! 157 be 13b 192 /* We've overrun a bit, so adjust dst !! 158 orcc %o1, 0, %g0 193 add dst, dst, tmp2 !! 159 194 2: !! 160 be 0f 195 sub count, count, zva_len_x !! 161 8: 196 3: !! 162 add %o0, 1, %o0 197 dc zva, dst !! 163 subcc %o1, 1, %o1 198 add dst, dst, zva_len_x !! 164 bne 8b 199 subs count, count, zva_len_x !! 165 EX(stb %g3, [%o0 - 1], add %o1, 1) 200 b.ge 3b !! 166 0: 201 ands count, count, zva_bits_x !! 167 retl 202 b.ne .Ltail_maybe_long !! 168 clr %o0 >> 169 __memset_end: >> 170 >> 171 .section .fixup,#alloc,#execinstr >> 172 .align 4 >> 173 20: >> 174 cmp %g2, 8 >> 175 bleu 1f >> 176 and %o1, 0x7f, %o1 >> 177 sub %g2, 9, %g2 >> 178 add %o3, 64, %o3 >> 179 1: >> 180 sll %g2, 3, %g2 >> 181 add %o3, %o1, %o0 >> 182 b 30f >> 183 sub %o0, %g2, %o0 >> 184 21: >> 185 mov 8, %o0 >> 186 and %o1, 7, %o1 >> 187 sub %o0, %g2, %o0 >> 188 sll %o0, 3, %o0 >> 189 b 30f >> 190 add %o0, %o1, %o0 >> 191 30: >> 192 /* %o4 is faulting address, %o5 is %pc where fault occurred */ >> 193 save %sp, -104, %sp >> 194 mov %i5, %o0 >> 195 mov %i7, %o1 >> 196 call lookup_fault >> 197 mov %i4, %o2 203 ret 198 ret 204 SYM_FUNC_END(__pi_memset) !! 199 restore 205 << 206 SYM_FUNC_ALIAS(__memset, __pi_memset) << 207 EXPORT_SYMBOL(__memset) << 208 200 209 SYM_FUNC_ALIAS_WEAK(memset, __pi_memset) !! 201 .globl __bzero_end 210 EXPORT_SYMBOL(memset) !! 202 __bzero_end:
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.