1 /* SPDX-License-Identifier: GPL-2.0-only */ !! 1 /* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code 2 /* !! 2 * Copyright (C) 1991,1996 Free Software Foundation 3 * Copyright (C) 2013 ARM Ltd. !! 3 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 4 * Copyright (C) 2013 Linaro. !! 4 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 5 * 5 * 6 * This code is based on glibc cortex strings !! 6 * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and 7 * be found @ !! 7 * number of bytes not yet set if exception occurs and we were called as 8 * !! 8 * clear_user. 9 * http://bazaar.launchpad.net/~linaro-toolcha << 10 * files/head:/src/aarch64/ << 11 */ 9 */ 12 10 13 #include <linux/linkage.h> !! 11 #include <asm/ptrace.h> 14 #include <asm/assembler.h> !! 12 #include <asm/export.h> 15 #include <asm/cache.h> << 16 13 17 /* !! 14 /* Work around cpp -rob */ 18 * Fill in the buffer with character c (alignm !! 15 #define ALLOC #alloc 19 * !! 16 #define EXECINSTR #execinstr 20 * Parameters: !! 17 #define EX(x,y,a,b) \ 21 * x0 - buf !! 18 98: x,y; \ 22 * x1 - c !! 19 .section .fixup,ALLOC,EXECINSTR; \ 23 * x2 - n !! 20 .align 4; \ 24 * Returns: !! 21 99: ba 30f; \ 25 * x0 - buf !! 22 a, b, %o0; \ 26 */ !! 23 .section __ex_table,ALLOC; \ >> 24 .align 4; \ >> 25 .word 98b, 99b; \ >> 26 .text; \ >> 27 .align 4 >> 28 >> 29 #define EXT(start,end,handler) \ >> 30 .section __ex_table,ALLOC; \ >> 31 .align 4; \ >> 32 .word start, 0, end, handler; \ >> 33 .text; \ >> 34 .align 4 >> 35 >> 36 /* Please don't change these macros, unless you change the logic >> 37 * in the .fixup section below as well. >> 38 * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */ >> 39 #define ZERO_BIG_BLOCK(base, offset, source) \ >> 40 std source, [base + offset + 0x00]; \ >> 41 std source, [base + offset + 0x08]; \ >> 42 std source, [base + offset + 0x10]; \ >> 43 std source, [base + offset + 0x18]; \ >> 44 std source, [base + offset + 0x20]; \ >> 45 std source, [base + offset + 0x28]; \ >> 46 std source, [base + offset + 0x30]; \ >> 47 std source, [base + offset + 0x38]; >> 48 >> 49 #define ZERO_LAST_BLOCKS(base, offset, source) \ >> 50 std source, [base - offset - 0x38]; \ >> 51 std source, [base - offset - 0x30]; \ >> 52 std source, [base - offset - 0x28]; \ >> 53 std source, [base - offset - 0x20]; \ >> 54 std source, [base - offset - 0x18]; \ >> 55 std source, [base - offset - 0x10]; \ >> 56 std source, [base - offset - 0x08]; \ >> 57 std source, [base - offset - 0x00]; >> 58 >> 59 .text >> 60 .align 4 >> 61 >> 62 .globl __bzero_begin >> 63 __bzero_begin: >> 64 >> 65 .globl __bzero >> 66 .type __bzero,#function >> 67 .globl memset >> 68 EXPORT_SYMBOL(__bzero) >> 69 EXPORT_SYMBOL(memset) >> 70 .globl __memset_start, __memset_end >> 71 __memset_start: >> 72 memset: >> 73 mov %o0, %g1 >> 74 mov 1, %g4 >> 75 and %o1, 0xff, %g3 >> 76 sll %g3, 8, %g2 >> 77 or %g3, %g2, %g3 >> 78 sll %g3, 16, %g2 >> 79 or %g3, %g2, %g3 >> 80 b 1f >> 81 mov %o2, %o1 >> 82 3: >> 83 cmp %o2, 3 >> 84 be 2f >> 85 EX(stb %g3, [%o0], sub %o1, 0) >> 86 >> 87 cmp %o2, 2 >> 88 be 2f >> 89 EX(stb %g3, [%o0 + 0x01], sub %o1, 1) 27 90 28 dstin .req x0 !! 91 EX(stb %g3, [%o0 + 0x02], sub %o1, 2) 29 val .req w1 << 30 count .req x2 << 31 tmp1 .req x3 << 32 tmp1w .req w3 << 33 tmp2 .req x4 << 34 tmp2w .req w4 << 35 zva_len_x .req x5 << 36 zva_len .req w5 << 37 zva_bits_x .req x6 << 38 << 39 A_l .req x7 << 40 A_lw .req w7 << 41 dst .req x8 << 42 tmp3w .req w9 << 43 tmp3 .req x9 << 44 << 45 SYM_FUNC_START(__pi_memset) << 46 mov dst, dstin /* Preserve re << 47 and A_lw, val, #255 << 48 orr A_lw, A_lw, A_lw, lsl #8 << 49 orr A_lw, A_lw, A_lw, lsl #16 << 50 orr A_l, A_l, A_l, lsl #32 << 51 << 52 cmp count, #15 << 53 b.hi .Lover16_proc << 54 /*All store maybe are non-aligned..*/ << 55 tbz count, #3, 1f << 56 str A_l, [dst], #8 << 57 1: << 58 tbz count, #2, 2f << 59 str A_lw, [dst], #4 << 60 2: 92 2: 61 tbz count, #1, 3f !! 93 sub %o2, 4, %o2 62 strh A_lw, [dst], #2 !! 94 add %o1, %o2, %o1 63 3: !! 95 b 4f 64 tbz count, #0, 4f !! 96 sub %o0, %o2, %o0 65 strb A_lw, [dst] !! 97 >> 98 __bzero: >> 99 clr %g4 >> 100 mov %g0, %g3 >> 101 1: >> 102 cmp %o1, 7 >> 103 bleu 7f >> 104 andcc %o0, 3, %o2 >> 105 >> 106 bne 3b 66 4: 107 4: 67 ret !! 108 andcc %o0, 4, %g0 68 109 69 .Lover16_proc: !! 110 be 2f 70 /*Whether the start address is aligne !! 111 mov %g3, %g2 71 neg tmp2, dst !! 112 72 ands tmp2, tmp2, #15 !! 113 EX(st %g3, [%o0], sub %o1, 0) 73 b.eq .Laligned !! 114 sub %o1, 4, %o1 74 /* !! 115 add %o0, 4, %o0 75 * The count is not less than 16, we can use st << 76 * then adjust the dst aligned with 16.This pro << 77 * memory address at alignment boundary. << 78 */ << 79 stp A_l, A_l, [dst] /*non-aligned << 80 /*make the dst aligned..*/ << 81 sub count, count, tmp2 << 82 add dst, dst, tmp2 << 83 << 84 .Laligned: << 85 cbz A_l, .Lzero_mem << 86 << 87 .Ltail_maybe_long: << 88 cmp count, #64 << 89 b.ge .Lnot_short << 90 .Ltail63: << 91 ands tmp1, count, #0x30 << 92 b.eq 3f << 93 cmp tmp1w, #0x20 << 94 b.eq 1f << 95 b.lt 2f << 96 stp A_l, A_l, [dst], #16 << 97 1: << 98 stp A_l, A_l, [dst], #16 << 99 2: 116 2: 100 stp A_l, A_l, [dst], #16 !! 117 andcc %o1, 0xffffff80, %o3 ! Now everything is 8 aligned and o1 is len to run 101 /* !! 118 be 9f 102 * The last store length is less than 16,use st !! 119 andcc %o1, 0x78, %o2 103 * It will lead some bytes written twice and th !! 120 10: 104 */ !! 121 ZERO_BIG_BLOCK(%o0, 0x00, %g2) 105 3: !! 122 subcc %o3, 128, %o3 106 ands count, count, #15 !! 123 ZERO_BIG_BLOCK(%o0, 0x40, %g2) 107 cbz count, 4f !! 124 11: 108 add dst, dst, count !! 125 EXT(10b, 11b, 20f) 109 stp A_l, A_l, [dst, #-16] /* Rep !! 126 bne 10b 110 4: !! 127 add %o0, 128, %o0 111 ret !! 128 >> 129 orcc %o2, %g0, %g0 >> 130 9: >> 131 be 13f >> 132 andcc %o1, 7, %o1 >> 133 >> 134 srl %o2, 1, %o3 >> 135 set 13f, %o4 >> 136 sub %o4, %o3, %o4 >> 137 jmp %o4 >> 138 add %o0, %o2, %o0 >> 139 >> 140 12: >> 141 ZERO_LAST_BLOCKS(%o0, 0x48, %g2) >> 142 ZERO_LAST_BLOCKS(%o0, 0x08, %g2) >> 143 13: >> 144 be 8f >> 145 andcc %o1, 4, %g0 >> 146 >> 147 be 1f >> 148 andcc %o1, 2, %g0 112 149 113 /* !! 150 EX(st %g3, [%o0], and %o1, 7) 114 * Critical loop. Start at a new cache !! 151 add %o0, 4, %o0 115 * 64 bytes per line, this ensures the << 116 */ << 117 .p2align L1_CACHE_SHIFT << 118 .Lnot_short: << 119 sub dst, dst, #16/* Pre-bias. */ << 120 sub count, count, #64 << 121 1: 152 1: 122 stp A_l, A_l, [dst, #16] !! 153 be 1f 123 stp A_l, A_l, [dst, #32] !! 154 andcc %o1, 1, %g0 124 stp A_l, A_l, [dst, #48] << 125 stp A_l, A_l, [dst, #64]! << 126 subs count, count, #64 << 127 b.ge 1b << 128 tst count, #0x3f << 129 add dst, dst, #16 << 130 b.ne .Ltail63 << 131 .Lexitfunc: << 132 ret << 133 155 134 /* !! 156 EX(sth %g3, [%o0], and %o1, 3) 135 * For zeroing memory, check to see if !! 157 add %o0, 2, %o0 136 * zero entire 'cache' lines. << 137 */ << 138 .Lzero_mem: << 139 cmp count, #63 << 140 b.le .Ltail63 << 141 /* << 142 * For zeroing small amounts of memory, << 143 * the line-clear code. << 144 */ << 145 cmp count, #128 << 146 b.lt .Lnot_short /*count is at leas << 147 << 148 mrs tmp1, dczid_el0 << 149 tbnz tmp1, #4, .Lnot_short << 150 mov tmp3w, #4 << 151 and zva_len, tmp1w, #15 /* Saf << 152 lsl zva_len, tmp3w, zva_len << 153 << 154 ands tmp3w, zva_len, #63 << 155 /* << 156 * ensure the zva_len is not less than << 157 * It is not meaningful to use ZVA if t << 158 */ << 159 b.ne .Lnot_short << 160 .Lzero_by_line: << 161 /* << 162 * Compute how far we need to go to bec << 163 * already at quad-word alignment. << 164 */ << 165 cmp count, zva_len_x << 166 b.lt .Lnot_short /* Not << 167 sub zva_bits_x, zva_len_x, #1 << 168 neg tmp2, dst << 169 ands tmp2, tmp2, zva_bits_x << 170 b.eq 2f /* Alr << 171 /* Not aligned, check that there's eno << 172 sub tmp1, count, tmp2 << 173 /* << 174 * grantee the remain length to be ZVA << 175 * avoid to make the 2f's process over << 176 cmp tmp1, #64 << 177 ccmp tmp1, zva_len_x, #8, ge /* NZC << 178 b.lt .Lnot_short << 179 /* << 180 * We know that there's at least 64 byt << 181 * to overrun by 64 bytes. << 182 */ << 183 mov count, tmp1 << 184 1: 158 1: 185 stp A_l, A_l, [dst] !! 159 bne,a 8f 186 stp A_l, A_l, [dst, #16] !! 160 EX(stb %g3, [%o0], and %o1, 1) 187 stp A_l, A_l, [dst, #32] !! 161 8: 188 subs tmp2, tmp2, #64 !! 162 b 0f 189 stp A_l, A_l, [dst, #48] !! 163 nop 190 add dst, dst, #64 !! 164 7: 191 b.ge 1b !! 165 be 13b 192 /* We've overrun a bit, so adjust dst !! 166 orcc %o1, 0, %g0 193 add dst, dst, tmp2 !! 167 194 2: !! 168 be 0f 195 sub count, count, zva_len_x !! 169 8: 196 3: !! 170 add %o0, 1, %o0 197 dc zva, dst !! 171 subcc %o1, 1, %o1 198 add dst, dst, zva_len_x !! 172 bne 8b 199 subs count, count, zva_len_x !! 173 EX(stb %g3, [%o0 - 1], add %o1, 1) 200 b.ge 3b !! 174 0: 201 ands count, count, zva_bits_x !! 175 andcc %g4, 1, %g0 202 b.ne .Ltail_maybe_long !! 176 be 5f >> 177 nop >> 178 retl >> 179 mov %g1, %o0 >> 180 5: >> 181 retl >> 182 clr %o0 >> 183 __memset_end: >> 184 >> 185 .section .fixup,#alloc,#execinstr >> 186 .align 4 >> 187 20: >> 188 cmp %g2, 8 >> 189 bleu 1f >> 190 and %o1, 0x7f, %o1 >> 191 sub %g2, 9, %g2 >> 192 add %o3, 64, %o3 >> 193 1: >> 194 sll %g2, 3, %g2 >> 195 add %o3, %o1, %o0 >> 196 b 30f >> 197 sub %o0, %g2, %o0 >> 198 21: >> 199 mov 8, %o0 >> 200 and %o1, 7, %o1 >> 201 sub %o0, %g2, %o0 >> 202 sll %o0, 3, %o0 >> 203 b 30f >> 204 add %o0, %o1, %o0 >> 205 30: >> 206 /* %o4 is faulting address, %o5 is %pc where fault occurred */ >> 207 save %sp, -104, %sp >> 208 mov %i5, %o0 >> 209 mov %i7, %o1 >> 210 call lookup_fault >> 211 mov %i4, %o2 203 ret 212 ret 204 SYM_FUNC_END(__pi_memset) !! 213 restore 205 << 206 SYM_FUNC_ALIAS(__memset, __pi_memset) << 207 EXPORT_SYMBOL(__memset) << 208 214 209 SYM_FUNC_ALIAS_WEAK(memset, __pi_memset) !! 215 .globl __bzero_end 210 EXPORT_SYMBOL(memset) !! 216 __bzero_end:
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.