1 /* SPDX-License-Identifier: GPL-2.0-only */ !! 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* !! 2 /* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code 3 * Copyright (C) 2013 ARM Ltd. !! 3 * Copyright (C) 1991,1996 Free Software Foundation 4 * Copyright (C) 2013 Linaro. !! 4 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) >> 5 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 5 * 6 * 6 * This code is based on glibc cortex strings !! 7 * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and 7 * be found @ !! 8 * number of bytes not yet set if exception occurs and we were called as 8 * !! 9 * clear_user. 9 * http://bazaar.launchpad.net/~linaro-toolcha << 10 * files/head:/src/aarch64/ << 11 */ 10 */ 12 11 13 #include <linux/linkage.h> !! 12 #include <linux/export.h> 14 #include <asm/assembler.h> !! 13 #include <asm/ptrace.h> 15 #include <asm/cache.h> << 16 14 17 /* !! 15 /* Work around cpp -rob */ 18 * Fill in the buffer with character c (alignm !! 16 #define ALLOC #alloc 19 * !! 17 #define EXECINSTR #execinstr 20 * Parameters: !! 18 #define EX(x,y,a,b) \ 21 * x0 - buf !! 19 98: x,y; \ 22 * x1 - c !! 20 .section .fixup,ALLOC,EXECINSTR; \ 23 * x2 - n !! 21 .align 4; \ 24 * Returns: !! 22 99: retl; \ 25 * x0 - buf !! 23 a, b, %o0; \ 26 */ !! 24 .section __ex_table,ALLOC; \ >> 25 .align 4; \ >> 26 .word 98b, 99b; \ >> 27 .text; \ >> 28 .align 4 >> 29 >> 30 #define STORE(source, base, offset, n) \ >> 31 98: std source, [base + offset + n]; \ >> 32 .section .fixup,ALLOC,EXECINSTR; \ >> 33 .align 4; \ >> 34 99: ba 30f; \ >> 35 sub %o3, n - offset, %o3; \ >> 36 .section __ex_table,ALLOC; \ >> 37 .align 4; \ >> 38 .word 98b, 99b; \ >> 39 .text; \ >> 40 .align 4; >> 41 >> 42 #define STORE_LAST(source, base, offset, n) \ >> 43 EX(std source, [base - offset - n], \ >> 44 add %o1, offset + n); >> 45 >> 46 /* Please don't change these macros, unless you change the logic >> 47 * in the .fixup section below as well. >> 48 * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */ >> 49 #define ZERO_BIG_BLOCK(base, offset, source) \ >> 50 STORE(source, base, offset, 0x00); \ >> 51 STORE(source, base, offset, 0x08); \ >> 52 STORE(source, base, offset, 0x10); \ >> 53 STORE(source, base, offset, 0x18); \ >> 54 STORE(source, base, offset, 0x20); \ >> 55 STORE(source, base, offset, 0x28); \ >> 56 STORE(source, base, offset, 0x30); \ >> 57 STORE(source, base, offset, 0x38); >> 58 >> 59 #define ZERO_LAST_BLOCKS(base, offset, source) \ >> 60 STORE_LAST(source, base, offset, 0x38); \ >> 61 STORE_LAST(source, base, offset, 0x30); \ >> 62 STORE_LAST(source, base, offset, 0x28); \ >> 63 STORE_LAST(source, base, offset, 0x20); \ >> 64 STORE_LAST(source, base, offset, 0x18); \ >> 65 STORE_LAST(source, base, offset, 0x10); \ >> 66 STORE_LAST(source, base, offset, 0x08); \ >> 67 STORE_LAST(source, base, offset, 0x00); >> 68 >> 69 .text >> 70 .align 4 >> 71 >> 72 .globl __bzero_begin >> 73 __bzero_begin: >> 74 >> 75 .globl __bzero >> 76 .type __bzero,#function >> 77 .globl memset >> 78 EXPORT_SYMBOL(__bzero) >> 79 EXPORT_SYMBOL(memset) >> 80 memset: >> 81 mov %o0, %g1 >> 82 mov 1, %g4 >> 83 and %o1, 0xff, %g3 >> 84 sll %g3, 8, %g2 >> 85 or %g3, %g2, %g3 >> 86 sll %g3, 16, %g2 >> 87 or %g3, %g2, %g3 >> 88 b 1f >> 89 mov %o2, %o1 >> 90 3: >> 91 cmp %o2, 3 >> 92 be 2f >> 93 EX(stb %g3, [%o0], sub %o1, 0) >> 94 >> 95 cmp %o2, 2 >> 96 be 2f >> 97 EX(stb %g3, [%o0 + 0x01], sub %o1, 1) 27 98 28 dstin .req x0 !! 99 EX(stb %g3, [%o0 + 0x02], sub %o1, 2) 29 val .req w1 << 30 count .req x2 << 31 tmp1 .req x3 << 32 tmp1w .req w3 << 33 tmp2 .req x4 << 34 tmp2w .req w4 << 35 zva_len_x .req x5 << 36 zva_len .req w5 << 37 zva_bits_x .req x6 << 38 << 39 A_l .req x7 << 40 A_lw .req w7 << 41 dst .req x8 << 42 tmp3w .req w9 << 43 tmp3 .req x9 << 44 << 45 SYM_FUNC_START(__pi_memset) << 46 mov dst, dstin /* Preserve re << 47 and A_lw, val, #255 << 48 orr A_lw, A_lw, A_lw, lsl #8 << 49 orr A_lw, A_lw, A_lw, lsl #16 << 50 orr A_l, A_l, A_l, lsl #32 << 51 << 52 cmp count, #15 << 53 b.hi .Lover16_proc << 54 /*All store maybe are non-aligned..*/ << 55 tbz count, #3, 1f << 56 str A_l, [dst], #8 << 57 1: << 58 tbz count, #2, 2f << 59 str A_lw, [dst], #4 << 60 2: 100 2: 61 tbz count, #1, 3f !! 101 sub %o2, 4, %o2 62 strh A_lw, [dst], #2 !! 102 add %o1, %o2, %o1 63 3: !! 103 b 4f 64 tbz count, #0, 4f !! 104 sub %o0, %o2, %o0 65 strb A_lw, [dst] !! 105 >> 106 __bzero: >> 107 clr %g4 >> 108 mov %g0, %g3 >> 109 1: >> 110 cmp %o1, 7 >> 111 bleu 7f >> 112 andcc %o0, 3, %o2 >> 113 >> 114 bne 3b 66 4: 115 4: 67 ret !! 116 andcc %o0, 4, %g0 68 117 69 .Lover16_proc: !! 118 be 2f 70 /*Whether the start address is aligne !! 119 mov %g3, %g2 71 neg tmp2, dst !! 120 72 ands tmp2, tmp2, #15 !! 121 EX(st %g3, [%o0], sub %o1, 0) 73 b.eq .Laligned !! 122 sub %o1, 4, %o1 74 /* !! 123 add %o0, 4, %o0 75 * The count is not less than 16, we can use st << 76 * then adjust the dst aligned with 16.This pro << 77 * memory address at alignment boundary. << 78 */ << 79 stp A_l, A_l, [dst] /*non-aligned << 80 /*make the dst aligned..*/ << 81 sub count, count, tmp2 << 82 add dst, dst, tmp2 << 83 << 84 .Laligned: << 85 cbz A_l, .Lzero_mem << 86 << 87 .Ltail_maybe_long: << 88 cmp count, #64 << 89 b.ge .Lnot_short << 90 .Ltail63: << 91 ands tmp1, count, #0x30 << 92 b.eq 3f << 93 cmp tmp1w, #0x20 << 94 b.eq 1f << 95 b.lt 2f << 96 stp A_l, A_l, [dst], #16 << 97 1: << 98 stp A_l, A_l, [dst], #16 << 99 2: 124 2: 100 stp A_l, A_l, [dst], #16 !! 125 andcc %o1, 0xffffff80, %o3 ! Now everything is 8 aligned and o1 is len to run 101 /* !! 126 be 9f 102 * The last store length is less than 16,use st !! 127 andcc %o1, 0x78, %o2 103 * It will lead some bytes written twice and th !! 128 10: 104 */ !! 129 ZERO_BIG_BLOCK(%o0, 0x00, %g2) 105 3: !! 130 subcc %o3, 128, %o3 106 ands count, count, #15 !! 131 ZERO_BIG_BLOCK(%o0, 0x40, %g2) 107 cbz count, 4f !! 132 bne 10b 108 add dst, dst, count !! 133 add %o0, 128, %o0 109 stp A_l, A_l, [dst, #-16] /* Rep !! 134 110 4: !! 135 orcc %o2, %g0, %g0 111 ret !! 136 9: >> 137 be 13f >> 138 andcc %o1, 7, %o1 >> 139 >> 140 srl %o2, 1, %o3 >> 141 set 13f, %o4 >> 142 sub %o4, %o3, %o4 >> 143 jmp %o4 >> 144 add %o0, %o2, %o0 >> 145 >> 146 ZERO_LAST_BLOCKS(%o0, 0x48, %g2) >> 147 ZERO_LAST_BLOCKS(%o0, 0x08, %g2) >> 148 13: >> 149 be 8f >> 150 andcc %o1, 4, %g0 112 151 113 /* !! 152 be 1f 114 * Critical loop. Start at a new cache !! 153 andcc %o1, 2, %g0 115 * 64 bytes per line, this ensures the !! 154 116 */ !! 155 EX(st %g3, [%o0], and %o1, 7) 117 .p2align L1_CACHE_SHIFT !! 156 add %o0, 4, %o0 118 .Lnot_short: << 119 sub dst, dst, #16/* Pre-bias. */ << 120 sub count, count, #64 << 121 1: << 122 stp A_l, A_l, [dst, #16] << 123 stp A_l, A_l, [dst, #32] << 124 stp A_l, A_l, [dst, #48] << 125 stp A_l, A_l, [dst, #64]! << 126 subs count, count, #64 << 127 b.ge 1b << 128 tst count, #0x3f << 129 add dst, dst, #16 << 130 b.ne .Ltail63 << 131 .Lexitfunc: << 132 ret << 133 << 134 /* << 135 * For zeroing memory, check to see if << 136 * zero entire 'cache' lines. << 137 */ << 138 .Lzero_mem: << 139 cmp count, #63 << 140 b.le .Ltail63 << 141 /* << 142 * For zeroing small amounts of memory, << 143 * the line-clear code. << 144 */ << 145 cmp count, #128 << 146 b.lt .Lnot_short /*count is at leas << 147 << 148 mrs tmp1, dczid_el0 << 149 tbnz tmp1, #4, .Lnot_short << 150 mov tmp3w, #4 << 151 and zva_len, tmp1w, #15 /* Saf << 152 lsl zva_len, tmp3w, zva_len << 153 << 154 ands tmp3w, zva_len, #63 << 155 /* << 156 * ensure the zva_len is not less than << 157 * It is not meaningful to use ZVA if t << 158 */ << 159 b.ne .Lnot_short << 160 .Lzero_by_line: << 161 /* << 162 * Compute how far we need to go to bec << 163 * already at quad-word alignment. << 164 */ << 165 cmp count, zva_len_x << 166 b.lt .Lnot_short /* Not << 167 sub zva_bits_x, zva_len_x, #1 << 168 neg tmp2, dst << 169 ands tmp2, tmp2, zva_bits_x << 170 b.eq 2f /* Alr << 171 /* Not aligned, check that there's eno << 172 sub tmp1, count, tmp2 << 173 /* << 174 * grantee the remain length to be ZVA << 175 * avoid to make the 2f's process over << 176 cmp tmp1, #64 << 177 ccmp tmp1, zva_len_x, #8, ge /* NZC << 178 b.lt .Lnot_short << 179 /* << 180 * We know that there's at least 64 byt << 181 * to overrun by 64 bytes. << 182 */ << 183 mov count, tmp1 << 184 1: 157 1: 185 stp A_l, A_l, [dst] !! 158 be 1f 186 stp A_l, A_l, [dst, #16] !! 159 andcc %o1, 1, %g0 187 stp A_l, A_l, [dst, #32] << 188 subs tmp2, tmp2, #64 << 189 stp A_l, A_l, [dst, #48] << 190 add dst, dst, #64 << 191 b.ge 1b << 192 /* We've overrun a bit, so adjust dst << 193 add dst, dst, tmp2 << 194 2: << 195 sub count, count, zva_len_x << 196 3: << 197 dc zva, dst << 198 add dst, dst, zva_len_x << 199 subs count, count, zva_len_x << 200 b.ge 3b << 201 ands count, count, zva_bits_x << 202 b.ne .Ltail_maybe_long << 203 ret << 204 SYM_FUNC_END(__pi_memset) << 205 160 206 SYM_FUNC_ALIAS(__memset, __pi_memset) !! 161 EX(sth %g3, [%o0], and %o1, 3) 207 EXPORT_SYMBOL(__memset) !! 162 add %o0, 2, %o0 >> 163 1: >> 164 bne,a 8f >> 165 EX(stb %g3, [%o0], and %o1, 1) >> 166 8: >> 167 b 0f >> 168 nop >> 169 7: >> 170 be 13b >> 171 orcc %o1, 0, %g0 >> 172 >> 173 be 0f >> 174 8: >> 175 add %o0, 1, %o0 >> 176 subcc %o1, 1, %o1 >> 177 bne 8b >> 178 EX(stb %g3, [%o0 - 1], add %o1, 1) >> 179 0: >> 180 andcc %g4, 1, %g0 >> 181 be 5f >> 182 nop >> 183 retl >> 184 mov %g1, %o0 >> 185 5: >> 186 retl >> 187 clr %o0 >> 188 >> 189 .section .fixup,#alloc,#execinstr >> 190 .align 4 >> 191 30: >> 192 and %o1, 0x7f, %o1 >> 193 retl >> 194 add %o3, %o1, %o0 208 195 209 SYM_FUNC_ALIAS_WEAK(memset, __pi_memset) !! 196 .globl __bzero_end 210 EXPORT_SYMBOL(memset) !! 197 __bzero_end:
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.