arch/riscv/lib/memset.S:

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Regents of the University of California
 */


#include <linux/linkage.h>
#include <asm/asm.h>

/* void *memset(void *, int, size_t) */
SYM_FUNC_START(__memset)
	move t0, a0			/* Preserve return value */

	/* Defer to byte-oriented fill for small sizes */
	sltiu a3, a2, 16
	bnez a3, 4f

	/*
	 * Round to nearest XLEN-aligned address
	 * greater than or equal to start address
	 */
	addi a3, t0, SZREG-1
	andi a3, a3, ~(SZREG-1)
	beq a3, t0, 2f			/* Skip if already aligned */
	/* Handle initial misalignment */
	sub a4, a3, t0
1:
	sb a1, 0(t0)
	addi t0, t0, 1
	bltu t0, a3, 1b
	sub a2, a2, a4			/* Update count */

2: /* Duff's device with 32 XLEN stores per iteration */
	/* Broadcast value into all bytes */
	andi a1, a1, 0xff
	slli a3, a1, 8
	or a1, a3, a1
	slli a3, a1, 16
	or a1, a3, a1
#ifdef CONFIG_64BIT
	slli a3, a1, 32
	or a1, a3, a1
#endif

	/* Calculate end address */
	andi a4, a2, ~(SZREG-1)
	add a3, t0, a4

	andi a4, a4, 31*SZREG		/* Calculate remainder */
	beqz a4, 3f			/* Shortcut if no remainder */
	neg a4, a4
	addi a4, a4, 32*SZREG		/* Calculate initial offset */

	/* Adjust start address with offset */
	sub t0, t0, a4

	/* Jump into loop body */
	/* Assumes 32-bit instruction lengths */
	la a5, 3f
#ifdef CONFIG_64BIT
	srli a4, a4, 1
#endif
	add a5, a5, a4
	jr a5
3:
	REG_S a1, 0(t0)
	REG_S a1, SZREG(t0)
	REG_S a1, 2*SZREG(t0)
	REG_S a1, 3*SZREG(t0)
	REG_S a1, 4*SZREG(t0)
	REG_S a1, 5*SZREG(t0)
	REG_S a1, 6*SZREG(t0)
	REG_S a1, 7*SZREG(t0)
	REG_S a1, 8*SZREG(t0)
	REG_S a1, 9*SZREG(t0)
	REG_S a1, 10*SZREG(t0)
	REG_S a1, 11*SZREG(t0)
	REG_S a1, 12*SZREG(t0)
	REG_S a1, 13*SZREG(t0)
	REG_S a1, 14*SZREG(t0)
	REG_S a1, 15*SZREG(t0)
	REG_S a1, 16*SZREG(t0)
	REG_S a1, 17*SZREG(t0)
	REG_S a1, 18*SZREG(t0)
	REG_S a1, 19*SZREG(t0)
	REG_S a1, 20*SZREG(t0)
	REG_S a1, 21*SZREG(t0)
	REG_S a1, 22*SZREG(t0)
	REG_S a1, 23*SZREG(t0)
	REG_S a1, 24*SZREG(t0)
	REG_S a1, 25*SZREG(t0)
	REG_S a1, 26*SZREG(t0)
	REG_S a1, 27*SZREG(t0)
	REG_S a1, 28*SZREG(t0)
	REG_S a1, 29*SZREG(t0)
	REG_S a1, 30*SZREG(t0)
	REG_S a1, 31*SZREG(t0)
	addi t0, t0, 32*SZREG
	bltu t0, a3, 3b
	andi a2, a2, SZREG-1		/* Update count */

4:
	/* Handle trailing misalignment */
	beqz a2, 6f
	add a3, t0, a2
5:
	sb a1, 0(t0)
	addi t0, t0, 1
	bltu t0, a3, 5b
6:
	ret
SYM_FUNC_END(__memset)
SYM_FUNC_ALIAS_WEAK(memset, __memset)
SYM_FUNC_ALIAS(__pi_memset, __memset)
SYM_FUNC_ALIAS(__pi___memset, __memset)

arch/sparc/lib/memset.S:

/* SPDX-License-Identifier: GPL-2.0 */
/* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 *
 * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and
 * number of bytes not yet set if exception occurs and we were called as
 * clear_user.
 */

#include <linux/export.h>
#include <asm/ptrace.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	retl;					\
	 a, b, %o0;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define STORE(source, base, offset, n)		\
98: 	std source, [base + offset + n];	\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba 30f;					\
	 sub %o3, n - offset, %o3;		\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4;

#define STORE_LAST(source, base, offset, n)	\
	EX(std source, [base - offset - n],	\
	   add %o1, offset + n);

/* Please don't change these macros, unless you change the logic
 * in the .fixup section below as well.
 * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
#define ZERO_BIG_BLOCK(base, offset, source)	\
	STORE(source, base, offset, 0x00);	\
	STORE(source, base, offset, 0x08);	\
	STORE(source, base, offset, 0x10);	\
	STORE(source, base, offset, 0x18);	\
	STORE(source, base, offset, 0x20);	\
	STORE(source, base, offset, 0x28);	\
	STORE(source, base, offset, 0x30);	\
	STORE(source, base, offset, 0x38);

#define ZERO_LAST_BLOCKS(base, offset, source)	\
	STORE_LAST(source, base, offset, 0x38);	\
	STORE_LAST(source, base, offset, 0x30);	\
	STORE_LAST(source, base, offset, 0x28);	\
	STORE_LAST(source, base, offset, 0x20);	\
	STORE_LAST(source, base, offset, 0x18);	\
	STORE_LAST(source, base, offset, 0x10);	\
	STORE_LAST(source, base, offset, 0x08);	\
	STORE_LAST(source, base, offset, 0x00);

	.text
	.align 4

	.globl	__bzero_begin
__bzero_begin:

	.globl	__bzero
	.type	__bzero,#function
	.globl	memset
	EXPORT_SYMBOL(__bzero)
	EXPORT_SYMBOL(memset)
memset:
	mov	%o0, %g1
	mov	1, %g4
	and	%o1, 0xff, %g3
	sll	%g3, 8, %g2
	or	%g3, %g2, %g3
	sll	%g3, 16, %g2
	or	%g3, %g2, %g3
	b	1f
	 mov	%o2, %o1
3:
	cmp	%o2, 3
	be	2f
	 EX(stb	%g3, [%o0], sub %o1, 0)

	cmp	%o2, 2
	be	2f
	 EX(stb	%g3, [%o0 + 0x01], sub %o1, 1)

	EX(stb	%g3, [%o0 + 0x02], sub %o1, 2)
2:
	sub	%o2, 4, %o2
	add	%o1, %o2, %o1
	b	4f
	 sub	%o0, %o2, %o0

__bzero:
	clr	%g4
	mov	%g0, %g3
1:
	cmp	%o1, 7
	bleu	7f
	 andcc	%o0, 3, %o2

	bne	3b
4:
	 andcc	%o0, 4, %g0

	be	2f
	 mov	%g3, %g2

	EX(st	%g3, [%o0], sub %o1, 0)
	sub	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%o1, 0xffffff80, %o3	! Now everything is 8 aligned and o1 is len to run
	be	9f
	 andcc	%o1, 0x78, %o2
10:
	ZERO_BIG_BLOCK(%o0, 0x00, %g2)
	subcc	%o3, 128, %o3
	ZERO_BIG_BLOCK(%o0, 0x40, %g2)
	bne	10b
	 add	%o0, 128, %o0

	orcc	%o2, %g0, %g0
9:
	be	13f
	 andcc	%o1, 7, %o1

	srl	%o2, 1, %o3
	set	13f, %o4
	sub	%o4, %o3, %o4
	jmp	%o4
	 add	%o0, %o2, %o0

	ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
	ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
13:
	be	8f
	 andcc	%o1, 4, %g0

	be	1f
	 andcc	%o1, 2, %g0

	EX(st	%g3, [%o0], and %o1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%o1, 1, %g0

	EX(sth	%g3, [%o0], and %o1, 3)
	add	%o0, 2, %o0
1:
	bne,a	8f
	 EX(stb	%g3, [%o0], and %o1, 1)
8:
	b	0f
	 nop
7:
	be	13b
	 orcc	%o1, 0, %g0

	be	0f
8:
	 add	%o0, 1, %o0
	subcc	%o1, 1, %o1
	bne	8b
	 EX(stb	%g3, [%o0 - 1], add %o1, 1)
0:
	andcc	%g4, 1, %g0
	be	5f
	 nop
	retl
	 mov	%g1, %o0
5:
	retl
	 clr	%o0

	.section .fixup,#alloc,#execinstr
	.align	4
30:
	and	%o1, 0x7f, %o1
	retl
	 add	%o3, %o1, %o0

	.globl __bzero_end
__bzero_end:
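Both implementations share the same three-phase shape: byte-fill until the destination is register-aligned, broadcast the fill byte into every byte of a native word (the slli/or pairs on RISC-V, the sll/or pairs on SPARC), store whole words through the aligned middle, then byte-fill the tail. The C sketch below is my own illustration of that strategy, not code from either file; the name memset_sketch is made up, and it omits the unrolling and the SPARC exception-table machinery.

#include <stddef.h>
#include <stdint.h>

void *memset_sketch(void *dest, int c, size_t n)
{
	unsigned char *p = dest;
	unsigned long word = (unsigned char)c;

	/* Head: byte stores until p is word-aligned. */
	while (n && ((uintptr_t)p & (sizeof(long) - 1))) {
		*p++ = (unsigned char)c;
		n--;
	}

	/* Broadcast the fill byte into all bytes of a word. */
	word |= word << 8;
	word |= word << 16;
	if (sizeof(long) == 8)			/* 64-bit only, like the */
		word |= word << (sizeof(long) * 4); /* CONFIG_64BIT branch */

	/* Middle: word-sized stores over the aligned bulk. */
	while (n >= sizeof(long)) {
		*(unsigned long *)p = word;
		p += sizeof(long);
		n -= sizeof(long);
	}

	/* Tail: byte stores for the remainder. */
	while (n--)
		*p++ = (unsigned char)c;

	return dest;	/* memset returns the original pointer */
}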
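The subtlest part of both files is how they handle a bulk count that is not a multiple of the unroll factor: rather than running a cleanup loop, they compute an entry point partway into the unrolled run of stores (la a5, 3f / add / jr a5 on RISC-V; set 13f, %o4 / sub / jmp %o4 on SPARC) so exactly the right number of stores execute. In C the same trick is Duff's device, sketched below under my own assumptions: fill_words is a hypothetical helper, and the 8-way unroll is for illustration only (the kernel code unrolls 32 XLEN-wide stores on RISC-V and uses 8-byte std stores on SPARC).

#include <stddef.h>

static void fill_words(unsigned long *p, unsigned long v, size_t count)
{
	/* Number of times the unrolled body will be entered. */
	size_t iters = (count + 7) / 8;

	if (!count)
		return;
	switch (count % 8) {	/* jump into the middle of the loop */
	case 0: do {	*p++ = v;
	case 7:		*p++ = v;
	case 6:		*p++ = v;
	case 5:		*p++ = v;
	case 4:		*p++ = v;
	case 3:		*p++ = v;
	case 2:		*p++ = v;
	case 1:		*p++ = v;
		} while (--iters);
	}
}

The assembly gets the entry offset by negating the remainder and biasing it by the full block size, then scaling for instruction width (the RISC-V srli a4, a4, 1 under CONFIG_64BIT accounts for one 4-byte instruction covering 8 bytes of stores); the C switch leaves that arithmetic to the compiler.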