/*
 *  arch/xtensa/lib/memset.S
 *
 *  ANSI C standard library function memset
 *  (Well, almost.  .fixup code might return zero.)
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions.
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 */
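/*
 * For illustration only: a rough C rendering of the strategy described
 * above. The function name is invented for this sketch; it ignores the
 * .fixup/exception handling and folds the 8/4/2/1-byte tail into a
 * single byte loop, so it is a simplification rather than a
 * line-for-line equivalent of the assembly below.
 *
 *	void *memset_sketch(void *dst, int c, size_t n)
 *	{
 *		unsigned char *p = dst;
 *		unsigned int w = (unsigned char)c;
 *
 *		w |= w << 8;		// c in both low bytes
 *		w |= w << 16;		// c in all four byte lanes
 *
 *		while (n && ((unsigned long)p & 3)) {	// align dst first
 *			*p++ = (unsigned char)c;
 *			n--;
 *		}
 *		for (; n >= 16; n -= 16, p += 16) {	// 16B chunks
 *			((unsigned int *)p)[0] = w;
 *			((unsigned int *)p)[1] = w;
 *			((unsigned int *)p)[2] = w;
 *			((unsigned int *)p)[3] = w;
 *		}
 *		while (n--)				// remaining tail
 *			*p++ = (unsigned char)c;
 *		return dst;
 *	}
 */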
.text
ENTRY(__memset)
WEAK(memset)

	abi_entry_default
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned # if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	abi_ret_default

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
EX(10f) s32i	a3, a5,  8
EX(10f) s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f) s32i	a3, a5, 0
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f) s16i	a3, a5, 0
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f) s8i	a3, a5, 0
.L5:
.Lret1:
	abi_ret_default

/*
 * Destination is unaligned
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# do short copies byte by byte
	bbci.l	a5, 0, .L20		# branch if dst alignment half-aligned
	# dst is only byte aligned
	# set 1 byte
EX(10f)	s8i	a3, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
	# now retest if dst aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f)	s16i	a3, a5, 0
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f)	s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	abi_ret_default

ENDPROC(__memset)
EXPORT_SYMBOL(__memset)
EXPORT_SYMBOL(memset)

	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

10:
	movi	a2, 0
	abi_ret_default

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/arch/alpha/lib/memset.S
 *
 * This is an efficient (and small) implementation of the C library "memset()"
 * function for the alpha.
 *
 * (C) Copyright 1996 Linus Torvalds
 *
 * This routine is "moral-ware": you are free to use it any way you wish, and
 * the only obligation I put on you is a moral one: if you make any improvements
 * to the routine, please send me your improvements for me to use similarly.
 *
 * The scheduling comments are according to the EV5 documentation (and done by
 * hand, so they might well be incorrect, please do tell me about it..)
 */
#include <linux/export.h>
	.set noat
	.set noreorder
.text
	.globl memset
	.globl __memset
	.globl ___memset
	.globl __memset16
	.globl __constant_c_memset

	.ent ___memset
.align 5
___memset:
	.frame $30,0,$26,0
	.prologue 0

	and $17,255,$1		/* E1 */
	insbl $17,1,$17		/* .. E0 */
	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	sll $17,16,$1		/* E1 (p-c latency, next cycle) */

	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	sll $17,32,$1		/* E1 (p-c latency, next cycle) */
	bis $17,$1,$17		/* E0 (p-c latency, next cycle) */
	ldq_u $31,0($30)	/* .. E1 */
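/*
 * The replication sequence above splats the low byte of the fill value
 * across a full quadword. A rough C equivalent of that step (sketch
 * only, variable names invented here):
 *
 *	unsigned long fill = c & 0xff;
 *	fill |= fill << 8;	// and/insbl/bis: bytes 0-1
 *	fill |= fill << 16;	// sll 16 + bis:  bytes 0-3
 *	fill |= fill << 32;	// sll 32 + bis:  bytes 0-7
 */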
.align 5
__constant_c_memset:
	addq $18,$16,$6		/* E0 */
	bis $16,$16,$0		/* .. E1 */
	xor $16,$6,$1		/* E0 */
	ble $18,end		/* .. E1 */

	bic $1,7,$1		/* E0 */
	beq $1,within_one_quad	/* .. E1 (note EV5 zero-latency forwarding) */
	and $16,7,$3		/* E0 */
	beq $3,aligned		/* .. E1 (note EV5 zero-latency forwarding) */

	ldq_u $4,0($16)		/* E0 */
	bis $16,$16,$5		/* .. E1 */
	insql $17,$16,$2	/* E0 */
	subq $3,8,$3		/* .. E1 */

	addq $18,$3,$18		/* E0	$18 is new count ($3 is negative) */
	mskql $4,$16,$4		/* .. E1 (and possible load stall) */
	subq $16,$3,$16		/* E0	$16 is new aligned destination */
	bis $2,$4,$1		/* .. E1 */

	bis $31,$31,$31		/* E0 */
	ldq_u $31,0($30)	/* .. E1 */
	stq_u $1,0($5)		/* E0 */
	bis $31,$31,$31		/* .. E1 */

.align 4
aligned:
	sra $18,3,$3		/* E0 */
	and $18,7,$18		/* .. E1 */
	bis $16,$16,$5		/* E0 */
	beq $3,no_quad		/* .. E1 */

.align 3
loop:
	stq $17,0($5)		/* E0 */
	subq $3,1,$3		/* .. E1 */
	addq $5,8,$5		/* E0 */
	bne $3,loop		/* .. E1 */

no_quad:
	bis $31,$31,$31		/* E0 */
	beq $18,end		/* .. E1 */
	ldq $7,0($5)		/* E0 */
	mskqh $7,$6,$2		/* .. E1 (and load stall) */

	insqh $17,$6,$4		/* E0 */
	bis $2,$4,$1		/* .. E1 */
	stq $1,0($5)		/* E0 */
	ret $31,($26),1		/* .. E1 */

.align 3
within_one_quad:
	ldq_u $1,0($16)		/* E0 */
	insql $17,$16,$2	/* E1 */
	mskql $1,$16,$4		/* E0 (after load stall) */
	bis $2,$4,$2		/* E0 */

	mskql $2,$6,$4		/* E0 */
	mskqh $1,$6,$2		/* .. E1 */
	bis $2,$4,$1		/* E0 */
	stq_u $1,0($16)		/* E0 */

end:
	ret $31,($26),1		/* E1 */
	.end ___memset
	EXPORT_SYMBOL(___memset)
	EXPORT_SYMBOL(__constant_c_memset)

.align 5
	.ent __memset16
__memset16:
	.prologue 0

	inswl $17,0,$1		/* E0 */
	inswl $17,2,$2		/* E0 */
	inswl $17,4,$3		/* E0 */
	or $1,$2,$1		/* .. E1 */
	inswl $17,6,$4		/* E0 */
	or $1,$3,$1		/* .. E1 */
	or $1,$4,$17		/* E0 */
	br __constant_c_memset	/* .. E1 */

	.end __memset16
	EXPORT_SYMBOL(__memset16)

memset = ___memset
__memset = ___memset
	EXPORT_SYMBOL(memset)
	EXPORT_SYMBOL(__memset)
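/*
 * Similarly, __memset16 above builds the quadword pattern from a
 * 16-bit value before branching into __constant_c_memset. A rough C
 * equivalent of its four OR'd inswl results (sketch only):
 *
 *	unsigned long fill = w & 0xffff;
 *	fill |= fill << 16;	// halfwords 0-1
 *	fill |= fill << 32;	// halfwords 0-3
 */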