1 /* SPDX-License-Identifier: GPL-2.0 */ 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 2 /* 3 * Copyright (C) 2020-2022 Loongson Technology !! 3 * arch/alpha/lib/memmove.S 4 */ << 5 << 6 #include <linux/export.h> << 7 #include <asm/alternative-asm.h> << 8 #include <asm/asm.h> << 9 #include <asm/asmmacro.h> << 10 #include <asm/cpu.h> << 11 #include <asm/regdef.h> << 12 << 13 .section .noinstr.text, "ax" << 14 << 15 SYM_FUNC_START(memmove) << 16 blt a0, a1, __memcpy /* dst << 17 blt a1, a0, __rmemcpy /* src << 18 jr ra /* dst << 19 SYM_FUNC_END(memmove) << 20 SYM_FUNC_ALIAS(__memmove, memmove) << 21 << 22 EXPORT_SYMBOL(memmove) << 23 EXPORT_SYMBOL(__memmove) << 24 << 25 _ASM_NOKPROBE(memmove) << 26 _ASM_NOKPROBE(__memmove) << 27 << 28 SYM_FUNC_START(__rmemcpy) << 29 /* << 30 * Some CPUs support hardware unaligne << 31 */ << 32 ALTERNATIVE "b __rmemcpy_generic", << 33 "b __rmemcpy_fast", CP << 34 SYM_FUNC_END(__rmemcpy) << 35 _ASM_NOKPROBE(__rmemcpy) << 36 << 37 /* << 38 * void *__rmemcpy_generic(void *dst, const vo << 39 * 4 * 40 * a0: dst !! 5 * Barely optimized memmove routine for Alpha EV5. 41 * a1: src << 42 * a2: n << 43 */ << 44 SYM_FUNC_START(__rmemcpy_generic) << 45 move a3, a0 << 46 beqz a2, 2f << 47 << 48 add.d a0, a0, a2 << 49 add.d a1, a1, a2 << 50 << 51 1: ld.b t0, a1, -1 << 52 st.b t0, a0, -1 << 53 addi.d a0, a0, -1 << 54 addi.d a1, a1, -1 << 55 addi.d a2, a2, -1 << 56 bgt a2, zero, 1b << 57 << 58 2: move a0, a3 << 59 jr ra << 60 SYM_FUNC_END(__rmemcpy_generic) << 61 _ASM_NOKPROBE(__rmemcpy_generic) << 62 << 63 /* << 64 * void *__rmemcpy_fast(void *dst, const void << 65 * 6 * 66 * a0: dst !! 7 * This is hand-massaged output from the original memcpy.c. We defer to 67 * a1: src !! 8 * memcpy whenever possible; the backwards copy loops are not unrolled. 68 * a2: n << 69 */ 9 */ 70 SYM_FUNC_START(__rmemcpy_fast) !! 10 #include <linux/export.h> 71 sltui t0, a2, 9 !! 11 .set noat 72 bnez t0, __memcpy_small !! 12 .set noreorder 73 !! 13 .text 74 add.d a3, a1, a2 !! 14 75 add.d a2, a0, a2 !! 15 .align 4 76 ld.d a6, a1, 0 !! 16 .globl memmove 77 ld.d a7, a3, -8 !! 17 .ent memmove 78 !! 18 memmove: 79 /* align up destination address */ !! 19 ldgp $29, 0($27) 80 andi t1, a2, 7 !! 20 unop 81 sub.d a3, a3, t1 !! 21 nop 82 sub.d a5, a2, t1 !! 22 .prologue 1 83 !! 23 84 addi.d a4, a1, 64 !! 24 addq $16,$18,$4 85 bgeu a4, a3, .Llt64 !! 25 addq $17,$18,$5 86 !! 26 cmpule $4,$17,$1 /* dest + n <= src */ 87 /* copy 64 bytes at a time */ !! 27 cmpule $5,$16,$2 /* dest >= src + n */ 88 .Lloop64: !! 28 89 ld.d t0, a3, -8 !! 29 bis $1,$2,$1 90 ld.d t1, a3, -16 !! 30 mov $16,$0 91 ld.d t2, a3, -24 !! 31 xor $16,$17,$2 92 ld.d t3, a3, -32 !! 32 bne $1,memcpy !samegp 93 ld.d t4, a3, -40 !! 33 94 ld.d t5, a3, -48 !! 34 and $2,7,$2 /* Test for src/dest co-alignment. */ 95 ld.d t6, a3, -56 !! 35 and $16,7,$1 96 ld.d t7, a3, -64 !! 36 cmpule $16,$17,$3 97 addi.d a3, a3, -64 !! 37 bne $3,$memmove_up /* dest < src */ 98 st.d t0, a5, -8 !! 38 99 st.d t1, a5, -16 !! 39 and $4,7,$1 100 st.d t2, a5, -24 !! 40 bne $2,$misaligned_dn 101 st.d t3, a5, -32 !! 41 unop 102 st.d t4, a5, -40 !! 42 beq $1,$skip_aligned_byte_loop_head_dn 103 st.d t5, a5, -48 !! 43 104 st.d t6, a5, -56 !! 44 $aligned_byte_loop_head_dn: 105 st.d t7, a5, -64 !! 45 lda $4,-1($4) 106 addi.d a5, a5, -64 !! 46 lda $5,-1($5) 107 bltu a4, a3, .Lloop64 !! 47 unop 108 !! 48 ble $18,$egress 109 /* copy the remaining bytes */ !! 49 110 .Llt64: !! 50 ldq_u $3,0($5) 111 addi.d a4, a1, 32 !! 51 ldq_u $2,0($4) 112 bgeu a4, a3, .Llt32 !! 52 lda $18,-1($18) 113 ld.d t0, a3, -8 !! 53 extbl $3,$5,$1 114 ld.d t1, a3, -16 !! 54 115 ld.d t2, a3, -24 !! 55 insbl $1,$4,$1 116 ld.d t3, a3, -32 !! 56 mskbl $2,$4,$2 117 addi.d a3, a3, -32 !! 57 bis $1,$2,$1 118 st.d t0, a5, -8 !! 58 and $4,7,$6 119 st.d t1, a5, -16 !! 59 120 st.d t2, a5, -24 !! 60 stq_u $1,0($4) 121 st.d t3, a5, -32 !! 61 bne $6,$aligned_byte_loop_head_dn 122 addi.d a5, a5, -32 !! 62 123 !! 63 $skip_aligned_byte_loop_head_dn: 124 .Llt32: !! 64 lda $18,-8($18) 125 addi.d a4, a1, 16 !! 65 blt $18,$skip_aligned_word_loop_dn 126 bgeu a4, a3, .Llt16 !! 66 127 ld.d t0, a3, -8 !! 67 $aligned_word_loop_dn: 128 ld.d t1, a3, -16 !! 68 ldq $1,-8($5) 129 addi.d a3, a3, -16 !! 69 nop 130 st.d t0, a5, -8 !! 70 lda $5,-8($5) 131 st.d t1, a5, -16 !! 71 lda $18,-8($18) 132 addi.d a5, a5, -16 !! 72 133 !! 73 stq $1,-8($4) 134 .Llt16: !! 74 nop 135 addi.d a4, a1, 8 !! 75 lda $4,-8($4) 136 bgeu a4, a3, .Llt8 !! 76 bge $18,$aligned_word_loop_dn 137 ld.d t0, a3, -8 !! 77 138 st.d t0, a5, -8 !! 78 $skip_aligned_word_loop_dn: 139 !! 79 lda $18,8($18) 140 .Llt8: !! 80 bgt $18,$byte_loop_tail_dn 141 st.d a6, a0, 0 !! 81 unop 142 st.d a7, a2, -8 !! 82 ret $31,($26),1 143 !! 83 144 /* return */ !! 84 .align 4 145 jr ra !! 85 $misaligned_dn: 146 SYM_FUNC_END(__rmemcpy_fast) !! 86 nop 147 _ASM_NOKPROBE(__rmemcpy_fast) !! 87 fnop >> 88 unop >> 89 beq $18,$egress >> 90 >> 91 $byte_loop_tail_dn: >> 92 ldq_u $3,-1($5) >> 93 ldq_u $2,-1($4) >> 94 lda $5,-1($5) >> 95 lda $4,-1($4) >> 96 >> 97 lda $18,-1($18) >> 98 extbl $3,$5,$1 >> 99 insbl $1,$4,$1 >> 100 mskbl $2,$4,$2 >> 101 >> 102 bis $1,$2,$1 >> 103 stq_u $1,0($4) >> 104 bgt $18,$byte_loop_tail_dn >> 105 br $egress >> 106 >> 107 $memmove_up: >> 108 mov $16,$4 >> 109 mov $17,$5 >> 110 bne $2,$misaligned_up >> 111 beq $1,$skip_aligned_byte_loop_head_up >> 112 >> 113 $aligned_byte_loop_head_up: >> 114 unop >> 115 ble $18,$egress >> 116 ldq_u $3,0($5) >> 117 ldq_u $2,0($4) >> 118 >> 119 lda $18,-1($18) >> 120 extbl $3,$5,$1 >> 121 insbl $1,$4,$1 >> 122 mskbl $2,$4,$2 >> 123 >> 124 bis $1,$2,$1 >> 125 lda $5,1($5) >> 126 stq_u $1,0($4) >> 127 lda $4,1($4) >> 128 >> 129 and $4,7,$6 >> 130 bne $6,$aligned_byte_loop_head_up >> 131 >> 132 $skip_aligned_byte_loop_head_up: >> 133 lda $18,-8($18) >> 134 blt $18,$skip_aligned_word_loop_up >> 135 >> 136 $aligned_word_loop_up: >> 137 ldq $1,0($5) >> 138 nop >> 139 lda $5,8($5) >> 140 lda $18,-8($18) >> 141 >> 142 stq $1,0($4) >> 143 nop >> 144 lda $4,8($4) >> 145 bge $18,$aligned_word_loop_up >> 146 >> 147 $skip_aligned_word_loop_up: >> 148 lda $18,8($18) >> 149 bgt $18,$byte_loop_tail_up >> 150 unop >> 151 ret $31,($26),1 >> 152 >> 153 .align 4 >> 154 $misaligned_up: >> 155 nop >> 156 fnop >> 157 unop >> 158 beq $18,$egress >> 159 >> 160 $byte_loop_tail_up: >> 161 ldq_u $3,0($5) >> 162 ldq_u $2,0($4) >> 163 lda $18,-1($18) >> 164 extbl $3,$5,$1 >> 165 >> 166 insbl $1,$4,$1 >> 167 mskbl $2,$4,$2 >> 168 bis $1,$2,$1 >> 169 stq_u $1,0($4) >> 170 >> 171 lda $5,1($5) >> 172 lda $4,1($4) >> 173 nop >> 174 bgt $18,$byte_loop_tail_up >> 175 >> 176 $egress: >> 177 ret $31,($26),1 >> 178 nop >> 179 nop >> 180 nop >> 181 >> 182 .end memmove >> 183 EXPORT_SYMBOL(memmove)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.