1 /* SPDX-License-Identifier: GPL-2.0-only */ !! 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 2 /* 3 * linux/arch/arm/lib/memmove.S !! 3 * arch/alpha/lib/memmove.S 4 * 4 * 5 * Author: Nicolas Pitre !! 5 * Barely optimized memmove routine for Alpha EV5. 6 * Created: Sep 28, 2005 << 7 * Copyright: (C) MontaVista Software Inc. << 8 */ << 9 << 10 #include <linux/linkage.h> << 11 #include <asm/assembler.h> << 12 #include <asm/unwind.h> << 13 << 14 .text << 15 << 16 /* << 17 * Prototype: void *memmove(void *dest, const << 18 * << 19 * Note: << 20 * 6 * 21 * If the memory regions don't overlap, we sim !! 7 * This is hand-massaged output from the original memcpy.c. We defer to 22 * normally a bit faster. Otherwise the copy i !! 8 * memcpy whenever possible; the backwards copy loops are not unrolled. 23 * is a transposition of the code from copy_te << 24 * occurring in the opposite direction. << 25 */ 9 */ >> 10 #include <linux/export.h> >> 11 .set noat >> 12 .set noreorder >> 13 .text >> 14 >> 15 .align 4 >> 16 .globl memmove >> 17 .ent memmove >> 18 memmove: >> 19 ldgp $29, 0($27) >> 20 unop >> 21 nop >> 22 .prologue 1 >> 23 >> 24 addq $16,$18,$4 >> 25 addq $17,$18,$5 >> 26 cmpule $4,$17,$1 /* dest + n <= src */ >> 27 cmpule $5,$16,$2 /* dest >= src + n */ >> 28 >> 29 bis $1,$2,$1 >> 30 mov $16,$0 >> 31 xor $16,$17,$2 >> 32 bne $1,memcpy !samegp >> 33 >> 34 and $2,7,$2 /* Test for src/dest co-alignment. */ >> 35 and $16,7,$1 >> 36 cmpule $16,$17,$3 >> 37 bne $3,$memmove_up /* dest < src */ >> 38 >> 39 and $4,7,$1 >> 40 bne $2,$misaligned_dn >> 41 unop >> 42 beq $1,$skip_aligned_byte_loop_head_dn >> 43 >> 44 $aligned_byte_loop_head_dn: >> 45 lda $4,-1($4) >> 46 lda $5,-1($5) >> 47 unop >> 48 ble $18,$egress >> 49 >> 50 ldq_u $3,0($5) >> 51 ldq_u $2,0($4) >> 52 lda $18,-1($18) >> 53 extbl $3,$5,$1 >> 54 >> 55 insbl $1,$4,$1 >> 56 mskbl $2,$4,$2 >> 57 bis $1,$2,$1 >> 58 and $4,7,$6 >> 59 >> 60 stq_u $1,0($4) >> 61 bne $6,$aligned_byte_loop_head_dn >> 62 >> 63 $skip_aligned_byte_loop_head_dn: >> 64 lda $18,-8($18) >> 65 blt $18,$skip_aligned_word_loop_dn >> 66 >> 67 $aligned_word_loop_dn: >> 68 ldq $1,-8($5) >> 69 nop >> 70 lda $5,-8($5) >> 71 lda $18,-8($18) >> 72 >> 73 stq $1,-8($4) >> 74 nop >> 75 lda $4,-8($4) >> 76 bge $18,$aligned_word_loop_dn >> 77 >> 78 $skip_aligned_word_loop_dn: >> 79 lda $18,8($18) >> 80 bgt $18,$byte_loop_tail_dn >> 81 unop >> 82 ret $31,($26),1 >> 83 >> 84 .align 4 >> 85 $misaligned_dn: >> 86 nop >> 87 fnop >> 88 unop >> 89 beq $18,$egress >> 90 >> 91 $byte_loop_tail_dn: >> 92 ldq_u $3,-1($5) >> 93 ldq_u $2,-1($4) >> 94 lda $5,-1($5) >> 95 lda $4,-1($4) >> 96 >> 97 lda $18,-1($18) >> 98 extbl $3,$5,$1 >> 99 insbl $1,$4,$1 >> 100 mskbl $2,$4,$2 >> 101 >> 102 bis $1,$2,$1 >> 103 stq_u $1,0($4) >> 104 bgt $18,$byte_loop_tail_dn >> 105 br $egress >> 106 >> 107 $memmove_up: >> 108 mov $16,$4 >> 109 mov $17,$5 >> 110 bne $2,$misaligned_up >> 111 beq $1,$skip_aligned_byte_loop_head_up >> 112 >> 113 $aligned_byte_loop_head_up: >> 114 unop >> 115 ble $18,$egress >> 116 ldq_u $3,0($5) >> 117 ldq_u $2,0($4) >> 118 >> 119 lda $18,-1($18) >> 120 extbl $3,$5,$1 >> 121 insbl $1,$4,$1 >> 122 mskbl $2,$4,$2 >> 123 >> 124 bis $1,$2,$1 >> 125 lda $5,1($5) >> 126 stq_u $1,0($4) >> 127 lda $4,1($4) >> 128 >> 129 and $4,7,$6 >> 130 bne $6,$aligned_byte_loop_head_up >> 131 >> 132 $skip_aligned_byte_loop_head_up: >> 133 lda $18,-8($18) >> 134 blt $18,$skip_aligned_word_loop_up >> 135 >> 136 $aligned_word_loop_up: >> 137 ldq $1,0($5) >> 138 nop >> 139 lda $5,8($5) >> 140 lda $18,-8($18) >> 141 >> 142 stq $1,0($4) >> 143 nop >> 144 lda $4,8($4) >> 145 bge $18,$aligned_word_loop_up >> 146 >> 147 $skip_aligned_word_loop_up: >> 148 lda $18,8($18) >> 149 bgt $18,$byte_loop_tail_up >> 150 unop >> 151 ret $31,($26),1 >> 152 >> 153 .align 4 >> 154 $misaligned_up: >> 155 nop >> 156 fnop >> 157 unop >> 158 beq $18,$egress >> 159 >> 160 $byte_loop_tail_up: >> 161 ldq_u $3,0($5) >> 162 ldq_u $2,0($4) >> 163 lda $18,-1($18) >> 164 extbl $3,$5,$1 >> 165 >> 166 insbl $1,$4,$1 >> 167 mskbl $2,$4,$2 >> 168 bis $1,$2,$1 >> 169 stq_u $1,0($4) >> 170 >> 171 lda $5,1($5) >> 172 lda $4,1($4) >> 173 nop >> 174 bgt $18,$byte_loop_tail_up >> 175 >> 176 $egress: >> 177 ret $31,($26),1 >> 178 nop >> 179 nop >> 180 nop 26 181 27 ENTRY(__memmove) !! 182 .end memmove 28 WEAK(memmove) !! 183 EXPORT_SYMBOL(memmove) 29 UNWIND( .fnstart << 30 << 31 subs ip, r0, r1 << 32 cmphi r2, ip << 33 bls __memcpy << 34 UNWIND( .fnend << 35 << 36 UNWIND( .fnstart << 37 UNWIND( .save {r0, r4, fpreg, lr} << 38 stmfd sp!, {r0, r4, UNWIND(f << 39 UNWIND( .setfp fpreg, sp << 40 UNWIND( mov fpreg, sp << 41 add r1, r1, r2 << 42 add r0, r0, r2 << 43 subs r2, r2, #4 << 44 blt 8f << 45 ands ip, r0, #3 << 46 PLD( pld [r1, #-4] << 47 bne 9f << 48 ands ip, r1, #3 << 49 bne 10f << 50 << 51 1: subs r2, r2, #(28) << 52 stmfd sp!, {r5, r6, r8, r9} << 53 blt 5f << 54 << 55 CALGN( ands ip, r0, #31 << 56 CALGN( sbcsne r4, ip, r2 << 57 CALGN( bcs 2f << 58 CALGN( adr r4, 6f << 59 CALGN( subs r2, r2, ip << 60 CALGN( rsb ip, ip, #32 << 61 CALGN( add pc, r4, ip << 62 << 63 PLD( pld [r1, #-4] << 64 2: PLD( subs r2, r2, #96 << 65 PLD( pld [r1, #-32] << 66 PLD( blt 4f << 67 PLD( pld [r1, #-64] << 68 PLD( pld [r1, #-96] << 69 << 70 3: PLD( pld [r1, #-128] << 71 4: ldmdb r1!, {r3, r4, r5, r6, << 72 subs r2, r2, #32 << 73 stmdb r0!, {r3, r4, r5, r6, << 74 bge 3b << 75 PLD( cmn r2, #96 << 76 PLD( bge 4b << 77 << 78 5: ands ip, r2, #28 << 79 rsb ip, ip, #32 << 80 addne pc, pc, ip << 81 b 7f << 82 6: W(nop) << 83 W(ldr) r3, [r1, #-4]! << 84 W(ldr) r4, [r1, #-4]! << 85 W(ldr) r5, [r1, #-4]! << 86 W(ldr) r6, [r1, #-4]! << 87 W(ldr) r8, [r1, #-4]! << 88 W(ldr) r9, [r1, #-4]! << 89 W(ldr) lr, [r1, #-4]! << 90 << 91 add pc, pc, ip << 92 nop << 93 W(nop) << 94 W(str) r3, [r0, #-4]! << 95 W(str) r4, [r0, #-4]! << 96 W(str) r5, [r0, #-4]! << 97 W(str) r6, [r0, #-4]! << 98 W(str) r8, [r0, #-4]! << 99 W(str) r9, [r0, #-4]! << 100 W(str) lr, [r0, #-4]! << 101 << 102 CALGN( bcs 2b << 103 << 104 7: ldmfd sp!, {r5, r6, r8, r9} << 105 << 106 8: movs r2, r2, lsl #31 << 107 ldrbne r3, [r1, #-1]! << 108 ldrbcs r4, [r1, #-1]! << 109 ldrbcs ip, [r1, #-1] << 110 strbne r3, [r0, #-1]! << 111 strbcs r4, [r0, #-1]! << 112 strbcs ip, [r0, #-1] << 113 ldmfd sp!, {r0, r4, UNWIND(f << 114 << 115 9: cmp ip, #2 << 116 ldrbgt r3, [r1, #-1]! << 117 ldrbge r4, [r1, #-1]! << 118 ldrb lr, [r1, #-1]! << 119 strbgt r3, [r0, #-1]! << 120 strbge r4, [r0, #-1]! << 121 subs r2, r2, ip << 122 strb lr, [r0, #-1]! << 123 blt 8b << 124 ands ip, r1, #3 << 125 beq 1b << 126 << 127 10: bic r1, r1, #3 << 128 cmp ip, #2 << 129 ldr r3, [r1, #0] << 130 beq 17f << 131 blt 18f << 132 << 133 << 134 .macro backward_copy_shift pu << 135 << 136 subs r2, r2, #28 << 137 blt 14f << 138 << 139 CALGN( ands ip, r0, #31 << 140 CALGN( sbcsne r4, ip, r2 << 141 CALGN( subcc r2, r2, ip << 142 CALGN( bcc 15f << 143 << 144 11: stmfd sp!, {r5, r6, r8 - r10 << 145 << 146 PLD( pld [r1, #-4] << 147 PLD( subs r2, r2, #96 << 148 PLD( pld [r1, #-32] << 149 PLD( blt 13f << 150 PLD( pld [r1, #-64] << 151 PLD( pld [r1, #-96] << 152 << 153 12: PLD( pld [r1, #-128] << 154 13: ldmdb r1!, {r8, r9, r10, ip} << 155 mov lr, r3, lspush #\push << 156 subs r2, r2, #32 << 157 ldmdb r1!, {r3, r4, r5, r6} << 158 orr lr, lr, ip, lspull #\p << 159 mov ip, ip, lspush #\push << 160 orr ip, ip, r10, lspull #\ << 161 mov r10, r10, lspush #\pus << 162 orr r10, r10, r9, lspull # << 163 mov r9, r9, lspush #\push << 164 orr r9, r9, r8, lspull #\p << 165 mov r8, r8, lspush #\push << 166 orr r8, r8, r6, lspull #\p << 167 mov r6, r6, lspush #\push << 168 orr r6, r6, r5, lspull #\p << 169 mov r5, r5, lspush #\push << 170 orr r5, r5, r4, lspull #\p << 171 mov r4, r4, lspush #\push << 172 orr r4, r4, r3, lspull #\p << 173 stmdb r0!, {r4 - r6, r8 - r1 << 174 bge 12b << 175 PLD( cmn r2, #96 << 176 PLD( bge 13b << 177 << 178 ldmfd sp!, {r5, r6, r8 - r10 << 179 << 180 14: ands ip, r2, #28 << 181 beq 16f << 182 << 183 15: mov lr, r3, lspush #\push << 184 ldr r3, [r1, #-4]! << 185 subs ip, ip, #4 << 186 orr lr, lr, r3, lspull #\p << 187 str lr, [r0, #-4]! << 188 bgt 15b << 189 CALGN( cmp r2, #0 << 190 CALGN( bge 11b << 191 << 192 16: add r1, r1, #(\pull / 8) << 193 b 8b << 194 << 195 .endm << 196 << 197 << 198 backward_copy_shift push=8 << 199 << 200 17: backward_copy_shift push=1 << 201 << 202 18: backward_copy_shift push=2 << 203 << 204 UNWIND( .fnend << 205 ENDPROC(memmove) << 206 ENDPROC(__memmove) <<
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.