/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/errno.h>

/*
 * Fault annotation macros.
 *
 * Each errN macro drops a numeric local label at the current location and
 * emits an EX_TABLE entry pairing that label with a handler label, so the
 * instruction written right after "errN;" is the one being tagged.
 * EX_TABLE is supplied by an included header; presumably it records an
 * exception-table fixup so that a fault on the tagged instruction resumes
 * at the handler -- confirm against the header.
 *
 *   err1 -> .Ldo_err1  (used where no extra stack frame is live)
 *   err2 -> .Ldo_err2  (used while r14-r22 are saved in the local frame)
 *   err3 -> .Ldone     (used inside the byte-wise retry loop)
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm

/*
 * Handler for err2-tagged instructions: reload r14-r22 from the frame
 * created in copy_mc_generic, pop that frame, then fall through into
 * .Ldo_err1.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
/*
 * Handler for err1-tagged instructions (and tail of .Ldo_err2).
 *
 * r7 holds the byte count still outstanding at the start of the chunk that
 * faulted; it is loaded into CTR and the copy is retried one byte at a time
 * from the current r4 to the current r3.  If the loop runs to completion
 * the routine returns 0.
 *
 * NOTE(review): in every chunk of copy_mc_generic r4 is advanced before the
 * tagged stores while r3 and r7 are only updated afterwards.  If a tagged
 * store (rather than a load) ever reached this handler, the retry would read
 * from a source offset that is ahead of the destination -- confirm that only
 * loads are expected to fault here.
 */
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0
	blr

/*
 * Handler for err3-tagged instructions: a fault inside the byte loop above.
 * Returns the current CTR value in r3, i.e. the number of byte-loop
 * iterations that had not completed (the faulting one included).
 */
.Ldone:
	mfctr	r3
	blr


/*
 * copy_mc_generic
 *
 * Register usage as established by the code below:
 *   r3  - destination pointer (all stores go through it)
 *   r4  - source pointer (all loads come from it)
 *   r5  - byte count on entry; later reduced to the residual length
 *   r7  - running count of bytes not yet copied, consumed by .Ldo_err1
 *   r0, r6, r8-r12, r14-r22 - data/scratch registers
 *
 * Returns in r3: 0 when every chunk was copied, otherwise whatever the
 * fault handlers above produce (0 if the byte-wise retry completes, else
 * the remaining CTR count from .Ldone).
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = bytes remaining */
	cmpldi	r5,16
	blt	.Lshort_copy		/* fewer than 16 bytes: tail code only */

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* low bits of -src into cr7 for the bf tests */
	clrldi	r6,r6,(64-3)		/* r6 = (-src) & 7 = bytes needed to align */

	bf	cr7*4+3,1f		/* 1-byte step needed? */
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* 2-byte step needed? */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* 4-byte step needed? */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = length left after alignment */
	cmpldi	r5,128			/* result consumed by the blt below */

	/*
	 * Create a frame and save r14-r22, which are used as data registers
	 * by the wide copies below.  LR is stored in the caller's frame; it is
	 * not reloaded anywhere in this file, and nothing here modifies LR.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128 bytes left: skip the loop */
	srdi	r6,r5,7
	mtctr	r6			/* CTR = number of 128-byte blocks */

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128		/* block done: 128 fewer bytes remaining */
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = length mod 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 bits select the 64/32/16-byte steps */

6:	bf	cr7*4+1,7f		/* 64-byte step needed? */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64

	/*
	 * r14-r22 are no longer needed: restore them and pop the frame.
	 * From here on faults are tagged err1 because no frame is live.
	 */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* 32-byte step needed? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16-byte step needed? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 = length mod 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 bits select the 8/4/2/1-byte steps */
	bf	cr7*4+0,12f		/* 8-byte step needed? */
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f		/* 4-byte step needed? */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f		/* 2-byte step needed? */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f		/* final single byte needed? */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* everything copied: return 0 */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.