/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0, r5, 31, 1, 31
	addi	r6, r3, -4
	beq-	2f
	rlwimi	r4, r4, 16, 0, 15
	mtctr	r0
1:	stwu	r4, 4(r6)
	bdnz	1b
2:	andi.	r0, r5, 1
	beqlr
	sth	r4, 4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore skip the optimised block that uses dcbz. This jump is
 * replaced by a nop once the cache is active. This is done in machine_init()
 */
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip the optimised block until the cache is enabled. Will be
	 * replaced by 'bne' during boot to use the normal procedure
	 * if r4 is not zero.
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)
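/*
 * Illustrative note (a sketch, not generated code): memset16 above uses
 * rlwinm. to compute r5 >> 1 (the word count) and rlwimi to copy the
 * 16-bit value into both halves of r4; memset's rlwimi pair likewise
 * replicates the fill byte into all four bytes of r4, so the word loop
 * and dcbz can emit four identical bytes per store.  In C terms, roughly:
 *
 *	u32 w = c & 0xff;
 *	w |= w << 8;		// rlwimi r4,r4,8,16,23
 *	w |= w << 16;		// rlwimi r4,r4,16,0,15
 */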
/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
 * replaced by a nop once the cache is active. This is done in machine_init()
 */
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)
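/*
 * Illustrative note (a sketch, not generated code): the overlap test in
 * memcpy above sets cr0.lt if src < dst + len and cr1.lt if
 * dst < src + len, then ANDs the two bits with crand.  In C terms,
 * roughly:
 *
 *	if (src < dst + len && dst < src + len)
 *		goto generic_memcpy;	// regions overlap
 *
 * generic_memcpy below is a plain forward copy with no dcbz, so it is
 * also the safe fallback while the cache is not yet enabled.
 */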
generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/*
	 * Heuristically, for large transfers we prefetch
	 * MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	 * we prefetch 1 cacheline ahead.
	 */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
	/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr
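/*
 * Fault recovery for __copy_tofrom_user, reached via the EX_TABLE
 * entries above and below.  Each handler records in r9 whether the
 * fault was on a load (0) or a store (1), leaves the number of
 * untransferred iterations in ctr, and puts log2(bytes per iteration)
 * in r3 (0 for the byte loops, 2 for the word loops,
 * LG_CACHELINE_BYTES for the cacheline loop), so the residue computed
 * at 99:/106: is r5 + (ctr << r3).  Worked example (illustrative):
 * with 32-byte cachelines, 3 uncopied lines and r5 = 5 trailing bytes
 * give 5 + (3 << 5) = 101 bytes not copied, which is what the caller
 * gets back in r3.
 */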
	/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
	/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
	/* read fault, initial word copy */
102:	li	r9,0
	b	91f
	/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
	/* fault on dcbz (effectively a write fault) */
	/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
	/* read fault in final word loop */
108:	li	r9,0
	b	93f
	/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
	/* read fault in final byte loop */
110:	li	r9,0
	b	94f
	/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
	/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
	/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)
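/*
 * Illustrative note (a sketch of the caller contract, not generated
 * code): __copy_tofrom_user() returns in r3 the number of bytes that
 * could NOT be copied; 0 means the whole transfer succeeded.  A
 * typical C-level caller therefore checks, roughly:
 *
 *	if (__copy_tofrom_user(to, from, n) != 0)
 *		return -EFAULT;	// partial copy
 */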