/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2008 Jim Law - Iris LP  All rights reserved.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file COPYING in the main directory of this
 * archive for more details.
 *
 * Written by Jim Law <jlaw@irispower.com>
 *
 * intended to replace:
 *	memcpy in memcpy.c and
 *	memmove in memmove.c
 * ... in arch/microblaze/lib
 *
 *
 * assly_fastcopy.S
 *
 * Attempt at quicker memcpy and memmove for MicroBlaze
 *	Input :	Operand1 in Reg r5 - destination address
 *		Operand2 in Reg r6 - source address
 *		Operand3 in Reg r7 - number of bytes to transfer
 *	Output: Result in Reg r3 - starting destination address
 *
 *
 * Explanation:
 *	Perform (possibly unaligned) copy of a block of memory
 *	between mem locations with size of xfer spec'd in bytes
 */

#include <linux/linkage.h>
	.text
	.globl	memcpy
	.type	memcpy, @function
	.ent	memcpy

memcpy:
fast_memcpy_ascending:
	/* move d to return register as value of function */
	addi	r3, r5, 0

	addi	r4, r0, 4	/* n = 4 */
	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
	blti	r4, a_xfer_end	/* if n < 0, less than one word to transfer */

	/* transfer first 0~3 bytes to get aligned dest address */
	andi	r4, r5, 3		/* n = d & 3 */
	/* if zero, destination already aligned */
	beqi	r4, a_dalign_done
	/* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */
	rsubi	r4, r4, 4
	rsub	r7, r4, r7		/* c = c - n  adjust c */

a_xfer_first_loop:
	/* if no bytes left to transfer, transfer the bulk */
	beqi	r4, a_dalign_done
	lbui	r11, r6, 0		/* h = *s */
	sbi	r11, r5, 0		/* *d = h */
	addi	r6, r6, 1		/* s++ */
	addi	r5, r5, 1		/* d++ */
	brid	a_xfer_first_loop	/* loop */
	addi	r4, r4, -1		/* n-- (IN DELAY SLOT) */

a_dalign_done:
	addi	r4, r0, 32		/* n = 32 */
	cmpu	r4, r4, r7		/* n = c - n  (unsigned) */
	/* if n < 0, less than one block to transfer */
	blti	r4, a_block_done

a_block_xfer:
	andi	r4, r7, 0xffffffe0	/* n = c & ~31 */
	rsub	r7, r4, r7		/* c = c - n */

	andi	r9, r6, 3		/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, a_block_unaligned

a_block_aligned:
	lwi	r9, r6, 0		/* t1 = *(s + 0) */
	lwi	r10, r6, 4		/* t2 = *(s + 4) */
	lwi	r11, r6, 8		/* t3 = *(s + 8) */
	lwi	r12, r6, 12		/* t4 = *(s + 12) */
	swi	r9, r5, 0		/* *(d + 0) = t1 */
	swi	r10, r5, 4		/* *(d + 4) = t2 */
	swi	r11, r5, 8		/* *(d + 8) = t3 */
	swi	r12, r5, 12		/* *(d + 12) = t4 */
	lwi	r9, r6, 16		/* t1 = *(s + 16) */
	lwi	r10, r6, 20		/* t2 = *(s + 20) */
	lwi	r11, r6, 24		/* t3 = *(s + 24) */
	lwi	r12, r6, 28		/* t4 = *(s + 28) */
	swi	r9, r5, 16		/* *(d + 16) = t1 */
	swi	r10, r5, 20		/* *(d + 20) = t2 */
	swi	r11, r5, 24		/* *(d + 24) = t3 */
	swi	r12, r5, 28		/* *(d + 28) = t4 */
	addi	r6, r6, 32		/* s = s + 32 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, a_block_aligned	/* while (n) loop */
	addi	r5, r5, 32		/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	add	r6, r6, r4		/* s = s + n */
	lwi	r11, r8, 0		/* h = *(as + 0) */

	addi	r9, r9, -1
	beqi	r9, a_block_u1		/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, a_block_u2		/* t1 was 2 => 2 byte offset */

a_block_u3:
	bslli	r11, r11, 24	/* h = h << 24 */
a_bu3_loop:
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 32	/* v = *(as + 32) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	addi	r8, r8, 32	/* as = as + 32 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, a_bu3_loop	/* while (n) loop */
	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_u1:
	bslli	r11, r11, 8	/* h = h << 8 */
a_bu1_loop:
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 32	/* v = *(as + 32) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	addi	r8, r8, 32	/* as = as + 32 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, a_bu1_loop	/* while (n) loop */
	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_u2:
	bslli	r11, r11, 16	/* h = h << 16 */
a_bu2_loop:
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 32	/* v = *(as + 32) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	addi	r8, r8, 32	/* as = as + 32 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, a_bu2_loop	/* while (n) loop */
	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */

a_block_done:
	addi	r4, r0, 4	/* n = 4 */
	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
	blti	r4, a_xfer_end	/* if n < 0, less than one word to transfer */

a_word_xfer:
	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
	addi	r10, r0, 0		/* offset = 0 */

	andi	r9, r6, 3		/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, a_word_unaligned

a_word_aligned:
	lw	r9, r6, r10		/* t1 = *(s+offset) */
	sw	r9, r5, r10		/* *(d+offset) = t1 */
	addi	r4, r4,-4		/* n-- */
	bneid	r4, a_word_aligned	/* loop */
	addi	r10, r10, 4		/* offset++ (IN DELAY SLOT) */

	bri	a_word_done

a_word_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	lwi	r11, r8, 0		/* h = *(as + 0) */
	addi	r8, r8, 4		/* as = as + 4 */

	addi	r9, r9, -1
	beqi	r9, a_word_u1		/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, a_word_u2		/* t1 was 2 => 2 byte offset */

a_word_u3:
	bslli	r11, r11, 24	/* h = h << 24 */
a_wu3_loop:
	lw	r12, r8, r10	/* v = *(as + offset) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r10	/* *(d + offset) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	addi	r4, r4,-4	/* n = n - 4 */
	bneid	r4, a_wu3_loop	/* while (n) loop */
	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */

	bri	a_word_done

a_word_u1:
	bslli	r11, r11, 8	/* h = h << 8 */
a_wu1_loop:
	lw	r12, r8, r10	/* v = *(as + offset) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r10	/* *(d + offset) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	addi	r4, r4,-4	/* n = n - 4 */
	bneid	r4, a_wu1_loop	/* while (n) loop */
	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */

	bri	a_word_done

a_word_u2:
	bslli	r11, r11, 16	/* h = h << 16 */
a_wu2_loop:
	lw	r12, r8, r10	/* v = *(as + offset) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r10	/* *(d + offset) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	addi	r4, r4,-4	/* n = n - 4 */
	bneid	r4, a_wu2_loop	/* while (n) loop */
	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */

a_word_done:
	add	r5, r5, r10	/* d = d + offset */
	add	r6, r6, r10	/* s = s + offset */
	rsub	r7, r10, r7	/* c = c - offset */

a_xfer_end:
a_xfer_end_loop:
	beqi	r7, a_done		/* while (c) */
	lbui	r9, r6, 0		/* t1 = *s */
	addi	r6, r6, 1		/* s++ */
	sbi	r9, r5, 0		/* *d = t1 */
	addi	r7, r7, -1		/* c-- */
	brid	a_xfer_end_loop		/* loop */
	addi	r5, r5, 1		/* d++ (IN DELAY SLOT) */

a_done:
	rtsd	r15, 8
	nop

	.size	memcpy, . - memcpy
	.end	memcpy
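/*
 * Illustrative C sketch of the ascending copy implemented above (not part
 * of the original file).  It assumes the big-endian byte order that the
 * shift directions in this file imply, and the names fast_copy_up and WORD
 * are made up for the sketch.  Like the assembly, the misaligned-source
 * path may read a couple of bytes past src + c, but only inside the last
 * aligned word it touches.
 *
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	typedef uint32_t WORD;
 *
 *	static void *fast_copy_up(void *dst, const void *src, size_t c)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		if (c >= 4) {
 *			// byte-copy 0..3 bytes so the destination is aligned
 *			unsigned int head = (4 - ((uintptr_t)d & 3)) & 3;
 *			size_t n, i;
 *			unsigned int off;
 *
 *			c -= head;
 *			while (head--)
 *				*d++ = *s++;
 *
 *			n = c & ~(size_t)3;		// whole words to move
 *			off = (uintptr_t)s & 3;
 *			if (n && off == 0) {		// source aligned too
 *				const WORD *ws = (const WORD *)s;
 *				WORD *wd = (WORD *)d;
 *
 *				for (i = 0; i < n / 4; i++)
 *					wd[i] = ws[i];
 *			} else if (n) {
 *				// Source misaligned: read aligned words and
 *				// merge the tail of the previous word with
 *				// the head of the next, exactly as the
 *				// a_block_u* and a_word_u* loops do.
 *				const WORD *as =
 *					(const WORD *)((uintptr_t)s & ~(uintptr_t)3);
 *				WORD *wd = (WORD *)d;
 *				WORD h = *as++ << (8 * off);
 *
 *				for (i = 0; i < n / 4; i++) {
 *					WORD v = *as++;
 *
 *					wd[i] = h | (v >> (32 - 8 * off));
 *					h = v << (8 * off);
 *				}
 *			}
 *			d += n;
 *			s += n;
 *			c -= n;
 *		}
 *
 *		while (c--)			// trailing 0..3 bytes
 *			*d++ = *s++;
 *		return dst;
 *	}
 */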
/*------------------------------------------------------------------------*/
	.globl	memmove
	.type	memmove, @function
	.ent	memmove

memmove:
	cmpu	r4, r5, r6	/* n = s - d */
	bgei	r4, fast_memcpy_ascending

fast_memcpy_descending:
	/* move d to return register as value of function */
	addi	r3, r5, 0

	add	r5, r5, r7	/* d = d + c */
	add	r6, r6, r7	/* s = s + c */

	addi	r4, r0, 4	/* n = 4 */
	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
	blti	r4, d_xfer_end	/* if n < 0, less than one word to transfer */

	/* transfer first 0~3 bytes to get aligned dest address */
	andi	r4, r5, 3		/* n = d & 3 */
	/* if zero, destination already aligned */
	beqi	r4, d_dalign_done
	rsub	r7, r4, r7		/* c = c - n  adjust c */

d_xfer_first_loop:
	/* if no bytes left to transfer, transfer the bulk */
	beqi	r4, d_dalign_done
	addi	r6, r6, -1		/* s-- */
	addi	r5, r5, -1		/* d-- */
	lbui	r11, r6, 0		/* h = *s */
	sbi	r11, r5, 0		/* *d = h */
	brid	d_xfer_first_loop	/* loop */
	addi	r4, r4, -1		/* n-- (IN DELAY SLOT) */

d_dalign_done:
	addi	r4, r0, 32	/* n = 32 */
	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
	/* if n < 0, less than one block to transfer */
	blti	r4, d_block_done

d_block_xfer:
	andi	r4, r7, 0xffffffe0	/* n = c & ~31 */
	rsub	r7, r4, r7		/* c = c - n */

	andi	r9, r6, 3		/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, d_block_unaligned

d_block_aligned:
	addi	r6, r6, -32	/* s = s - 32 */
	addi	r5, r5, -32	/* d = d - 32 */
	lwi	r9, r6, 28	/* t1 = *(s + 28) */
	lwi	r10, r6, 24	/* t2 = *(s + 24) */
	lwi	r11, r6, 20	/* t3 = *(s + 20) */
	lwi	r12, r6, 16	/* t4 = *(s + 16) */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	swi	r10, r5, 24	/* *(d + 24) = t2 */
	swi	r11, r5, 20	/* *(d + 20) = t3 */
	swi	r12, r5, 16	/* *(d + 16) = t4 */
	lwi	r9, r6, 12	/* t1 = *(s + 12) */
	lwi	r10, r6, 8	/* t2 = *(s + 8) */
	lwi	r11, r6, 4	/* t3 = *(s + 4) */
	lwi	r12, r6, 0	/* t4 = *(s + 0) */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	swi	r10, r5, 8	/* *(d + 8) = t2 */
	swi	r11, r5, 4	/* *(d + 4) = t3 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, d_block_aligned	/* while (n) loop */
	swi	r12, r5, 0	/* *(d + 0) = t4 (IN DELAY SLOT) */
	bri	d_block_done

d_block_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	rsub	r6, r4, r6		/* s = s - n */
	lwi	r11, r8, 0		/* h = *(as + 0) */

	addi	r9, r9, -1
	beqi	r9, d_block_u1		/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, d_block_u2		/* t1 was 2 => 2 byte offset */

d_block_u3:
	bsrli	r11, r11, 8	/* h = h >> 8 */
d_bu3_loop:
	addi	r8, r8, -32	/* as = as - 32 */
	addi	r5, r5, -32	/* d = d - 32 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 0	/* v = *(as + 0) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, d_bu3_loop	/* while (n) loop */
	bsrli	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */
	bri	d_block_done

d_block_u1:
	bsrli	r11, r11, 24	/* h = h >> 24 */
d_bu1_loop:
	addi	r8, r8, -32	/* as = as - 32 */
	addi	r5, r5, -32	/* d = d - 32 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 0	/* v = *(as + 0) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, d_bu1_loop	/* while (n) loop */
	bsrli	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */
	bri	d_block_done

d_block_u2:
	bsrli	r11, r11, 16	/* h = h >> 16 */
d_bu2_loop:
	addi	r8, r8, -32	/* as = as - 32 */
	addi	r5, r5, -32	/* d = d - 32 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 0	/* v = *(as + 0) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, d_bu2_loop	/* while (n) loop */
	bsrli	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */

d_block_done:
	addi	r4, r0, 4	/* n = 4 */
	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
	blti	r4, d_xfer_end	/* if n < 0, less than one word to transfer */

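/*
 * Illustrative C sketch of the descending unaligned word copy used below
 * (not part of the original file; big-endian 32-bit assumed, and the name
 * copy_words_down_unaligned is made up).  The merge is the same as in the
 * ascending case -- low-address word shifted left by 8*off OR'd with the
 * high-address word shifted right by 32 - 8*off -- only the walk direction
 * and the word carried between iterations differ.
 *
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	// d and s already point at the bottom of the region (d is word
 *	// aligned), n is a multiple of 4, off = (uintptr_t)s & 3 is 1..3.
 *	static void copy_words_down_unaligned(unsigned char *d,
 *					      const unsigned char *s,
 *					      size_t n, unsigned int off)
 *	{
 *		const uint32_t *as =
 *			(const uint32_t *)((uintptr_t)s & ~(uintptr_t)3);
 *		uint32_t h = as[n / 4] >> (32 - 8 * off);  // from the top word
 *
 *		while (n) {
 *			n -= 4;
 *			uint32_t v = as[n / 4];	// may start below s, like the asm
 *
 *			*(uint32_t *)(d + n) = (v << (8 * off)) | h;
 *			h = v >> (32 - 8 * off);
 *		}
 *	}
 */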
d_word_xfer:
	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
	rsub	r5, r4, r5		/* d = d - n */
	rsub	r6, r4, r6		/* s = s - n */
	rsub	r7, r4, r7		/* c = c - n */

	andi	r9, r6, 3		/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, d_word_unaligned

d_word_aligned:
	addi	r4, r4,-4		/* n-- */
	lw	r9, r6, r4		/* t1 = *(s+n) */
	bneid	r4, d_word_aligned	/* loop */
	sw	r9, r5, r4		/* *(d+n) = t1 (IN DELAY SLOT) */

	bri	d_word_done

d_word_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	lw	r11, r8, r4		/* h = *(as + n) */

	addi	r9, r9, -1
	beqi	r9, d_word_u1		/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, d_word_u2		/* t1 was 2 => 2 byte offset */

d_word_u3:
	bsrli	r11, r11, 8	/* h = h >> 8 */
d_wu3_loop:
	addi	r4, r4,-4	/* n = n - 4 */
	lw	r12, r8, r4	/* v = *(as + n) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r4	/* *(d + n) = t1 */
	bneid	r4, d_wu3_loop	/* while (n) loop */
	bsrli	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */

	bri	d_word_done

d_word_u1:
	bsrli	r11, r11, 24	/* h = h >> 24 */
d_wu1_loop:
	addi	r4, r4,-4	/* n = n - 4 */
	lw	r12, r8, r4	/* v = *(as + n) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r4	/* *(d + n) = t1 */
	bneid	r4, d_wu1_loop	/* while (n) loop */
	bsrli	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */

	bri	d_word_done

d_word_u2:
	bsrli	r11, r11, 16	/* h = h >> 16 */
d_wu2_loop:
	addi	r4, r4,-4	/* n = n - 4 */
	lw	r12, r8, r4	/* v = *(as + n) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r4	/* *(d + n) = t1 */
	bneid	r4, d_wu2_loop	/* while (n) loop */
	bsrli	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */

d_word_done:

d_xfer_end:
d_xfer_end_loop:
	beqi	r7, a_done		/* while (c) */
	addi	r6, r6, -1		/* s-- */
	lbui	r9, r6, 0		/* t1 = *s */
	addi	r5, r5, -1		/* d-- */
	sbi	r9, r5, 0		/* *d = t1 */
	brid	d_xfer_end_loop		/* loop */
	addi	r7, r7, -1		/* c-- (IN DELAY SLOT) */

d_done:
	rtsd	r15, 8
	nop

	.size	memmove, . - memmove
	.end	memmove
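/*
 * Minimal self-contained sketch of the direction choice memmove makes above
 * (not part of the original file; simple_memmove is a made-up name and it
 * copies bytewise, without the block/word optimizations).  When the source
 * sits at or above the destination the ascending memcpy body is safe for
 * overlap; otherwise both pointers are advanced to the end of the regions
 * and the copy runs descending, as fast_memcpy_descending does.
 *
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	static void *simple_memmove(void *dst, const void *src, size_t c)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		if ((uintptr_t)s >= (uintptr_t)d) {
 *			while (c--)		// ascending copy is safe
 *				*d++ = *s++;
 *		} else {
 *			d += c;
 *			s += c;
 *			while (c--)		// descending copy is safe
 *				*--d = *--s;
 *		}
 *		return dst;
 *	}
 */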