// SPDX-License-Identifier: GPL-2.0
/*
 * linux/arch/alpha/lib/memcpy.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

#include <linux/types.h>
#include <linux/export.h>
#include <linux/string.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8_UP(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define ALIGN_DEST_TO8_DN(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST_UP(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define DO_REST_DN(d,s,n) \
	while (n > 0) { \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)
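
/*
 * Editor's illustrative sketch only, kept out of the build with #if 0: every
 * copy routine below has the same head/body/tail shape that the macros above
 * provide.  The helper name example_copy_shape() is hypothetical and not part
 * of the original code; the quad-word body here also assumes the source shares
 * the destination's alignment, which only the "aligned" routines guarantee.
 */
#if 0
static void example_copy_shape(unsigned long d, unsigned long s, long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);	/* head: single bytes until d is 8-byte aligned */
	n -= 8;				/* bias n so the loop test is a sign check */
	while (n >= 0) {		/* body: whole quad-words */
		*(unsigned long *) d = *(unsigned long *) s;
		d += 8; s += 8; n -= 8;
	}
	n += 8;				/* undo the bias */
	DO_REST_UP(d,s,n);		/* tail: whatever bytes are left */
}
#endif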

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid double-reading the unaligned reads.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
					  long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;
		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
		do {
			unsigned long tmp;
			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;
		} while (n >= 0);
	}
	n += 8;
	DO_REST_UP(d,s,n);
}

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
					  long n)
{
	/* I don't understand AXP assembler well enough for this. -Tim */
	s += n;
	d += n;
	while (n--)
		* (char *) --d = * (char *) --s;
}
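
/*
 * Editor's illustrative sketch only, kept out of the build with #if 0: roughly
 * what the extql/extqh pair in __memcpy_unaligned_up above computes, written
 * as plain C shifts on a little-endian 64-bit machine.  extql extracts the low
 * part of the misaligned quad-word from the first aligned load, extqh the high
 * part from the second, and the store writes their OR to the aligned
 * destination.  The helper name merge_unaligned_quad() is hypothetical, and
 * the shift == 0 case is special-cased only to keep the C shifts defined.
 */
#if 0
static unsigned long merge_unaligned_quad(unsigned long low_word,
					  unsigned long high_word,
					  unsigned long s)
{
	unsigned long shift = (s & 7) * 8;

	if (shift == 0)		/* source already aligned: high word not needed */
		return low_word;
	return (low_word >> shift) | (high_word << (64 - shift));
}
#endif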

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but it would seem that using a floating
 * point register for the move seems to slow things down (very small difference,
 * though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
					long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED_UP(d,s,n);
}
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
					long n)
{
	s += n;
	d += n;
	ALIGN_DEST_TO8_DN(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		s -= 8;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		d -= 8;
		*(unsigned long *) d = tmp;
	}
	n += 8;
	DO_REST_ALIGNED_DN(d,s,n);
}

#undef memcpy

void * memcpy(void * dest, const void *src, size_t n)
{
	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				     n);
		return dest;
	}
	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
	return dest;
}
EXPORT_SYMBOL(memcpy);
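
/*
 * Editor's illustrative sketch only, kept out of the build with #if 0: the
 * dispatch test in memcpy() above only cares about the *relative* alignment of
 * the two pointers, not whether either one is itself 8-byte aligned.  The
 * helper name and the example addresses are hypothetical:
 *
 *	dest = 0x1003, src = 0x200b  ->  (dest ^ src) & 7 == 0, aligned path
 *	dest = 0x1003, src = 0x2004  ->  (dest ^ src) & 7 != 0, unaligned path
 */
#if 0
static int same_relative_alignment(const void *dest, const void *src)
{
	return !(((unsigned long) dest ^ (unsigned long) src) & 7);
}
#endif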