/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Linus Torvalds <torvalds@linux-foundation.org>
 */

#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 *
 * This copies from user space into kernel space, but the kernel
 * space accesses can take a machine check exception, so they too
 * need exception handling.
 *
 * Note: only 32-bit and 64-bit stores have non-temporal versions,
 * and we only use aligned versions. Any unaligned parts at the
 * start or end of the copy will be done using normal cached stores.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count
 *
 * Output:
 * rax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(__copy_user_nocache)
	/* If the destination is not 8-byte aligned, we'll have to align it */
	testb $7,%dil
	jne .Lalign

.Lis_aligned:
	cmp $64,%edx
	jb .Lquadwords

	.p2align 4,0x90
.Lunrolled:
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
20:	movnti %r8,(%rdi)
21:	movnti %r9,8(%rdi)
22:	movnti %r10,16(%rdi)
23:	movnti %r11,24(%rdi)
30:	movq 32(%rsi),%r8
31:	movq 40(%rsi),%r9
32:	movq 48(%rsi),%r10
33:	movq 56(%rsi),%r11
40:	movnti %r8,32(%rdi)
41:	movnti %r9,40(%rdi)
42:	movnti %r10,48(%rdi)
43:	movnti %r11,56(%rdi)

	addq $64,%rsi
	addq $64,%rdi
	sub $64,%edx
	cmp $64,%edx
	jae .Lunrolled

	/*
	 * The first set of user mode loads has been done
	 * without any stores, so if they fail, we can
	 * just try the non-unrolled loop.
	 */
	_ASM_EXTABLE_UA(10b, .Lquadwords)
	_ASM_EXTABLE_UA(11b, .Lquadwords)
	_ASM_EXTABLE_UA(12b, .Lquadwords)
	_ASM_EXTABLE_UA(13b, .Lquadwords)

	/*
	 * The second set of user mode loads has been
	 * done with 32 bytes stored to the destination,
	 * so we need to take that into account before
	 * falling back to the non-unrolled loop.
	 */
	_ASM_EXTABLE_UA(30b, .Lfixup32)
	_ASM_EXTABLE_UA(31b, .Lfixup32)
	_ASM_EXTABLE_UA(32b, .Lfixup32)
	_ASM_EXTABLE_UA(33b, .Lfixup32)

	/*
	 * An exception on a write means that we're
	 * done, but we need to update the count
	 * depending on where in the unrolled loop
	 * we were.
	 */
	_ASM_EXTABLE_UA(20b, .Ldone0)
	_ASM_EXTABLE_UA(21b, .Ldone8)
	_ASM_EXTABLE_UA(22b, .Ldone16)
	_ASM_EXTABLE_UA(23b, .Ldone24)
	_ASM_EXTABLE_UA(40b, .Ldone32)
	_ASM_EXTABLE_UA(41b, .Ldone40)
	_ASM_EXTABLE_UA(42b, .Ldone48)
	_ASM_EXTABLE_UA(43b, .Ldone56)

.Lquadwords:
	cmp $8,%edx
	jb .Llong
50:	movq (%rsi),%rax
51:	movnti %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%edx
	jmp .Lquadwords

	/*
	 * If we fail on the last full quadword, we will
	 * not try to do any byte-wise cached accesses.
	 * We will try to do one more 4-byte uncached
	 * one, though.
	 */
	_ASM_EXTABLE_UA(50b, .Llast4)
	_ASM_EXTABLE_UA(51b, .Ldone0)

.Llong:
	test $4,%dl
	je .Lword
60:	movl (%rsi),%eax
61:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
.Lword:
	sfence
	test $2,%dl
	je .Lbyte
70:	movw (%rsi),%ax
71:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lbyte:
	test $1,%dl
	je .Ldone
80:	movb (%rsi),%al
81:	movb %al,(%rdi)
	dec %edx
.Ldone:
	mov %edx,%eax
	RET

	/*
	 * If we fail on the last four bytes, we won't
	 * bother with any fixups. It's dead, Jim. Note
	 * that there's no need for 'sfence' for any
	 * of this, since the exception will have been
	 * serializing.
	 */
	_ASM_EXTABLE_UA(60b, .Ldone)
	_ASM_EXTABLE_UA(61b, .Ldone)
	_ASM_EXTABLE_UA(70b, .Ldone)
	_ASM_EXTABLE_UA(71b, .Ldone)
	_ASM_EXTABLE_UA(80b, .Ldone)
	_ASM_EXTABLE_UA(81b, .Ldone)

	/*
	 * This is the "head needs aligning" case when
	 * the destination isn't 8-byte aligned. The
	 * 4-byte case can be done uncached, but any
	 * smaller alignment is done with regular stores.
	 */
.Lalign:
	test $1,%dil
	je .Lalign_word
	test %edx,%edx
	je .Ldone
90:	movb (%rsi),%al
91:	movb %al,(%rdi)
	inc %rsi
	inc %rdi
	dec %edx
.Lalign_word:
	test $2,%dil
	je .Lalign_long
	cmp $2,%edx
	jb .Lbyte
92:	movw (%rsi),%ax
93:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lalign_long:
	test $4,%dil
	je .Lis_aligned
	cmp $4,%edx
	jb .Lword
94:	movl (%rsi),%eax
95:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
	jmp .Lis_aligned

	/*
	 * If we fail on the initial alignment accesses,
	 * we're all done. Again, no point in trying to
	 * do byte-by-byte probing if the 4-byte load
	 * fails - we're not doing any uncached accesses
	 * any more.
	 */
	_ASM_EXTABLE_UA(90b, .Ldone)
	_ASM_EXTABLE_UA(91b, .Ldone)
	_ASM_EXTABLE_UA(92b, .Ldone)
	_ASM_EXTABLE_UA(93b, .Ldone)
	_ASM_EXTABLE_UA(94b, .Ldone)
	_ASM_EXTABLE_UA(95b, .Ldone)

	/*
	 * Exception table fixups for faults in the middle
	 */
.Ldone56:	sub $8,%edx
.Ldone48:	sub $8,%edx
.Ldone40:	sub $8,%edx
.Ldone32:	sub $8,%edx
.Ldone24:	sub $8,%edx
.Ldone16:	sub $8,%edx
.Ldone8:	sub $8,%edx
.Ldone0:
	mov %edx,%eax
	RET

.Lfixup32:
	addq $32,%rsi
	addq $32,%rdi
	sub $32,%edx
	jmp .Lquadwords

.Llast4:
52:	movl (%rsi),%eax
53:	movnti %eax,(%rdi)
	sfence
	sub $4,%edx
	mov %edx,%eax
	RET
	_ASM_EXTABLE_UA(52b, .Ldone0)
	_ASM_EXTABLE_UA(53b, .Ldone0)

SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
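
/*
 * Illustrative sketch only: a minimal C-level view of how this routine
 * might be called, written from the register convention documented in
 * the header above (rdi = dst, rsi = src, edx = count, rax = bytes left
 * uncopied).  The prototype is assumed from that convention rather than
 * quoted from a header, and the wrapper and buffer names are hypothetical;
 * the sketch assumes the usual <linux/uaccess.h> / <linux/errno.h> context.
 *
 *	long __copy_user_nocache(void *dst, const void __user *src,
 *				 unsigned size);
 *
 *	// Hypothetical wrapper: copy 'len' bytes from user memory into a
 *	// kernel buffer, using non-temporal stores for the aligned bulk.
 *	static int example_nocache_read(void *kbuf, const void __user *ubuf,
 *					unsigned len)
 *	{
 *		long left = __copy_user_nocache(kbuf, ubuf, len);
 *
 *		// A non-zero return value is the number of trailing bytes
 *		// that were not copied, e.g. because a user access faulted.
 *		return left ? -EFAULT : 0;
 *	}
 */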