/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1995 - 2000 by Ralf Baechle
 */
#include <linux/context_tracking.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/ratelimit.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>

#include <asm/branch.h>
#include <asm/mmu_context.h>
#include <asm/ptrace.h>
#include <asm/highmem.h>		/* For VMALLOC_END */
#include <asm/traps.h>
#include <linux/kdebug.h>

int show_unhandled_signals = 1;

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
static void __do_page_fault(struct pt_regs *regs, unsigned long write,
        unsigned long address)
{
        struct vm_area_struct * vma = NULL;
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
        const int field = sizeof(unsigned long) * 2;
        int si_code;
        vm_fault_t fault;
        unsigned int flags = FAULT_FLAG_DEFAULT;

        static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

#if 0
        printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
               current->comm, current->pid, field, address, write,
               field, regs->cp0_epc);
#endif

#ifdef CONFIG_KPROBES
        /*
         * This is to notify the fault handler of the kprobes.
         */
        if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
                       current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
                return;
#endif

        si_code = SEGV_MAPERR;

        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
#ifdef CONFIG_64BIT
# define VMALLOC_FAULT_TARGET no_context
#else
# define VMALLOC_FAULT_TARGET vmalloc_fault
#endif

        if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
                goto VMALLOC_FAULT_TARGET;
#ifdef MODULES_VADDR
        if (unlikely(address >= MODULES_VADDR && address < MODULES_END))
                goto VMALLOC_FAULT_TARGET;
#endif

        /*
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
        if (faulthandler_disabled() || !mm)
                goto bad_area_nosemaphore;

        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
        vma = lock_mm_and_find_vma(mm, address, regs);
        if (!vma)
                goto bad_area_nosemaphore;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
        si_code = SEGV_ACCERR;

        if (write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
                flags |= FAULT_FLAG_WRITE;
        } else {
                if (cpu_has_rixi) {
                        if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
#if 0
                                pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n",
                                          raw_smp_processor_id(),
                                          current->comm, current->pid,
                                          field, address, write,
                                          field, regs->cp0_epc);
#endif
                                goto bad_area;
                        }
                        if (!(vma->vm_flags & VM_READ) &&
                            exception_epc(regs) != address) {
#if 0
                                pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n",
                                          raw_smp_processor_id(),
                                          current->comm, current->pid,
                                          field, address, write,
                                          field, regs->cp0_epc);
#endif
                                goto bad_area;
                        }
                } else {
                        if (unlikely(!vma_is_accessible(vma)))
                                goto bad_area;
                }
        }

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        fault = handle_mm_fault(vma, address, flags, regs);

        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        goto no_context;
                return;
        }

        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
                return;

        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
                else if (fault & VM_FAULT_SIGSEGV)
                        goto bad_area;
                else if (fault & VM_FAULT_SIGBUS)
                        goto do_sigbus;
                BUG();
        }

        if (fault & VM_FAULT_RETRY) {
                flags |= FAULT_FLAG_TRIED;

                /*
                 * No need to mmap_read_unlock(mm) as we would
                 * have already released it in __lock_page_or_retry
                 * in mm/filemap.c.
                 */

                goto retry;
        }

        mmap_read_unlock(mm);
        return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
        mmap_read_unlock(mm);

bad_area_nosemaphore:
        /* User mode accesses just cause a SIGSEGV */
        if (user_mode(regs)) {
                tsk->thread.cp0_badvaddr = address;
                tsk->thread.error_code = write;
                if (show_unhandled_signals &&
                    unhandled_signal(tsk, SIGSEGV) &&
                    __ratelimit(&ratelimit_state)) {
                        pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n",
                                tsk->comm,
                                write ? "write access to" : "read access from",
                                field, address);
                        pr_info("epc = %0*lx in", field,
                                (unsigned long) regs->cp0_epc);
                        print_vma_addr(KERN_CONT " ", regs->cp0_epc);
                        pr_cont("\n");
                        pr_info("ra  = %0*lx in", field,
                                (unsigned long) regs->regs[31]);
                        print_vma_addr(KERN_CONT " ", regs->regs[31]);
                        pr_cont("\n");
                }
                current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
                force_sig_fault(SIGSEGV, si_code, (void __user *)address);
                return;
        }

no_context:
        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs)) {
                current->thread.cp0_baduaddr = address;
                return;
        }

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
        bust_spinlocks(1);

        printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
               "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
               raw_smp_processor_id(), field, address, field, regs->cp0_epc,
               field, regs->regs[31]);
        die("Oops", regs);

out_of_memory:
        /*
         * We ran out of memory, call the OOM killer, and return the userspace
         * (which will retry the fault, or kill us if we got oom-killed).
         */
        mmap_read_unlock(mm);
        if (!user_mode(regs))
                goto no_context;
        pagefault_out_of_memory();
        return;

do_sigbus:
        mmap_read_unlock(mm);

        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs))
                goto no_context;

        /*
         * Send a sigbus, regardless of whether we were in kernel
         * or user mode.
         */
#if 0
        printk("do_page_fault() #3: sending SIGBUS to %s for "
               "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
               tsk->comm,
               write ? "write access to" : "read access from",
               field, address,
               field, (unsigned long) regs->cp0_epc,
               field, (unsigned long) regs->regs[31]);
#endif
        current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
        tsk->thread.cp0_badvaddr = address;
        force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);

        return;
#ifndef CONFIG_64BIT
vmalloc_fault:
        {
                /*
                 * Synchronize this task's top level page-table
                 * with the 'reference' page table.
                 *
                 * Do _not_ use "tsk" here. We might be inside
                 * an interrupt in the middle of a task switch..
                 */
                int offset = pgd_index(address);
                pgd_t *pgd, *pgd_k;
                p4d_t *p4d, *p4d_k;
                pud_t *pud, *pud_k;
                pmd_t *pmd, *pmd_k;
                pte_t *pte_k;

                pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset;
                pgd_k = init_mm.pgd + offset;

                if (!pgd_present(*pgd_k))
                        goto no_context;
                set_pgd(pgd, *pgd_k);

                p4d = p4d_offset(pgd, address);
                p4d_k = p4d_offset(pgd_k, address);
                if (!p4d_present(*p4d_k))
                        goto no_context;

                pud = pud_offset(p4d, address);
                pud_k = pud_offset(p4d_k, address);
                if (!pud_present(*pud_k))
                        goto no_context;

                pmd = pmd_offset(pud, address);
                pmd_k = pmd_offset(pud_k, address);
                if (!pmd_present(*pmd_k))
                        goto no_context;
                set_pmd(pmd, *pmd_k);

                pte_k = pte_offset_kernel(pmd_k, address);
                if (!pte_present(*pte_k))
                        goto no_context;
                return;
        }
#endif
}
NOKPROBE_SYMBOL(__do_page_fault);

asmlinkage void do_page_fault(struct pt_regs *regs,
        unsigned long write, unsigned long address)
{
        enum ctx_state prev_state;

        prev_state = exception_enter();
        __do_page_fault(regs, write, address);
        exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_page_fault);