// SPDX-License-Identifier: GPL-2.0
/*
 *  KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2020
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *		 David Hildenbrand <david@redhat.com>
 *		 Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/page.h>
#include <asm/tlb.h>

#define GMAP_SHADOW_FAKE_TABLE 1ULL

static struct page *gmap_alloc_crst(void)
{
	struct page *page;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
	if (!page)
		return NULL;
	__arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
	return page;
}

/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
static struct gmap *gmap_alloc(unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < _REGION3_SIZE) {
		limit = _REGION3_SIZE - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < _REGION2_SIZE) {
		limit = _REGION2_SIZE - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < _REGION1_SIZE) {
		limit = _REGION1_SIZE - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	INIT_LIST_HEAD(&gmap->children);
	INIT_LIST_HEAD(&gmap->pt_list);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
	spin_lock_init(&gmap->guest_table_lock);
	spin_lock_init(&gmap->shadow_lock);
	refcount_set(&gmap->ref_count, 1);
	page = gmap_alloc_crst();
	if (!page)
		goto out_free;
	page->index = 0;
	list_add(&page->lru, &gmap->crst_list);
	table = page_to_virt(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}

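/*
 * Illustrative usage sketch (not taken from an in-tree caller; the variable
 * names are placeholders, only the gmap_* functions of this file are real):
 *
 *	struct gmap *g = gmap_create(current->mm, 1UL << 42);
 *
 *	if (g && !gmap_map_segment(g, userspace_addr, guest_addr, PMD_SIZE))
 *		;	// run the guest, resolving faults via gmap_fault()
 *	gmap_remove(g);
 *
 * Both userspace_addr and guest_addr must be PMD_SIZE aligned, see
 * gmap_map_segment() below.
 */
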
/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	unsigned long gmap_asce;

	gmap = gmap_alloc(limit);
	if (!gmap)
		return NULL;
	gmap->mm = mm;
	spin_lock(&mm->context.lock);
	list_add_rcu(&gmap->list, &mm->context.gmap_list);
	if (list_is_singular(&mm->context.gmap_list))
		gmap_asce = gmap->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
	spin_unlock(&mm->context.lock);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte(gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
	struct gmap_rmap *rmap, *rnext, *head;
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			head = radix_tree_delete(root, index);
			gmap_for_each_rmap_safe(rmap, rnext, head)
				kfree(rmap);
		}
	} while (nr > 0);
}

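/*
 * Note on the two helpers above: entries are not deleted from under the
 * radix tree iterator. Both helpers collect up to 16 indices per pass,
 * delete them afterwards, and restart the walk until the tree is empty.
 */
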
/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
static void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;

	/* Flush tlb of all gmaps (if not already done for shadows) */
	if (!(gmap_is_shadow(gmap) && gmap->removed))
		gmap_flush_tlb(gmap);
	/* Free all segment & region tables. */
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
		__free_pages(page, CRST_ALLOC_ORDER);
	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);

	/* Free additional data for a shadow gmap */
	if (gmap_is_shadow(gmap)) {
		struct ptdesc *ptdesc, *n;

		/* Free all page tables. */
		list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
			page_table_free_pgste(ptdesc);
		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
		/* Release reference to the parent */
		gmap_put(gmap->parent);
	}

	kfree(gmap);
}

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
	refcount_inc(&gmap->ref_count);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
	if (refcount_dec_and_test(&gmap->ref_count))
		gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
	struct gmap *sg, *next;
	unsigned long gmap_asce;

	/* Remove all shadow gmaps linked to this gmap */
	if (!list_empty(&gmap->children)) {
		spin_lock(&gmap->shadow_lock);
		list_for_each_entry_safe(sg, next, &gmap->children, list) {
			list_del(&sg->list);
			gmap_put(sg);
		}
		spin_unlock(&gmap->shadow_lock);
	}
	/* Remove gmap from the per-mm list */
	spin_lock(&gmap->mm->context.lock);
	list_del_rcu(&gmap->list);
	if (list_empty(&gmap->mm->context.gmap_list))
		gmap_asce = 0;
	else if (list_is_singular(&gmap->mm->context.gmap_list))
		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
					     struct gmap, list)->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
	spin_unlock(&gmap->mm->context.lock);
	synchronize_rcu();
	/* Put reference */
	gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	get_lowcore()->gmap = (unsigned long)gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	get_lowcore()->gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/**
 * gmap_get_enabled - get a pointer to the currently enabled gmap
 *
 * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
 */
struct gmap *gmap_get_enabled(void)
{
	return (struct gmap *)get_lowcore()->gmap;
}
EXPORT_SYMBOL_GPL(gmap_get_enabled);

/*
 * gmap_alloc_table is assumed to be called with mmap_lock held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	new = page_to_virt(page);
	crst_table_init(new, init);
	spin_lock(&gmap->guest_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = __pa(new) | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page->index = gaddr;
		page = NULL;
	}
	spin_unlock(&gmap->guest_table_lock);
	if (page)
		__free_pages(page, CRST_ALLOC_ORDER);
	return 0;
}

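/*
 * gmap_alloc_table() above records the guest address covered by a newly
 * allocated CRST page in page->index; __gmap_segment_gaddr() below relies
 * on that to map a segment-table entry pointer back to its guest address.
 */
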
/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
	struct page *page;
	unsigned long offset;

	offset = (unsigned long) entry / sizeof(unsigned long);
	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
	page = pmd_pgtable_page((pmd_t *) entry);
	return page->index + offset;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long *entry;
	int flush = 0;

	BUG_ON(gmap_is_shadow(gmap));
	spin_lock(&gmap->guest_table_lock);
	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
	if (entry) {
		flush = (*entry != _SEGMENT_ENTRY_EMPTY);
		*entry = _SEGMENT_ENTRY_EMPTY;
	}
	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

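/*
 * The guest_to_host radix tree is keyed by guest segment index
 * (gaddr >> PMD_SHIFT) and stores the host address of the segment, so
 * __gmap_translate() below is a pure lookup: it ORs the in-segment offset
 * back into the host address and never walks any page tables.
 */
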
/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	/* Note: guest_to_host is empty for a shadow gmap */
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	mmap_read_lock(gmap->mm);
	rc = __gmap_translate(gmap, gaddr);
	mmap_read_unlock(gmap->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
	rcu_read_unlock();
}

static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			   unsigned long gaddr);

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	u64 unprot;
	int rc;

	BUG_ON(gmap_is_shadow(gmap));
	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & _REGION1_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & _REGION2_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & _REGION3_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, vmaddr);
	VM_BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	/* large puds cannot yet be handled */
	if (pud_leaf(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* Are we allowed to use huge pages? */
	if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_EMPTY) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT, table);
		if (!rc) {
			if (pmd_leaf(*pmd)) {
				*table = (pmd_val(*pmd) &
					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
					| _SEGMENT_ENTRY_GMAP_UC;
			} else
				*table = pmd_val(*pmd) &
					_SEGMENT_ENTRY_HARDWARE_BITS;
		}
	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
		unprot = (u64)*table;
		unprot &= ~_SEGMENT_ENTRY_PROTECT;
		unprot |= _SEGMENT_ENTRY_GMAP_UC;
		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
	}
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}

/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
	       unsigned int fault_flags)
{
	unsigned long vmaddr;
	int rc;
	bool unlocked;

	mmap_read_lock(gmap->mm);

retry:
	unlocked = false;
	vmaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(vmaddr)) {
		rc = vmaddr;
		goto out_up;
	}
	if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
			     &unlocked)) {
		rc = -EFAULT;
		goto out_up;
	}
	/*
	 * In the case that fixup_user_fault unlocked the mmap_lock during
	 * fault-in, redo __gmap_translate to avoid racing with a
	 * map/unmap_segment.
	 */
	if (unlocked)
		goto retry;

	rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
	mmap_read_unlock(gmap->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

/*
 * this function is assumed to be called with mmap_lock held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	struct vm_area_struct *vma;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;

		vma = vma_lookup(gmap->mm, vmaddr);
		if (!vma || is_vm_hugetlb_page(vma))
			return;

		/* Get pointer to the page table entry */
		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
		if (likely(ptep)) {
			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
			pte_unmap_unlock(ptep, ptl);
		}
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
	unsigned long gaddr, vmaddr, size;
	struct vm_area_struct *vma;

	mmap_read_lock(gmap->mm);
	for (gaddr = from; gaddr < to;
	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		/* Find the vm address for the guest address */
		vmaddr = (unsigned long)
			radix_tree_lookup(&gmap->guest_to_host,
					  gaddr >> PMD_SHIFT);
		if (!vmaddr)
			continue;
		vmaddr |= gaddr & ~PMD_MASK;
		/* Find vma in the parent mm */
		vma = find_vma(gmap->mm, vmaddr);
		if (!vma)
			continue;
		/*
		 * We do not discard pages that are backed by
		 * hugetlbfs, so we don't have to refault them.
		 */
		if (is_vm_hugetlb_page(vma))
			continue;
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range_single(vma, vmaddr, size, NULL);
	}
	mmap_read_unlock(gmap->mm);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

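/*
 * Illustrative sketch of a pte notifier user (the function and variable
 * names here are made up; only the gmap_notifier API below is real):
 *
 *	static void my_notifier_call(struct gmap *gmap, unsigned long start,
 *				     unsigned long end)
 *	{
 *		// react to the invalidation of guest range [start, end]
 *	}
 *	static struct gmap_notifier my_nb = {
 *		.notifier_call = my_notifier_call,
 *	};
 *
 *	gmap_register_pte_notifier(&my_nb);
 *	...
 *	gmap_unregister_pte_notifier(&my_nb);
 */
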
/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add_rcu(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_rcu(&nb->list);
	spin_unlock(&gmap_notifier_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
			       unsigned long end)
{
	struct gmap_notifier *nb;

	list_for_each_entry(nb, &gmap_notifier_list, list)
		nb->notifier_call(gmap, start, end);
}

/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
					     unsigned long gaddr, int level)
{
	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
	unsigned long *table = gmap->table;

	if (gmap_is_shadow(gmap) && gmap->removed)
		return NULL;

	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
		return NULL;

	if (asce_type != _ASCE_TYPE_REGION1 &&
	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
		return NULL;

	switch (asce_type) {
	case _ASCE_TYPE_REGION1:
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (level == 4)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (level == 3)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (level == 2)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (level == 1)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
		table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
	}
	return table;
}

/**
 * gmap_pte_op_walk - walk the gmap page table, get the page table lock
 *		      and return the pte pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @ptl: pointer to the spinlock pointer
 *
 * Returns a pointer to the locked pte for a guest address, or NULL
 */
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
			       spinlock_t **ptl)
{
	unsigned long *table;

	BUG_ON(gmap_is_shadow(gmap));
	/* Walk the gmap page table, lock and get pte pointer */
	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
	if (!table || *table & _SEGMENT_ENTRY_INVALID)
		return NULL;
	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}

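/*
 * gmap_pte_op_walk()/gmap_pte_op_end() bracket an operation on a single
 * locked pte. When the walk fails, callers fall back to gmap_pte_op_fixup()
 * below, which faults the page in and connects it via __gmap_link() so the
 * walk can be retried.
 */
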
/**
 * gmap_pte_op_fixup - force a page in and connect the gmap page table
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @vmaddr: address in the host process address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if the caller can retry __gmap_translate (might fail again),
 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
 * up or connecting the gmap page table.
 */
static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
			     unsigned long vmaddr, int prot)
{
	struct mm_struct *mm = gmap->mm;
	unsigned int fault_flags;
	bool unlocked = false;

	BUG_ON(gmap_is_shadow(gmap));
	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
		return -EFAULT;
	if (unlocked)
		/* lost mmap_lock, caller has to retry __gmap_translate */
		return 0;
	/* Connect the page tables */
	return __gmap_link(gmap, gaddr, vmaddr);
}

/**
 * gmap_pte_op_end - release the page table lock
 * @ptep: pointer to the locked pte
 * @ptl: pointer to the page table spinlock
 */
static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
{
	pte_unmap_unlock(ptep, ptl);
}

/**
 * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
 *		      and return the pmd pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 *
 * Returns a pointer to the pmd for a guest address, or NULL
 */
static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
{
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
	if (!pmdp)
		return NULL;

	/* without huge pages, there is no need to take the table lock */
	if (!gmap->mm->context.allow_gmap_hpage_1m)
		return pmd_none(*pmdp) ? NULL : pmdp;

	spin_lock(&gmap->guest_table_lock);
	if (pmd_none(*pmdp)) {
		spin_unlock(&gmap->guest_table_lock);
		return NULL;
	}

	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
	if (!pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
	return pmdp;
}

/**
 * gmap_pmd_op_end - release the guest_table_lock if needed
 * @gmap: pointer to the guest mapping meta data structure
 * @pmdp: pointer to the pmd
 */
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
{
	if (pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
}

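/*
 * Locking convention for the pmd helpers above: the guest_table_lock is only
 * kept across the operation for large (leaf) pmds; entries that point to a
 * 4k page table are instead protected by the pte lock taken with
 * pte_alloc_map_lock().
 */
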
/*
 * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
 * @pmdp: pointer to the pmd to be protected
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns:
 * 0 if successfully protected
 * -EAGAIN if a fixup is needed
 * -EINVAL if unsupported notifier bits have been specified
 *
 * Expected to be called with sg->mm->mmap_lock in read and
 * guest_table_lock held.
 */
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
	pmd_t new = *pmdp;

	/* Fixup needed */
	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
		return -EAGAIN;

	if (prot == PROT_NONE && !pmd_i) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (prot == PROT_READ && !pmd_p) {
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (bits & GMAP_NOTIFY_MPROT)
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));

	/* Shadow GMAP protection needs split PMDs */
	if (bits & GMAP_NOTIFY_SHADOW)
		return -EINVAL;

	return 0;
}

/*
 * gmap_protect_pte - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd associated with the pte
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EAGAIN if a fixup is needed.
 *
 * Expected to be called with sg->mm->mmap_lock in read
 */
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int rc;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long pbits = 0;

	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return -EAGAIN;

	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
	if (!ptep)
		return -ENOMEM;

	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
	/* Protect and unlock. */
	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
	gmap_pte_op_end(ptep, ptl);
	return rc;
}

/*
 * gmap_protect_range - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EFAULT if gaddr is invalid (or the mapping for shadows is missing).
 *
 * Called with sg->mm->mmap_lock in read.
 */
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
			      unsigned long len, int prot, unsigned long bits)
{
	unsigned long vmaddr, dist;
	pmd_t *pmdp;
	int rc;

	BUG_ON(gmap_is_shadow(gmap));
	while (len) {
		rc = -EAGAIN;
		pmdp = gmap_pmd_op_walk(gmap, gaddr);
		if (pmdp) {
			if (!pmd_leaf(*pmdp)) {
				rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
						      bits);
				if (!rc) {
					len -= PAGE_SIZE;
					gaddr += PAGE_SIZE;
				}
			} else {
				rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
						      bits);
				if (!rc) {
					dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
					len = len < dist ? 0 : len - dist;
					gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
				}
			}
			gmap_pmd_op_end(gmap, pmdp);
		}
		if (rc) {
			if (rc == -EINVAL)
				return rc;

			/* -EAGAIN, fixup of userspace mm and gmap */
			vmaddr = __gmap_translate(gmap, gaddr);
			if (IS_ERR_VALUE(vmaddr))
				return vmaddr;
			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
			if (rc)
				return rc;
		}
	}
	return 0;
}

/**
 * gmap_mprotect_notify - change access rights for a range of ptes and
 *                        call the notifier if a pte changes
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if for each page in the given range a gmap mapping exists,
 * the new access rights could be set and the notifier could be armed.
 * If the gmap mapping is missing for one or more pages -EFAULT is
 * returned. If no memory could be allocated -ENOMEM is returned.
 * This function establishes missing page table entries.
 */
int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
			 unsigned long len, int prot)
{
	int rc;

	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
		return -EINVAL;
	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
		return -EINVAL;
	mmap_read_lock(gmap->mm);
	rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
	mmap_read_unlock(gmap->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_mprotect_notify);

/**
 * gmap_read_table - get an unsigned long value from a guest page table using
 *                   absolute addressing, without marking the page referenced.
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @val: pointer to the unsigned long value to return
 *
 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
 * if reading using the virtual address failed. -EINVAL if called on a gmap
 * shadow.
 *
 * Called with gmap->mm->mmap_lock in read.
 */
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
	unsigned long address, vmaddr;
	spinlock_t *ptl;
	pte_t *ptep, pte;
	int rc;

	if (gmap_is_shadow(gmap))
		return -EINVAL;

	while (1) {
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
		if (ptep) {
			pte = *ptep;
			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
				address = pte_val(pte) & PAGE_MASK;
				address += gaddr & ~PAGE_MASK;
				*val = *(unsigned long *)__va(address);
				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
				/* Do *NOT* clear the _PAGE_INVALID bit! */
				rc = 0;
			}
			gmap_pte_op_end(ptep, ptl);
		}
		if (!rc)
			break;
		vmaddr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
		if (rc)
			break;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_read_table);

/**
 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: vm address associated with the rmap
 * @rmap: pointer to the rmap structure
 *
 * Called with the sg->guest_table_lock
 */
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
				    struct gmap_rmap *rmap)
{
	struct gmap_rmap *temp;
	void __rcu **slot;

	BUG_ON(!gmap_is_shadow(sg));
	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	if (slot) {
		rmap->next = radix_tree_deref_slot_protected(slot,
							&sg->guest_table_lock);
		for (temp = rmap->next; temp; temp = temp->next) {
			if (temp->raddr == rmap->raddr) {
				kfree(rmap);
				return;
			}
		}
		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
	} else {
		rmap->next = NULL;
		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
				  rmap);
	}
}

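/*
 * host_to_rmap is keyed by host page frame (vmaddr >> PAGE_SHIFT) and holds
 * a singly linked list of gmap_rmap entries, one per shadow table address
 * that depends on that host page. gmap_shadow_notify() walks this list to
 * tear down the affected shadow tables when the host page changes.
 */
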
/**
 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow gmap
 * @paddr: address in the parent guest address space
 * @len: length of the memory area to protect
 *
 * Returns 0 if successfully protected and the rmap was created, -ENOMEM
 * if out of memory and -EFAULT if paddr is invalid.
 */
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
			     unsigned long paddr, unsigned long len)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	while (len) {
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr))
			return vmaddr;
		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
		if (!rmap)
			return -ENOMEM;
		rmap->raddr = raddr;
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc) {
			kfree(rmap);
			return rc;
		}
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (ptep) {
			spin_lock(&sg->guest_table_lock);
			rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
					     PGSTE_VSIE_BIT);
			if (!rc)
				gmap_insert_rmap(sg, vmaddr, rmap);
			spin_unlock(&sg->guest_table_lock);
			gmap_pte_op_end(ptep, ptl);
		}
		radix_tree_preload_end();
		if (rc) {
			kfree(rmap);
			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
			if (rc)
				return rc;
			continue;
		}
		paddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	return 0;
}

#define _SHADOW_RMAP_MASK	0x7
#define _SHADOW_RMAP_REGION1	0x5
#define _SHADOW_RMAP_REGION2	0x4
#define _SHADOW_RMAP_REGION3	0x3
#define _SHADOW_RMAP_SEGMENT	0x2
#define _SHADOW_RMAP_PGTABLE	0x1

/**
 * gmap_idte_one - invalidate a single region or segment table entry
 * @asce: region or segment table *origin* + table-type bits
 * @vaddr: virtual address to identify the table entry to flush
 *
 * The invalid bit of a single region or segment table entry is set
 * and the associated TLB entries depending on the entry are flushed.
 * The table-type of the @asce identifies the portion of the @vaddr
 * that is used as the invalidation index.
 */
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
	asm volatile(
		"	idte	%0,0,%1"
		: : "a" (asce), "a" (vaddr) : "cc");
}

/**
 * gmap_unshadow_page - remove a page from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
	if (!table || *table & _PAGE_INVALID)
		return;
	gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}

/**
 * __gmap_unshadow_pgt - remove all entries from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @pgt: pointer to the start of a shadow page table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
				unsigned long *pgt)
{
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
		pgt[i] = _PAGE_INVALID;
}

/**
 * gmap_unshadow_pgt - remove a shadow page table from a segment entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long *ste;
	phys_addr_t sto, pgt;
	struct ptdesc *ptdesc;

	BUG_ON(!gmap_is_shadow(sg));
	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
	*ste = _SEGMENT_ENTRY_EMPTY;
	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
	/* Free page table */
	ptdesc = page_ptdesc(phys_to_page(pgt));
	list_del(&ptdesc->pt_list);
	page_table_free_pgste(ptdesc);
}

/**
 * __gmap_unshadow_sgt - remove all entries from a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @sgt: pointer to the start of a shadow segment table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
				unsigned long *sgt)
{
	struct ptdesc *ptdesc;
	phys_addr_t pgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
			continue;
		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
		sgt[i] = _SEGMENT_ENTRY_EMPTY;
		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
		/* Free page table */
		ptdesc = page_ptdesc(phys_to_page(pgt));
		list_del(&ptdesc->pt_list);
		page_table_free_pgste(ptdesc);
	}
}

/**
 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the shadow->guest_table_lock
 */
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long r3o, *r3e;
	phys_addr_t sgt;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
	sgt = *r3e & _REGION_ENTRY_ORIGIN;
	*r3e = _REGION3_ENTRY_EMPTY;
	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
	/* Free segment table */
	page = phys_to_page(sgt);
	list_del(&page->lru);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 * @r3t: pointer to the start of a shadow region-3 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
				unsigned long *r3t)
{
	struct page *page;
	phys_addr_t sgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
		r3t[i] = _REGION3_ENTRY_EMPTY;
		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
		/* Free segment table */
		page = phys_to_page(sgt);
		list_del(&page->lru);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r2o, *r2e;
	phys_addr_t r3t;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
	r3t = *r2e & _REGION_ENTRY_ORIGIN;
	*r2e = _REGION2_ENTRY_EMPTY;
	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
	/* Free region 3 table */
	page = phys_to_page(r3t);
	list_del(&page->lru);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r2t: pointer to the start of a shadow region-2 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
				unsigned long *r2t)
{
	phys_addr_t r3t;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
		r2t[i] = _REGION2_ENTRY_EMPTY;
		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
		/* Free region 3 table */
		page = phys_to_page(r3t);
		list_del(&page->lru);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r1o, *r1e;
	struct page *page;
	phys_addr_t r2t;

	BUG_ON(!gmap_is_shadow(sg));
	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
	r2t = *r1e & _REGION_ENTRY_ORIGIN;
	*r1e = _REGION1_ENTRY_EMPTY;
	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
	/* Free region 2 table */
	page = phys_to_page(r2t);
	list_del(&page->lru);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r1t: pointer to the start of a shadow region-1 table
 *
 * Called with the shadow->guest_table_lock
 */
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
				unsigned long *r1t)
{
	unsigned long asce;
	struct page *page;
	phys_addr_t r2t;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
		/* Clear entry and flush translation r1t -> r2t */
		gmap_idte_one(asce, raddr);
		r1t[i] = _REGION1_ENTRY_EMPTY;
		/* Free region 2 table */
		page = phys_to_page(r2t);
		list_del(&page->lru);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow - remove a shadow page table completely and all descendants
 * @sg: pointer to the shadow guest address space structure
 *
 * Called with sg->guest_table_lock
 */
static void gmap_unshadow(struct gmap *sg)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	if (sg->removed)
		return;
	sg->removed = 1;
	gmap_call_notifier(sg, 0, -1UL);
	gmap_flush_tlb(sg);
	table = __va(sg->asce & _ASCE_ORIGIN);
	switch (sg->asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		__gmap_unshadow_r1t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION2:
		__gmap_unshadow_r2t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION3:
		__gmap_unshadow_r3t(sg, 0, table);
		break;
	case _ASCE_TYPE_SEGMENT:
		__gmap_unshadow_sgt(sg, 0, table);
		break;
	}
}

/**
 * gmap_find_shadow - find a specific asce in the list of shadow tables
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns the pointer to a gmap if a shadow table with the given asce is
 * already available, ERR_PTR(-EAGAIN) if another one is just being created,
 * otherwise NULL
 */
static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
				     int edat_level)
{
	struct gmap *sg;

	list_for_each_entry(sg, &parent->children, list) {
		if (sg->orig_asce != asce || sg->edat_level != edat_level ||
		    sg->removed)
			continue;
		if (!sg->initialized)
			return ERR_PTR(-EAGAIN);
		refcount_inc(&sg->ref_count);
		return sg;
	}
	return NULL;
}

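/*
 * gmap_find_shadow() returns with an extra reference on the shadow it found;
 * ERR_PTR(-EAGAIN) means another CPU is still initializing a matching shadow
 * (sg->initialized not yet set) and the caller has to retry.
 */
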
/**
 * gmap_shadow_valid - check if a shadow guest address space matches the
 *                     given properties and is still valid
 * @sg: pointer to the shadow guest address space structure
 * @asce: ASCE for which the shadow table is requested
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns 1 if the gmap shadow is still valid and matches the given
 * properties, the caller can continue using it. Returns 0 otherwise; the
 * caller has to request a new shadow gmap in this case.
 */
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
{
	if (sg->removed)
		return 0;
	return sg->orig_asce == asce && sg->edat_level == edat_level;
}
EXPORT_SYMBOL_GPL(gmap_shadow_valid);

/**
 * gmap_shadow - create/find a shadow guest address space
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * The pages of the top level page table referred by the asce parameter
 * will be set to read-only and marked in the PGSTEs of the kvm process.
 * The shadow table will be removed automatically on any change to the
 * PTE mapping for the source table.
 *
 * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
 * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
 * parent gmap table could not be protected.
 */
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
			 int edat_level)
{
	struct gmap *sg, *new;
	unsigned long limit;
	int rc;

	BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
	BUG_ON(gmap_is_shadow(parent));
	spin_lock(&parent->shadow_lock);
	sg = gmap_find_shadow(parent, asce, edat_level);
	spin_unlock(&parent->shadow_lock);
	if (sg)
		return sg;
	/* Create a new shadow gmap */
	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
	if (asce & _ASCE_REAL_SPACE)
		limit = -1UL;
	new = gmap_alloc(limit);
	if (!new)
		return ERR_PTR(-ENOMEM);
	new->mm = parent->mm;
	new->parent = gmap_get(parent);
	new->private = parent->private;
	new->orig_asce = asce;
	new->edat_level = edat_level;
	new->initialized = false;
	spin_lock(&parent->shadow_lock);
	/* Recheck if another CPU created the same shadow */
	sg = gmap_find_shadow(parent, asce, edat_level);
	if (sg) {
		spin_unlock(&parent->shadow_lock);
		gmap_free(new);
		return sg;
	}
	if (asce & _ASCE_REAL_SPACE) {
		/* only allow one real-space gmap shadow */
		list_for_each_entry(sg, &parent->children, list) {
			if (sg->orig_asce & _ASCE_REAL_SPACE) {
				spin_lock(&sg->guest_table_lock);
				gmap_unshadow(sg);
				spin_unlock(&sg->guest_table_lock);
				list_del(&sg->list);
				gmap_put(sg);
				break;
			}
		}
	}
	refcount_set(&new->ref_count, 2);
	list_add(&new->list, &parent->children);
	if (asce & _ASCE_REAL_SPACE) {
		/* nothing to protect, return right away */
		new->initialized = true;
		spin_unlock(&parent->shadow_lock);
		return new;
	}
	spin_unlock(&parent->shadow_lock);
	/* protect after insertion, so it will be covered by the invalidation */
	mmap_read_lock(parent->mm);
	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
				((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
				PROT_READ, GMAP_NOTIFY_SHADOW);
	mmap_read_unlock(parent->mm);
	spin_lock(&parent->shadow_lock);
	new->initialized = true;
	if (rc) {
		list_del(&new->list);
		gmap_free(new);
		new = ERR_PTR(rc);
	}
	spin_unlock(&parent->shadow_lock);
	return new;
}
EXPORT_SYMBOL_GPL(gmap_shadow);

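/*
 * The gmap_shadow_r2t/r3t/sgt/pgt() functions below all follow the same
 * pattern: allocate a shadow table, install it marked invalid, make the
 * source table read-only in the parent gmap via gmap_protect_rmap(), and
 * only then clear the invalid bit. Fake tables skip the protection step.
 */
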
/**
 * gmap_shadow_r2t - create an empty shadow region 2 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r2t: parent gmap address of the region 2 table to get shadowed
 * @fake: r2t references contiguous guest memory block, not a r2t
 *
 * The r2t parameter specifies the address of the source table. The
 * four pages of the source table are made read-only in the parent gmap
 * address space. A write to the source table area @r2t will automatically
 * remove the shadow r2 table and all of its descendants.
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r2t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region second table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	page->index = r2t & _REGION_ENTRY_ORIGIN;
	if (fake)
		page->index |= GMAP_SHADOW_FAKE_TABLE;
	s_r2t = page_to_phys(page);
	/* Install shadow region second table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r2t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r2t & _REGION_ENTRY_PROTECT);
	list_add(&page->lru, &sg->crst_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r2t read-only in parent gmap */
	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
	origin = r2t & _REGION_ENTRY_ORIGIN;
	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 4);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r2t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);

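/*
 * A "fake" table (GMAP_SHADOW_FAKE_TABLE set in page->index) stands in for a
 * contiguous guest memory block rather than a real guest table, so there is
 * no source table that would need protection in the parent gmap.
 */
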
/**
 * gmap_shadow_r3t - create a shadow region 3 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r3t: parent gmap address of the region 3 table to get shadowed
 * @fake: r3t references contiguous guest memory block, not a r3t
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r3t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region third table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	page->index = r3t & _REGION_ENTRY_ORIGIN;
	if (fake)
		page->index |= GMAP_SHADOW_FAKE_TABLE;
	s_r3t = page_to_phys(page);
	/* Install shadow region third table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r3t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r3t & _REGION_ENTRY_PROTECT);
	list_add(&page->lru, &sg->crst_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r3t read-only in parent gmap */
	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
	origin = r3t & _REGION_ENTRY_ORIGIN;
	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 3);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r3t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);

/**
 * gmap_shadow_sgt - create a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @sgt: parent gmap address of the segment table to get shadowed
 * @fake: sgt references contiguous guest memory block, not a sgt
 *
 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_sgt;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
	/* Allocate a shadow segment table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	page->index = sgt & _REGION_ENTRY_ORIGIN;
	if (fake)
		page->index |= GMAP_SHADOW_FAKE_TABLE;
	s_sgt = page_to_phys(page);
	/* Install shadow segment table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_sgt | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= sgt & _REGION_ENTRY_PROTECT;
	list_add(&page->lru, &sg->crst_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make sgt read-only in parent gmap */
	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
	origin = sgt & _REGION_ENTRY_ORIGIN;
	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 2);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_sgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);

/**
 * gmap_shadow_pgt_lookup - find a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: the address in the shadow guest address space
 * @pgt: parent gmap address of the page table to get shadowed
 * @dat_protection: if the pgtable is marked as protected by dat
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if the shadow page table was found and -EAGAIN if the page
 * table was not found.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
			   unsigned long *pgt, int *dat_protection,
			   int *fake)
{
	unsigned long *table;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
		/* Shadow page tables are full pages (pte+pgste) */
		page = pfn_to_page(*table >> PAGE_SHIFT);
		*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
		*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
		rc = 0;
	} else {
		rc = -EAGAIN;
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);

/**
 * gmap_shadow_pgt - instantiate a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: parent gmap address of the page table to get shadowed
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with gmap->mm->mmap_lock in read
 */
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
		    int fake)
{
	unsigned long raddr, origin;
	unsigned long *table;
	struct ptdesc *ptdesc;
	phys_addr_t s_pgt;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
	/* Allocate a shadow page table */
	ptdesc = page_table_alloc_pgste(sg->mm);
	if (!ptdesc)
		return -ENOMEM;
	ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN;
	if (fake)
		ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE;
	s_pgt = page_to_phys(ptdesc_page(ptdesc));
	/* Install shadow page table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	/* mark as invalid as long as the parent table is not protected */
	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
	list_add(&ptdesc->pt_list, &sg->pt_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_SEGMENT_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make pgt read-only in parent gmap page table (not the pgste) */
	raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 1);
		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_SEGMENT_ENTRY_INVALID;
	} else {
		gmap_unshadow_pgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	page_table_free_pgste(ptdesc);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt);


/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr, paddr;
	spinlock_t *ptl;
	pte_t *sptep, *tptep;
	int prot;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;

	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
	if (!rmap)
		return -ENOMEM;
	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

	while (1) {
		paddr = pte_val(pte) & PAGE_MASK;
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc)
			break;
		rc = -EAGAIN;
		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (sptep) {
			spin_lock(&sg->guest_table_lock);
			/* Get page table pointer */
			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
			if (!tptep) {
				spin_unlock(&sg->guest_table_lock);
				gmap_pte_op_end(sptep, ptl);
				radix_tree_preload_end();
				break;
			}
			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
			if (rc > 0) {
				/* Success and a new mapping */
				gmap_insert_rmap(sg, vmaddr, rmap);
				rmap = NULL;
				rc = 0;
			}
			gmap_pte_op_end(sptep, ptl);
			spin_unlock(&sg->guest_table_lock);
		}
		radix_tree_preload_end();
		if (!rc)
			break;
		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
		if (rc)
			break;
	}
	kfree(rmap);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);
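
/*
 * Editor's usage sketch (hypothetical, not part of gmap.c): the contract of
 * gmap_shadow_page() as seen from a fault handler.  The parent pte for the
 * faulting guest page is assumed to have been resolved already (in KVM the
 * guest-access code does that); the point illustrated here is only that
 * -EAGAIN means "the shadow structure changed under us, restart the whole
 * shadow fault", while other return values are final for this attempt.
 */
static int example_shadow_one_page(struct gmap *sg, unsigned long saddr,
				   pte_t parent_pte)
{
	int rc;

	rc = gmap_shadow_page(sg, saddr, parent_pte);
	if (rc == -EAGAIN)
		return rc;	/* restart: tables were (un)shadowed meanwhile */
	return rc;		/* 0 on success, -ENOMEM/-EFAULT otherwise */
}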

/*
 * gmap_shadow_notify - handle notifications on shadow gmap
 *
 * Called with sg->parent->shadow_lock.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
			       unsigned long gaddr)
{
	struct gmap_rmap *rmap, *rnext, *head;
	unsigned long start, end, bits, raddr;

	BUG_ON(!gmap_is_shadow(sg));

	spin_lock(&sg->guest_table_lock);
	if (sg->removed) {
		spin_unlock(&sg->guest_table_lock);
		return;
	}
	/* Check for top level table */
	start = sg->orig_asce & _ASCE_ORIGIN;
	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
	    gaddr < end) {
		/* The complete shadow table has to go */
		gmap_unshadow(sg);
		spin_unlock(&sg->guest_table_lock);
		list_del(&sg->list);
		gmap_put(sg);
		return;
	}
	/* Remove the page table tree for one specific entry */
	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	gmap_for_each_rmap_safe(rmap, rnext, head) {
		bits = rmap->raddr & _SHADOW_RMAP_MASK;
		raddr = rmap->raddr ^ bits;
		switch (bits) {
		case _SHADOW_RMAP_REGION1:
			gmap_unshadow_r2t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION2:
			gmap_unshadow_r3t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION3:
			gmap_unshadow_sgt(sg, raddr);
			break;
		case _SHADOW_RMAP_SEGMENT:
			gmap_unshadow_pgt(sg, raddr);
			break;
		case _SHADOW_RMAP_PGTABLE:
			gmap_unshadow_page(sg, raddr);
			break;
		}
		kfree(rmap);
	}
	spin_unlock(&sg->guest_table_lock);
}

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
		 pte_t *pte, unsigned long bits)
{
	unsigned long offset, gaddr = 0;
	unsigned long *table;
	struct gmap *gmap, *sg, *next;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (PAGE_SIZE / sizeof(pte_t));
	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		table = radix_tree_lookup(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (table)
			gaddr = __gmap_segment_gaddr(table) + offset;
		spin_unlock(&gmap->guest_table_lock);
		if (!table)
			continue;

		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
			spin_lock(&gmap->shadow_lock);
			list_for_each_entry_safe(sg, next,
						 &gmap->children, list)
				gmap_shadow_notify(sg, vmaddr, gaddr);
			spin_unlock(&gmap->shadow_lock);
		}
		if (bits & PGSTE_IN_BIT)
			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);

static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			     unsigned long gaddr)
{
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}

/**
 * gmap_pmdp_xchg - exchange a gmap pmd with another
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @new: replacement entry
 * @gaddr: the affected guest address
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			   unsigned long gaddr)
{
	gaddr &= HPAGE_MASK;
	pmdp_notify_gmap(gmap, pmdp, gaddr);
	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
			    IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
	else
		__pmdp_csp(pmdp);
	set_pmd(pmdp, new);
}

static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
			    int purge)
{
	pmd_t *pmdp;
	struct gmap *gmap;
	unsigned long gaddr;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
						  vmaddr >> PMD_SHIFT);
		if (pmdp) {
			gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC));
			if (purge)
				__pmdp_csp(pmdp);
			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}

/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 0);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);

/**
 * gmap_pmdp_csp - csp all affected guest pmd entries
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 1);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_csp);

/**
 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *entry, gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		entry = radix_tree_delete(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (entry) {
			pmdp = (pmd_t *)entry;
			gaddr = __gmap_segment_gaddr(entry);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
					   _SEGMENT_ENTRY_GMAP_UC));
			if (MACHINE_HAS_TLB_GUEST)
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_LOCAL);
			else if (MACHINE_HAS_IDTE)
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
			*entry = _SEGMENT_ENTRY_EMPTY;
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);

/**
 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *entry, gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		entry = radix_tree_delete(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (entry) {
			pmdp = (pmd_t *)entry;
			gaddr = __gmap_segment_gaddr(entry);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
					   _SEGMENT_ENTRY_GMAP_UC));
			if (MACHINE_HAS_TLB_GUEST)
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_GLOBAL);
			else if (MACHINE_HAS_IDTE)
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
			else
				__pmdp_csp(pmdp);
			*entry = _SEGMENT_ENTRY_EMPTY;
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
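
/*
 * Editor's usage sketch (hypothetical, not part of gmap.c): the exported
 * gmap_pmdp_* helpers above are meant to be driven from the host pmd
 * invalidation paths (arch/s390/mm/pgtable.c), which pick the variant that
 * matches the TLB flush they are about to perform.  A simplified dispatcher
 * might look like this; "local_flush" is an assumed flag meaning the flush
 * is restricted to the local CPU.
 */
static void example_notify_gmaps_on_pmd_invalidate(struct mm_struct *mm,
						   unsigned long vmaddr,
						   bool local_flush)
{
	/* only mms with pgstes can be linked into gmaps */
	if (!mm_has_pgste(mm))
		return;
	if (local_flush)
		gmap_pmdp_idte_local(mm, vmaddr);
	else
		gmap_pmdp_idte_global(mm, vmaddr);
}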

/**
 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
 * @gmap: pointer to guest address space
 * @pmdp: pointer to the pmd to be tested
 * @gaddr: virtual address in the guest address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
					  unsigned long gaddr)
{
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return false;

	/* Already protected memory, which did not change is clean */
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
		return false;

	/* Clear UC indication and reset protection */
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
	return true;
}

/**
 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
 * @gmap: pointer to guest address space
 * @bitmap: dirty bitmap for this pmd
 * @gaddr: virtual address in the guest address space
 * @vmaddr: virtual address in the host address space
 *
 * This function is assumed to be called with gmap->mm->mmap_lock
 * held in read mode.
 */
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
			     unsigned long gaddr, unsigned long vmaddr)
{
	int i;
	pmd_t *pmdp;
	pte_t *ptep;
	spinlock_t *ptl;

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return;

	if (pmd_leaf(*pmdp)) {
		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
			bitmap_fill(bitmap, _PAGE_ENTRIES);
	} else {
		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
			if (!ptep)
				continue;
			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
				set_bit(i, bitmap);
			pte_unmap_unlock(ptep, ptl);
		}
	}
	gmap_pmd_op_end(gmap, pmdp);
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
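
/*
 * Editor's usage sketch (hypothetical, not part of gmap.c): harvesting the
 * dirty state of a range of guest segments with gmap_sync_dirty_log_pmd()
 * above, roughly the way KVM's dirty-log sync iterates over a memslot.  The
 * bitmap covers the _PAGE_ENTRIES ptes of one 1 MB segment at a time; the
 * caller is assumed to serialize against the host address space the same
 * way KVM does (mmap_lock held for reading) and to supply matching guest
 * (@gaddr) and host (@vmaddr) start addresses.
 */
static void example_sync_dirty_range(struct gmap *gmap, unsigned long gaddr,
				     unsigned long vmaddr,
				     unsigned long nr_segments,
				     unsigned long bitmap[4])
{
	unsigned long i;

	for (i = 0; i < nr_segments; i++) {
		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		/* ... consume the 256 bits for this segment here ... */
		gaddr += HPAGE_SIZE;
		vmaddr += HPAGE_SIZE;
	}
}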

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
				    unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	split_huge_pmd(vma, pmd, addr);
	return 0;
}

static const struct mm_walk_ops thp_split_walk_ops = {
	.pmd_entry	= thp_split_walk_pmd_entry,
	.walk_lock	= PGWALK_WRLOCK_VERIFY,
};

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma) {
		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
		walk_page_vma(vma, &thp_split_walk_ops, NULL);
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * switch on pgstes for its userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	/* Fail if the page tables are 2K */
	if (!mm_alloc_pgste(mm))
		return -EINVAL;
	mmap_write_lock(mm);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	mmap_write_unlock(mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a shared zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/*
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390, that even if mm were to remove a page table,
 * and racing with walk_page_range_vma() calling pte_offset_map_lock()
 * would fail, it will never insert a page table containing empty zero
 * pages once mm_forbids_zeropage(mm) i.e.
 * mm->context.allow_cow_sharing is set to 0.
 */
static int __s390_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

static int __s390_disable_cow_sharing(struct mm_struct *mm)
{
	int rc;

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __s390_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}

/*
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any existing ones.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int s390_disable_cow_sharing(void)
{
	int rc;

	mmap_write_lock(current->mm);
	rc = __s390_disable_cow_sharing(current->mm);
	mmap_write_unlock(current->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
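
/*
 * Editor's usage sketch (hypothetical, not part of gmap.c): the order in
 * which a KVM-like user of these interfaces would typically prepare the
 * current process.  s390_enable_sie() has to come first so that page tables
 * carry pgstes; s390_disable_cow_sharing() is needed before features that
 * cannot tolerate COW sharing, such as storage keys or protected guests.
 * Errors are simply propagated.
 */
static int example_prepare_current_mm_for_guests(void)
{
	int rc;

	rc = s390_enable_sie();		/* switch the mm to page tables with pgstes */
	if (rc)
		return rc;
	return s390_disable_cow_sharing();	/* no KSM, no shared zeropages */
}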

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

/*
 * Give a chance to schedule after setting a key to 256 pages.
 * We only hold the mm lock, which is a rwsem and the kvm srcu.
 * Both can sleep.
 */
static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	cond_resched();
	return 0;
}

static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long hmask,
				      unsigned long addr, unsigned long next,
				      struct mm_walk *walk)
{
	pmd_t *pmd = (pmd_t *)pte;
	unsigned long start, end;
	struct folio *folio = page_folio(pmd_page(*pmd));

	/*
	 * The write check makes sure we do not set a key on shared
	 * memory. This is needed as the walker does not differentiate
	 * between actual guest memory and the process self mapping of
	 * shared libraries.
	 */
	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
		return 0;

	start = pmd_val(*pmd) & HPAGE_MASK;
	end = start + HPAGE_SIZE;
	__storage_key_init_range(start, end);
	set_bit(PG_arch_1, &folio->flags);
	cond_resched();
	return 0;
}

static const struct mm_walk_ops enable_skey_walk_ops = {
	.hugetlb_entry		= __s390_enable_skey_hugetlb,
	.pte_entry		= __s390_enable_skey_pte,
	.pmd_entry		= __s390_enable_skey_pmd,
	.walk_lock		= PGWALK_WRLOCK,
};

int s390_enable_skey(void)
{
	struct mm_struct *mm = current->mm;
	int rc = 0;

	mmap_write_lock(mm);
	if (mm_uses_skeys(mm))
		goto out_up;

	mm->context.uses_skeys = 1;
	rc = __s390_disable_cow_sharing(mm);
	if (rc) {
		mm->context.uses_skeys = 0;
		goto out_up;
	}
	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);

out_up:
	mmap_write_unlock(mm);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

static const struct mm_walk_ops reset_cmma_walk_ops = {
	.pte_entry		= __s390_reset_cmma,
	.walk_lock		= PGWALK_WRLOCK,
};

void s390_reset_cmma(struct mm_struct *mm)
{
	mmap_write_lock(mm);
	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
	mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

#define GATHER_GET_PAGES 32

struct reset_walk_state {
	unsigned long next;
	unsigned long count;
	unsigned long pfns[GATHER_GET_PAGES];
};

static int s390_gather_pages(pte_t *ptep, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	struct reset_walk_state *p = walk->private;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_present(pte)) {
		/* we have a reference from the mapping, take an extra one */
		get_page(phys_to_page(pte_val(pte)));
		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
		p->next = next;
		p->count++;
	}
	return p->count >= GATHER_GET_PAGES;
}

static const struct mm_walk_ops gather_pages_ops = {
	.pte_entry = s390_gather_pages,
	.walk_lock = PGWALK_RDLOCK,
};

/*
 * Call the Destroy secure page UVC on each page in the given array of PFNs.
 * Each page needs to have an extra reference, which will be released here.
 */
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
	struct folio *folio;
	unsigned long i;

	for (i = 0; i < count; i++) {
		folio = pfn_folio(pfns[i]);
		/* we always have an extra reference */
		uv_destroy_folio(folio);
		/* get rid of the extra reference */
		folio_put(folio);
		cond_resched();
	}
}
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);

/**
 * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
 * in the given range of the given address space.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 * @interruptible: if not 0, stop when a fatal signal is received
 *
 * Walk the given range of the given address space and call the destroy
 * secure page UVC on each page. Optionally exit early if a fatal signal is
 * pending.
 *
 * Return: 0 on success, -EINTR if the function stopped before completing
 */
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool interruptible)
{
	struct reset_walk_state state = { .next = start };
	int r = 1;

	while (r > 0) {
		state.count = 0;
		mmap_read_lock(mm);
		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
		mmap_read_unlock(mm);
		cond_resched();
		s390_uv_destroy_pfns(state.count, state.pfns);
		if (interruptible && fatal_signal_pending(current))
			return -EINTR;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);

/**
 * s390_unlist_old_asce - Remove the topmost level of page tables from the
 * list of page tables of the gmap.
 * @gmap: the gmap whose table is to be removed
 *
 * On s390x, KVM keeps a list of all pages containing the page tables of the
 * gmap (the CRST list). This list is used at tear down time to free all
 * pages that are now not needed anymore.
 *
 * This function removes the topmost page of the tree (the one pointed to by
 * the ASCE) from the CRST list.
 *
 * This means that it will not be freed when the VM is torn down, and needs
 * to be handled separately by the caller, unless a leak is actually
 * intended. Notice that this function will only remove the page from the
 * list, the page will still be used as a top level table and the asce.
 */
void s390_unlist_old_asce(struct gmap *gmap)
{
	struct page *old;

	old = virt_to_page(gmap->table);
	spin_lock(&gmap->guest_table_lock);
	list_del(&old->lru);
	/*
	 * Sometimes the topmost page might need to be "removed" multiple
	 * times, for example if the VM is rebooted into secure mode several
	 * times concurrently, or if s390_replace_asce fails after calling
	 * s390_remove_old_asce and is attempted again later. In that case
	 * the old asce has been removed from the list, and therefore it
	 * will not be freed when the VM terminates, but the ASCE is still
	 * in use and still pointed to.
	 * A subsequent call to replace_asce will follow the pointer and try
	 * to remove the same page from the list again.
	 * Therefore it's necessary that the page of the ASCE has valid
	 * pointers, so list_del can work (and do nothing) the second time,
	 * without dereferencing stale or invalid pointers.
	 */
	INIT_LIST_HEAD(&old->lru);
	spin_unlock(&gmap->guest_table_lock);
}
EXPORT_SYMBOL_GPL(s390_unlist_old_asce);

/**
 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
 * @gmap: the gmap whose ASCE needs to be replaced
 *
 * If the ASCE is a SEGMENT type then this function will return -EINVAL;
 * replacing a segment type ASCE would leave the pointers in the
 * host_to_guest radix tree pointing to the wrong pages, causing
 * use-after-free and memory corruption.
 * If the allocation of the new top level page table fails, the ASCE is not
 * replaced.
 * In any case, the old ASCE is always removed from the gmap CRST list.
 * Therefore the caller has to make sure to save a pointer to it
 * beforehand, unless a leak is actually intended.
 */
int s390_replace_asce(struct gmap *gmap)
{
	unsigned long asce;
	struct page *page;
	void *table;

	s390_unlist_old_asce(gmap);

	/* Replacing segment type ASCEs would cause serious issues */
	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	page->index = 0;
	table = page_to_virt(page);
	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));

	/*
	 * The caller has to deal with the old ASCE, but here we make sure
	 * the new one is properly added to the CRST list, so that
	 * it will be freed when the VM is torn down.
	 */
	spin_lock(&gmap->guest_table_lock);
	list_add(&page->lru, &gmap->crst_list);
	spin_unlock(&gmap->guest_table_lock);

	/* Set new table origin while preserving existing ASCE control bits */
	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
	WRITE_ONCE(gmap->asce, asce);
	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
	WRITE_ONCE(gmap->table, table);

	return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);
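
/*
 * Editor's usage sketch (hypothetical, not part of gmap.c): how a caller
 * such as the KVM protected-VM teardown path is expected to combine the two
 * helpers above.  The old top level table must be saved before the call,
 * because s390_replace_asce() always unlists it (even on failure); the
 * caller later frees the saved page, or hands the memory to the ultravisor
 * for destruction, to avoid a leak.
 */
static int example_detach_old_asce(struct gmap *gmap, struct page **old_crst)
{
	/*
	 * Remember the current top level table first: it is removed from
	 * the CRST list in any case and would otherwise never be freed.
	 */
	*old_crst = virt_to_page(gmap->table);
	return s390_replace_asce(gmap);
}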