// SPDX-License-Identifier: GPL-2.0
/*
 * This is a module to test the HMM (Heterogeneous Memory Management)
 * mirror and zone device private memory migration APIs of the kernel.
 * Userspace programs can register with the driver to mirror their own address
 * space and can use the device to read/write any valid virtual address.
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched/mm.h>
#include <linux/platform_device.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>

#include "test_hmm_uapi.h"

#define DMIRROR_NDEVICES		4
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16

/*
 * For device_private pages, dpage is just a dummy struct page
 * representing a piece of device memory. dmirror_devmem_alloc_page
 * allocates a real system memory page as backing storage to fake a
 * real device. zone_device_data points to that backing page. But
 * for device_coherent memory, the struct page represents real
 * physical CPU-accessible memory that we can use directly.
 */
#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
			   (page)->zone_device_data : (page))

static unsigned long spm_addr_dev0;
module_param(spm_addr_dev0, long, 0644);
MODULE_PARM_DESC(spm_addr_dev0,
		"Specify start address for SPM (special purpose memory) used for device 0. By setting this, the Coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static unsigned long spm_addr_dev1;
module_param(spm_addr_dev1, long, 0644);
MODULE_PARM_DESC(spm_addr_dev1,
		"Specify start address for SPM (special purpose memory) used for device 1. By setting this, the Coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;

struct dmirror_device;

struct dmirror_bounce {
	void			*ptr;
	unsigned long		size;
	unsigned long		addr;
	unsigned long		cpages;
};

#define DPT_XA_TAG_ATOMIC 1UL
#define DPT_XA_TAG_WRITE 3UL

/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};

/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device	*mdevice;
	struct xarray		pt;
	struct mmu_interval_notifier	notifier;
	struct mutex		mutex;
};

/*
 * ZONE_DEVICE pages for migration and simulating device memory.
 */
struct dmirror_chunk {
	struct dev_pagemap	pagemap;
	struct dmirror_device	*mdevice;
	bool			remove;
};

/*
 * Per device data.
 */
struct dmirror_device {
	struct cdev		cdevice;
	unsigned int		zone_device_type;
	struct device		device;

	unsigned int		devmem_capacity;
	unsigned int		devmem_count;
	struct dmirror_chunk	**devmem_chunks;
	struct mutex		devmem_lock;	/* protects the above */

	unsigned long		calloc;
	unsigned long		cfree;
	struct page		*free_pages;
	spinlock_t		lock;		/* protects the above */
};

static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];

static int dmirror_bounce_init(struct dmirror_bounce *bounce,
			       unsigned long addr,
			       unsigned long size)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->cpages = 0;
	bounce->ptr = vmalloc(size);
	if (!bounce->ptr)
		return -ENOMEM;
	return 0;
}

static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
{
	return (mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
}

static enum migrate_vma_direction
dmirror_select_device(struct dmirror *dmirror)
{
	return (dmirror->mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
		MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
		MIGRATE_VMA_SELECT_DEVICE_COHERENT;
}

static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
	vfree(bounce->ptr);
}

static int dmirror_fops_open(struct inode *inode, struct file *filp)
{
	struct cdev *cdev = inode->i_cdev;
	struct dmirror *dmirror;
	int ret;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		return -ENOMEM;

	dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
	mutex_init(&dmirror->mutex);
	xa_init(&dmirror->pt);

	ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
				0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
	if (ret) {
		kfree(dmirror);
		return ret;
	}

	filp->private_data = dmirror;
	return 0;
}

static int dmirror_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;

	mmu_interval_notifier_remove(&dmirror->notifier);
	xa_destroy(&dmirror->pt);
	kfree(dmirror);
	return 0;
}

static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
{
	return container_of(page->pgmap, struct dmirror_chunk, pagemap);
}

static struct dmirror_device *dmirror_page_to_device(struct page *page)
{
	return dmirror_page_to_chunk(page)->mdevice;
}

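/*
 * Record the pages returned by hmm_range_fault() in the mirror "page table"
 * (the dmirror->pt XArray), tagging writable entries with DPT_XA_TAG_WRITE.
 * Called with dmirror->mutex held after the notifier sequence number has
 * been validated.
 */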
static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
{
	unsigned long *pfns = range->hmm_pfns;
	unsigned long pfn;

	for (pfn = (range->start >> PAGE_SHIFT);
	     pfn < (range->end >> PAGE_SHIFT);
	     pfn++, pfns++) {
		struct page *page;
		void *entry;

		/*
		 * Since we asked for hmm_range_fault() to populate pages,
		 * it shouldn't return an error entry on success.
		 */
		WARN_ON(*pfns & HMM_PFN_ERROR);
		WARN_ON(!(*pfns & HMM_PFN_VALID));

		page = hmm_pfn_to_page(*pfns);
		WARN_ON(!page);

		entry = page;
		if (*pfns & HMM_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
			return -EFAULT;
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry))
			return xa_err(entry);
	}

	return 0;
}

static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
			      unsigned long end)
{
	unsigned long pfn;
	void *entry;

	/*
	 * The XArray doesn't hold references to pages since it relies on
	 * the mmu notifier to clear page pointers when they become stale.
	 * Therefore, it is OK to just clear the entry.
	 */
	xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
			  end >> PAGE_SHIFT)
		xa_erase(&dmirror->pt, pfn);
}

static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);

	/*
	 * Ignore invalidation callbacks for device private pages since
	 * the invalidation is handled as part of the migration process.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->owner == dmirror->mdevice)
		return true;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	dmirror_do_update(dmirror, range->start, range->end);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_min_ops = {
	.invalidate = dmirror_interval_invalidate,
};

static int dmirror_range_fault(struct dmirror *dmirror,
				struct hmm_range *range)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	int ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	ret = dmirror_do_fault(dmirror, range);

	mutex_unlock(&dmirror->mutex);
out:
	return ret;
}

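/*
 * Fault in and mirror the range [start, end) of the mirrored process, in
 * batches of up to 64 pages (the size of the on-stack pfns[] array).
 * dmirror_range_fault() handles the hmm_range_fault() retry loop for each
 * batch.
 */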
static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
			 unsigned long end, bool write)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long addr;
	unsigned long pfns[64];
	struct hmm_range range = {
		.notifier = &dmirror->notifier,
		.hmm_pfns = pfns,
		.pfn_flags_mask = 0,
		.default_flags =
			HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return 0;

	for (addr = start; addr < end; addr = range.end) {
		range.start = addr;
		range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);

		ret = dmirror_range_fault(dmirror, &range);
		if (ret)
			break;
	}

	mmput(mm);
	return ret;
}

static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
			   unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page)
			return -ENOENT;

		memcpy_from_page(ptr, page, 0, PAGE_SIZE);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_read(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, false);
		if (ret)
			break;
		cmd->faults++;
	}

	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
			    unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
			return -ENOENT;

		memcpy_to_page(page, 0, ptr, PAGE_SIZE);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
			   bounce.size)) {
		ret = -EFAULT;
		goto fini;
	}

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_write(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, true);
		if (ret)
			break;
		cmd->faults++;
	}

fini:
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

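/*
 * Add a new chunk of simulated device memory to the device. For the
 * device-private case a free physical address range is reserved with
 * request_free_mem_region(); for the device-coherent case the chunk is
 * placed at the address given by the spm_addr_dev0/spm_addr_dev1 module
 * parameters. The chunk's pages are strung onto mdevice->free_pages via
 * zone_device_data, and one page is handed back through @ppage if requested.
 */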
static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
				  struct page **ppage)
{
	struct dmirror_chunk *devmem;
	struct resource *res = NULL;
	unsigned long pfn;
	unsigned long pfn_first;
	unsigned long pfn_last;
	void *ptr;
	int ret = -ENOMEM;

	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
	if (!devmem)
		return ret;

	switch (mdevice->zone_device_type) {
	case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
		res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
					      "hmm_dmirror");
		if (IS_ERR_OR_NULL(res))
			goto err_devmem;
		devmem->pagemap.range.start = res->start;
		devmem->pagemap.range.end = res->end;
		devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
		break;
	case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
		devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
							spm_addr_dev0 :
							spm_addr_dev1;
		devmem->pagemap.range.end = devmem->pagemap.range.start +
					    DEVMEM_CHUNK_SIZE - 1;
		devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
		break;
	default:
		ret = -EINVAL;
		goto err_devmem;
	}

	devmem->pagemap.nr_range = 1;
	devmem->pagemap.ops = &dmirror_devmem_ops;
	devmem->pagemap.owner = mdevice;

	mutex_lock(&mdevice->devmem_lock);

	if (mdevice->devmem_count == mdevice->devmem_capacity) {
		struct dmirror_chunk **new_chunks;
		unsigned int new_capacity;

		new_capacity = mdevice->devmem_capacity +
				DEVMEM_CHUNKS_RESERVE;
		new_chunks = krealloc(mdevice->devmem_chunks,
				sizeof(new_chunks[0]) * new_capacity,
				GFP_KERNEL);
		if (!new_chunks)
			goto err_release;
		mdevice->devmem_capacity = new_capacity;
		mdevice->devmem_chunks = new_chunks;
	}
	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
	if (IS_ERR_OR_NULL(ptr)) {
		if (ptr)
			ret = PTR_ERR(ptr);
		else
			ret = -EFAULT;
		goto err_release;
	}

	devmem->mdevice = mdevice;
	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
	pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;

	mutex_unlock(&mdevice->devmem_lock);

	pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
		DEVMEM_CHUNK_SIZE / (1024 * 1024),
		mdevice->devmem_count,
		mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
		struct page *page = pfn_to_page(pfn);

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	if (ppage) {
		*ppage = mdevice->free_pages;
		mdevice->free_pages = (*ppage)->zone_device_data;
		mdevice->calloc++;
	}
	spin_unlock(&mdevice->lock);

	return 0;

err_release:
	mutex_unlock(&mdevice->devmem_lock);
	if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
		release_mem_region(devmem->pagemap.range.start,
				   range_len(&devmem->pagemap.range));
err_devmem:
	kfree(devmem);

	return ret;
}

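/*
 * Allocate one ZONE_DEVICE page from the device's free list, adding a new
 * chunk with dmirror_allocate_chunk() if the list is empty. For the
 * device-private case a normal system page is also allocated to back it.
 */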
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage = NULL;
	struct page *rpage = NULL;

	/*
	 * For ZONE_DEVICE private type, this is a fake device so we allocate
	 * real system memory to store our device memory.
	 * For ZONE_DEVICE coherent type we use the actual dpage to store the
	 * data and ignore rpage.
	 */
	if (dmirror_is_private_zone(mdevice)) {
		rpage = alloc_page(GFP_HIGHUSER);
		if (!rpage)
			return NULL;
	}
	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		mdevice->calloc++;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}

	zone_device_page_init(dpage);
	dpage->zone_device_data = rpage;
	return dpage;

error:
	if (rpage)
		__free_page(rpage);
	return NULL;
}

static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * Note that spage might be NULL which is OK since it is an
		 * unallocated pte_none() or read-only zero page.
		 */
		spage = migrate_pfn_to_page(*src);
		if (WARN(spage && is_zone_device_page(spage),
		     "page already in device spage pfn: 0x%lx\n",
		     page_to_pfn(spage)))
			continue;

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;

		rpage = BACKING_PAGE(dpage);
		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
		 * point to the mirror but here we use it to hold the page for
		 * the simulated device memory and that page holds the pointer
		 * to the mirror.
		 */
		rpage->zone_device_data = dmirror;

		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));
		*dst = migrate_pfn(page_to_pfn(dpage));
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;
	}
}

static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
				unsigned long end)
{
	unsigned long pfn;

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;

		entry = xa_load(&dmirror->pt, pfn);
		if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
			return -EPERM;
	}

	return 0;
}

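/*
 * Record pages that were made exclusive to the device in the mirror page
 * table, tagged with DPT_XA_TAG_ATOMIC so that dmirror_check_atomic() can
 * detect them later. Returns the number of pages actually mapped.
 */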
static int dmirror_atomic_map(unsigned long start, unsigned long end,
			      struct page **pages, struct dmirror *dmirror)
{
	unsigned long pfn, mapped = 0;
	int i;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
		void *entry;

		if (!pages[i])
			continue;

		entry = pages[i];
		entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}

		mapped++;
	}

	mutex_unlock(&dmirror->mutex);
	return mapped;
}

static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
					    struct dmirror *dmirror)
{
	unsigned long start = args->start;
	unsigned long end = args->end;
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
		struct page *dpage;
		void *entry;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = migrate_pfn_to_page(*dst);
		if (!dpage)
			continue;

		entry = BACKING_PAGE(dpage);
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}

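/*
 * Mark the pages in [cmd->addr, cmd->addr + cmd->npages * PAGE_SIZE) for
 * exclusive device access with make_device_exclusive_range(), working in
 * batches of up to 64 pages, then read the data back through the mirror
 * page table so userspace can verify it.
 */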
static int dmirror_exclusive(struct dmirror *dmirror,
			     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct page *pages[64];
	struct dmirror_bounce bounce;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		unsigned long mapped = 0;
		int i;

		if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
			next = end;
		else
			next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);

		ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
		/*
		 * Do dmirror_atomic_map() iff all pages are marked for
		 * exclusive access to avoid accessing uninitialized
		 * fields of pages.
		 */
		if (ret == (next - addr) >> PAGE_SHIFT)
			mapped = dmirror_atomic_map(addr, next, pages, dmirror);
		for (i = 0; i < ret; i++) {
			if (pages[i]) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
		}

		if (addr + (mapped << PAGE_SHIFT) < next) {
			mmap_read_unlock(mm);
			mmput(mm);
			return -EBUSY;
		}
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}

	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;
		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

static unsigned long
dmirror_successful_migrated_pages(struct migrate_vma *migrate)
{
	unsigned long cpages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
			cpages++;
	}
	return cpages;
}

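/*
 * Migrate device pages in the requested range back to system memory, one
 * VMA-bounded batch of up to 64 pages at a time, selecting either
 * device-private or device-coherent source pages to match the device type.
 */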
static int dmirror_migrate_to_system(struct dmirror *dmirror,
				     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64] = { 0 };
	unsigned long dst_pfns[64] = { 0 };
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	cmd->cpages = 0;
	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = dmirror_select_device(dmirror);

		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from device mem to sys mem\n");
		dmirror_devmem_fault_alloc_and_copy(&args, dmirror);

		migrate_vma_pages(&args);
		cmd->cpages += dmirror_successful_migrated_pages(&args);
		migrate_vma_finalize(&args);
	}
out:
	mmap_read_unlock(mm);
	mmput(mm);

	return ret;
}

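/*
 * Migrate system memory in the requested range into the simulated device
 * memory, again in VMA-bounded batches of up to 64 pages, updating the
 * mirror page table for each migrated page and then reading the data back
 * through the mirror so userspace can verify it.
 */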
static int dmirror_migrate_to_device(struct dmirror *dmirror,
				     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64] = { 0 };
	unsigned long dst_pfns[64] = { 0 };
	struct dmirror_bounce bounce;
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = MIGRATE_VMA_SELECT_SYSTEM;
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from sys mem to device mem\n");
		dmirror_migrate_alloc_and_copy(&args, dmirror);
		migrate_vma_pages(&args);
		dmirror_migrate_finalize_and_map(&args, dmirror);
		migrate_vma_finalize(&args);
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/*
	 * Return the migrated data for verification.
	 * Only for pages in device zone
	 */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;

out:
	mmap_read_unlock(mm);
	mmput(mm);
	return ret;
}

static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
			    unsigned char *perm, unsigned long entry)
{
	struct page *page;

	if (entry & HMM_PFN_ERROR) {
		*perm = HMM_DMIRROR_PROT_ERROR;
		return;
	}
	if (!(entry & HMM_PFN_VALID)) {
		*perm = HMM_DMIRROR_PROT_NONE;
		return;
	}

	page = hmm_pfn_to_page(entry);
	if (is_device_private_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
	} else if (is_device_coherent_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
	} else if (is_zero_pfn(page_to_pfn(page)))
		*perm = HMM_DMIRROR_PROT_ZERO;
	else
		*perm = HMM_DMIRROR_PROT_NONE;
	if (entry & HMM_PFN_WRITE)
		*perm |= HMM_DMIRROR_PROT_WRITE;
	else
		*perm |= HMM_DMIRROR_PROT_READ;
	if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PMD;
	else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PUD;
}

static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror_interval *dmi =
		container_of(mni, struct dmirror_interval, notifier);
	struct dmirror *dmirror = dmi->dmirror;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	/*
	 * Snapshots only need to set the sequence number since any
	 * invalidation in the interval invalidates the whole snapshot.
	 */
	mmu_interval_set_seq(mni, cur_seq);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
	.invalidate = dmirror_snapshot_invalidate,
};

static int dmirror_range_snapshot(struct dmirror *dmirror,
				  struct hmm_range *range,
				  unsigned char *perm)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	struct dmirror_interval notifier;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i;
	unsigned long n;
	int ret = 0;

	notifier.dmirror = dmirror;
	range->notifier = &notifier.notifier;

	ret = mmu_interval_notifier_insert(range->notifier, mm,
			range->start, range->end - range->start,
			&dmirror_mrn_ops);
	if (ret)
		return ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);

		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	n = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < n; i++)
		dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);

	mutex_unlock(&dmirror->mutex);
out:
	mmu_interval_notifier_remove(range->notifier);
	return ret;
}

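/*
 * Snapshot the CPU page table state for the requested range without
 * requesting that pages be faulted in, encoding one HMM_DMIRROR_PROT_*
 * byte per page into the user buffer, 64 pages at a time.
 */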
static int dmirror_snapshot(struct dmirror *dmirror,
			    struct hmm_dmirror_cmd *cmd)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	unsigned long addr;
	unsigned long next;
	unsigned long pfns[64];
	unsigned char perm[64];
	char __user *uptr;
	struct hmm_range range = {
		.hmm_pfns = pfns,
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	/*
	 * Register a temporary notifier to detect invalidations even if it
	 * overlaps with other mmu_interval_notifiers.
	 */
	uptr = u64_to_user_ptr(cmd->ptr);
	for (addr = start; addr < end; addr = next) {
		unsigned long n;

		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
		range.start = addr;
		range.end = next;

		ret = dmirror_range_snapshot(dmirror, &range, perm);
		if (ret)
			break;

		n = (range.end - range.start) >> PAGE_SHIFT;
		if (copy_to_user(uptr, perm, n)) {
			ret = -EFAULT;
			break;
		}

		cmd->cpages += n;
		uptr += n;
	}
	mmput(mm);

	return ret;
}

static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
{
	unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
	unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
	unsigned long npages = end_pfn - start_pfn + 1;
	unsigned long i;
	unsigned long *src_pfns;
	unsigned long *dst_pfns;

	src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);

	migrate_device_range(src_pfns, start_pfn, npages);
	for (i = 0; i < npages; i++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
		lock_page(dpage);
		copy_highpage(dpage, spage);
		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
		if (src_pfns[i] & MIGRATE_PFN_WRITE)
			dst_pfns[i] |= MIGRATE_PFN_WRITE;
	}
	migrate_device_pages(src_pfns, dst_pfns, npages);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
	kvfree(src_pfns);
	kvfree(dst_pfns);
}

/* Removes free pages from the free list so they can't be re-allocated */
static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
{
	struct dmirror_device *mdevice = devmem->mdevice;
	struct page *page;

	for (page = mdevice->free_pages; page; page = page->zone_device_data)
		if (dmirror_page_to_chunk(page) == devmem)
			mdevice->free_pages = page->zone_device_data;
}

static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
{
	unsigned int i;

	mutex_lock(&mdevice->devmem_lock);
	if (mdevice->devmem_chunks) {
		for (i = 0; i < mdevice->devmem_count; i++) {
			struct dmirror_chunk *devmem =
				mdevice->devmem_chunks[i];

			spin_lock(&mdevice->lock);
			devmem->remove = true;
			dmirror_remove_free_pages(devmem);
			spin_unlock(&mdevice->lock);

			dmirror_device_evict_chunk(devmem);
			memunmap_pages(&devmem->pagemap);
			if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
				release_mem_region(devmem->pagemap.range.start,
						   range_len(&devmem->pagemap.range));
			kfree(devmem);
		}
		mdevice->devmem_count = 0;
		mdevice->devmem_capacity = 0;
		mdevice->free_pages = NULL;
		kfree(mdevice->devmem_chunks);
		mdevice->devmem_chunks = NULL;
	}
	mutex_unlock(&mdevice->devmem_lock);
}

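/*
 * ioctl() dispatcher for the HMM_DMIRROR_* commands declared in
 * test_hmm_uapi.h. Each command takes a struct hmm_dmirror_cmd describing
 * a page-aligned range of the mirrored address space; cpages and faults
 * are filled in on return.
 *
 * A minimal userspace sketch (illustrative only; the exact ioctl numbers
 * and structure layout come from test_hmm_uapi.h, the device node name is
 * whatever udev creates for the hmm_dmirror%u device, and the HMM
 * selftests under tools/testing/selftests are the canonical users):
 *
 *	int fd = open("/dev/hmm_dmirror0", O_RDWR);
 *	struct hmm_dmirror_cmd cmd = {
 *		.addr = (__u64)(uintptr_t)buf,		// page-aligned range to mirror
 *		.ptr = (__u64)(uintptr_t)mirror,	// buffer receiving the data
 *		.npages = 1,
 *	};
 *	ioctl(fd, HMM_DMIRROR_READ, &cmd);		// faults and mirrors one page
 */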
static long dmirror_fops_unlocked_ioctl(struct file *filp,
					unsigned int command,
					unsigned long arg)
{
	void __user *uarg = (void __user *)arg;
	struct hmm_dmirror_cmd cmd;
	struct dmirror *dmirror;
	int ret;

	dmirror = filp->private_data;
	if (!dmirror)
		return -EINVAL;

	if (copy_from_user(&cmd, uarg, sizeof(cmd)))
		return -EFAULT;

	if (cmd.addr & ~PAGE_MASK)
		return -EINVAL;
	if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
		return -EINVAL;

	cmd.cpages = 0;
	cmd.faults = 0;

	switch (command) {
	case HMM_DMIRROR_READ:
		ret = dmirror_read(dmirror, &cmd);
		break;

	case HMM_DMIRROR_WRITE:
		ret = dmirror_write(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_DEV:
		ret = dmirror_migrate_to_device(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_SYS:
		ret = dmirror_migrate_to_system(dmirror, &cmd);
		break;

	case HMM_DMIRROR_EXCLUSIVE:
		ret = dmirror_exclusive(dmirror, &cmd);
		break;

	case HMM_DMIRROR_CHECK_EXCLUSIVE:
		ret = dmirror_check_atomic(dmirror, cmd.addr,
					cmd.addr + (cmd.npages << PAGE_SHIFT));
		break;

	case HMM_DMIRROR_SNAPSHOT:
		ret = dmirror_snapshot(dmirror, &cmd);
		break;

	case HMM_DMIRROR_RELEASE:
		dmirror_device_remove_chunks(dmirror->mdevice);
		ret = 0;
		break;

	default:
		return -EINVAL;
	}
	if (ret)
		return ret;

	if (copy_to_user(uarg, &cmd, sizeof(cmd)))
		return -EFAULT;

	return 0;
}

static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		struct page *page;
		int ret;

		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			return -ENOMEM;

		ret = vm_insert_page(vma, addr, page);
		if (ret) {
			__free_page(page);
			return ret;
		}
		put_page(page);
	}

	return 0;
}

static const struct file_operations dmirror_fops = {
	.open		= dmirror_fops_open,
	.release	= dmirror_fops_release,
	.mmap		= dmirror_fops_mmap,
	.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
	.llseek		= default_llseek,
	.owner		= THIS_MODULE,
};

static void dmirror_devmem_free(struct page *page)
{
	struct page *rpage = BACKING_PAGE(page);
	struct dmirror_device *mdevice;

	if (rpage != page)
		__free_page(rpage);

	mdevice = dmirror_page_to_device(page);
	spin_lock(&mdevice->lock);

	/* Return page to our allocator if not freeing the chunk */
	if (!dmirror_page_to_chunk(page)->remove) {
		mdevice->cfree++;
		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	spin_unlock(&mdevice->lock);
}

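/*
 * migrate_to_ram() callback, invoked when the CPU faults on one of this
 * device's ZONE_DEVICE pages: migrate the single faulting page back to
 * system memory via the same alloc-and-copy path used by
 * HMM_DMIRROR_MIGRATE_TO_SYS.
 */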
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
	struct migrate_vma args = { 0 };
	unsigned long src_pfns = 0;
	unsigned long dst_pfns = 0;
	struct page *rpage;
	struct dmirror *dmirror;
	vm_fault_t ret;

	/*
	 * Normally, a device would use the page->zone_device_data to point to
	 * the mirror but here we use it to hold the page for the simulated
	 * device memory and that page holds the pointer to the mirror.
	 */
	rpage = vmf->page->zone_device_data;
	dmirror = rpage->zone_device_data;

	/* FIXME demonstrate how we can adjust migrate range */
	args.vma = vmf->vma;
	args.start = vmf->address;
	args.end = args.start + PAGE_SIZE;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.pgmap_owner = dmirror->mdevice;
	args.flags = dmirror_select_device(dmirror);
	args.fault_page = vmf->page;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
	if (ret)
		return ret;
	migrate_vma_pages(&args);
	/*
	 * No device finalize step is needed since
	 * dmirror_devmem_fault_alloc_and_copy() will have already
	 * invalidated the device page table.
	 */
	migrate_vma_finalize(&args);
	return 0;
}

static const struct dev_pagemap_ops dmirror_devmem_ops = {
	.page_free	= dmirror_devmem_free,
	.migrate_to_ram	= dmirror_devmem_fault,
};

static int dmirror_device_init(struct dmirror_device *mdevice, int id)
{
	dev_t dev;
	int ret;

	dev = MKDEV(MAJOR(dmirror_dev), id);
	mutex_init(&mdevice->devmem_lock);
	spin_lock_init(&mdevice->lock);

	cdev_init(&mdevice->cdevice, &dmirror_fops);
	mdevice->cdevice.owner = THIS_MODULE;
	device_initialize(&mdevice->device);
	mdevice->device.devt = dev;

	ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id);
	if (ret)
		return ret;

	ret = cdev_device_add(&mdevice->cdevice, &mdevice->device);
	if (ret)
		return ret;

	/* Build a list of free ZONE_DEVICE struct pages */
	return dmirror_allocate_chunk(mdevice, NULL);
}

static void dmirror_device_remove(struct dmirror_device *mdevice)
{
	dmirror_device_remove_chunks(mdevice);
	cdev_device_del(&mdevice->cdevice, &mdevice->device);
}

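/*
 * Register the hmm_dmirror%u character devices: two device-private
 * instances are always created, and two device-coherent instances are
 * added only when both spm_addr_dev0 and spm_addr_dev1 are set.
 */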
static int __init hmm_dmirror_init(void)
{
	int ret;
	int id = 0;
	int ndevices = 0;

	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
				  "HMM_DMIRROR");
	if (ret)
		goto err_unreg;

	memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0]));
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	if (spm_addr_dev0 && spm_addr_dev1) {
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
	}
	for (id = 0; id < ndevices; id++) {
		ret = dmirror_device_init(dmirror_devices + id, id);
		if (ret)
			goto err_chrdev;
	}

	pr_info("HMM test module loaded. This is only for testing HMM.\n");
	return 0;

err_chrdev:
	while (--id >= 0)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
err_unreg:
	return ret;
}

static void __exit hmm_dmirror_exit(void)
{
	int id;

	for (id = 0; id < DMIRROR_NDEVICES; id++)
		if (dmirror_devices[id].zone_device_type)
			dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}

module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_DESCRIPTION("HMM (Heterogeneous Memory Management) test module");
MODULE_LICENSE("GPL");