
TOMOYO Linux Cross Reference
Linux/mm/userfaultfd.c


Diff markup

Differences between /mm/userfaultfd.c (Version linux-6.12-rc7) and /mm/userfaultfd.c (Version linux-5.10.229)

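For orientation, here is a minimal, hypothetical userspace sketch (not part of the diffed file) of the UFFDIO_COPY path that mm/userfaultfd.c services in both versions compared below. The 4 KiB page size, the omission of error handling, and the single-threaded flow are simplifying assumptions for illustration only.

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        /* Open a userfaultfd and negotiate the API. */
        long uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        struct uffdio_api api = { .api = UFFD_API };
        ioctl(uffd, UFFDIO_API, &api);

        /* Map an anonymous region and register it for missing-page faults. */
        size_t len = 4096;                      /* assumes 4 KiB pages */
        char *dst = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        struct uffdio_register reg = {
                .range = { .start = (unsigned long)dst, .len = len },
                .mode  = UFFDIO_REGISTER_MODE_MISSING,
        };
        ioctl(uffd, UFFDIO_REGISTER, &reg);

        /*
         * Resolve a missing page by copying prepared contents into place.
         * The kernel side of this ioctl is the copy path shown in the diff
         * (mcopy_atomic_pte() in 5.10, mfill_atomic_pte_copy() in 6.12).
         */
        static char src[4096];
        memset(src, 0xaa, sizeof(src));
        struct uffdio_copy copy = {
                .dst = (unsigned long)dst,
                .src = (unsigned long)src,
                .len = len,
        };
        ioctl(uffd, UFFDIO_COPY, &copy);

        return 0;
}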

  1 // SPDX-License-Identifier: GPL-2.0-only            1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*                                                  2 /*
  3  *  mm/userfaultfd.c                                3  *  mm/userfaultfd.c
  4  *                                                  4  *
  5  *  Copyright (C) 2015  Red Hat, Inc.               5  *  Copyright (C) 2015  Red Hat, Inc.
  6  */                                                 6  */
  7                                                     7 
  8 #include <linux/mm.h>                               8 #include <linux/mm.h>
  9 #include <linux/sched/signal.h>                     9 #include <linux/sched/signal.h>
 10 #include <linux/pagemap.h>                         10 #include <linux/pagemap.h>
 11 #include <linux/rmap.h>                            11 #include <linux/rmap.h>
 12 #include <linux/swap.h>                            12 #include <linux/swap.h>
 13 #include <linux/swapops.h>                         13 #include <linux/swapops.h>
 14 #include <linux/userfaultfd_k.h>                   14 #include <linux/userfaultfd_k.h>
 15 #include <linux/mmu_notifier.h>                    15 #include <linux/mmu_notifier.h>
 16 #include <linux/hugetlb.h>                         16 #include <linux/hugetlb.h>
 17 #include <linux/shmem_fs.h>                        17 #include <linux/shmem_fs.h>
 18 #include <asm/tlbflush.h>                          18 #include <asm/tlbflush.h>
 19 #include <asm/tlb.h>                           << 
 20 #include "internal.h"                              19 #include "internal.h"
 21                                                    20 
 22 static __always_inline                             21 static __always_inline
 23 bool validate_dst_vma(struct vm_area_struct *d !!  22 struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
                                                   >>  23                                     unsigned long dst_start,
                                                   >>  24                                     unsigned long len)
 24 {                                                  25 {
 25         /* Make sure that the dst range is ful !!  26         /*
 26         if (dst_end > dst_vma->vm_end)         !!  27          * Make sure that the dst range is both valid and fully within a
 27                 return false;                  !!  28          * single existing vma.
                                                   >>  29          */
                                                   >>  30         struct vm_area_struct *dst_vma;
                                                   >>  31 
                                                   >>  32         dst_vma = find_vma(dst_mm, dst_start);
                                                   >>  33         if (!dst_vma)
                                                   >>  34                 return NULL;
                                                   >>  35 
                                                   >>  36         if (dst_start < dst_vma->vm_start ||
                                                   >>  37             dst_start + len > dst_vma->vm_end)
                                                   >>  38                 return NULL;
 28                                                    39 
 29         /*                                         40         /*
 30          * Check the vma is registered in uffd     41          * Check the vma is registered in uffd, this is required to
 31          * enforce the VM_MAYWRITE check done      42          * enforce the VM_MAYWRITE check done at uffd registration
 32          * time.                                   43          * time.
 33          */                                        44          */
 34         if (!dst_vma->vm_userfaultfd_ctx.ctx)      45         if (!dst_vma->vm_userfaultfd_ctx.ctx)
 35                 return false;                  !!  46                 return NULL;
 36                                                << 
 37         return true;                           << 
 38 }                                              << 
 39                                                << 
 40 static __always_inline                         << 
 41 struct vm_area_struct *find_vma_and_prepare_an << 
 42                                                << 
 43 {                                              << 
 44         struct vm_area_struct *vma;            << 
 45                                                << 
 46         mmap_assert_locked(mm);                << 
 47         vma = vma_lookup(mm, addr);            << 
 48         if (!vma)                              << 
 49                 vma = ERR_PTR(-ENOENT);        << 
 50         else if (!(vma->vm_flags & VM_SHARED)  << 
 51                  unlikely(anon_vma_prepare(vma << 
 52                 vma = ERR_PTR(-ENOMEM);        << 
 53                                                << 
 54         return vma;                            << 
 55 }                                              << 
 56                                                << 
 57 #ifdef CONFIG_PER_VMA_LOCK                     << 
 58 /*                                             << 
 59  * uffd_lock_vma() - Lookup and lock vma corre << 
 60  * @mm: mm to search vma in.                   << 
 61  * @address: address that the vma should conta << 
 62  *                                             << 
 63  * Should be called without holding mmap_lock. << 
 64  *                                             << 
 65  * Return: A locked vma containing @address, - << 
 66  * -ENOMEM if anon_vma couldn't be allocated.  << 
 67  */                                            << 
 68 static struct vm_area_struct *uffd_lock_vma(st << 
 69                                        unsigne << 
 70 {                                              << 
 71         struct vm_area_struct *vma;            << 
 72                                                << 
 73         vma = lock_vma_under_rcu(mm, address); << 
 74         if (vma) {                             << 
 75                 /*                             << 
 76                  * We know we're going to need << 
 77                  * that early.                 << 
 78                  */                            << 
 79                 if (!(vma->vm_flags & VM_SHARE << 
 80                         vma_end_read(vma);     << 
 81                 else                           << 
 82                         return vma;            << 
 83         }                                      << 
 84                                                << 
 85         mmap_read_lock(mm);                    << 
 86         vma = find_vma_and_prepare_anon(mm, ad << 
 87         if (!IS_ERR(vma)) {                    << 
 88                 /*                             << 
 89                  * We cannot use vma_start_rea << 
 90                  * false locked (see comment i << 
 91                  * can avoid that by directly  << 
 92                  * mmap_lock, which guarantees << 
 93                  * vma for write (vma_start_wr << 
 94                  */                            << 
 95                 down_read(&vma->vm_lock->lock) << 
 96         }                                      << 
 97                                                << 
 98         mmap_read_unlock(mm);                  << 
 99         return vma;                            << 
100 }                                              << 
101                                                << 
102 static struct vm_area_struct *uffd_mfill_lock( << 
103                                                << 
104                                                << 
105 {                                              << 
106         struct vm_area_struct *dst_vma;        << 
107                                                << 
108         dst_vma = uffd_lock_vma(dst_mm, dst_st << 
109         if (IS_ERR(dst_vma) || validate_dst_vm << 
110                 return dst_vma;                << 
111                                                << 
112         vma_end_read(dst_vma);                 << 
113         return ERR_PTR(-ENOENT);               << 
114 }                                              << 
115                                                << 
116 static void uffd_mfill_unlock(struct vm_area_s << 
117 {                                              << 
118         vma_end_read(vma);                     << 
119 }                                              << 
120                                                << 
121 #else                                          << 
122                                                << 
123 static struct vm_area_struct *uffd_mfill_lock( << 
124                                                << 
125                                                << 
126 {                                              << 
127         struct vm_area_struct *dst_vma;        << 
128                                                << 
129         mmap_read_lock(dst_mm);                << 
130         dst_vma = find_vma_and_prepare_anon(ds << 
131         if (IS_ERR(dst_vma))                   << 
132                 goto out_unlock;               << 
133                                                << 
134         if (validate_dst_vma(dst_vma, dst_star << 
135                 return dst_vma;                << 
136                                                    47 
137         dst_vma = ERR_PTR(-ENOENT);            << 
138 out_unlock:                                    << 
139         mmap_read_unlock(dst_mm);              << 
140         return dst_vma;                            48         return dst_vma;
141 }                                                  49 }
142                                                    50 
143 static void uffd_mfill_unlock(struct vm_area_s !!  51 static int mcopy_atomic_pte(struct mm_struct *dst_mm,
144 {                                              !!  52                             pmd_t *dst_pmd,
145         mmap_read_unlock(vma->vm_mm);          !!  53                             struct vm_area_struct *dst_vma,
146 }                                              !!  54                             unsigned long dst_addr,
147 #endif                                         !!  55                             unsigned long src_addr,
148                                                !!  56                             struct page **pagep,
149 /* Check if dst_addr is outside of file's size !!  57                             bool wp_copy)
150 static bool mfill_file_over_size(struct vm_are << 
151                                  unsigned long << 
152 {                                              << 
153         struct inode *inode;                   << 
154         pgoff_t offset, max_off;               << 
155                                                << 
156         if (!dst_vma->vm_file)                 << 
157                 return false;                  << 
158                                                << 
159         inode = dst_vma->vm_file->f_inode;     << 
160         offset = linear_page_index(dst_vma, ds << 
161         max_off = DIV_ROUND_UP(i_size_read(ino << 
162         return offset >= max_off;              << 
163 }                                              << 
164                                                << 
165 /*                                             << 
166  * Install PTEs, to map dst_addr (within dst_v << 
167  *                                             << 
168  * This function handles both MCOPY_ATOMIC_NOR << 
169  * and anon, and for both shared and private V << 
170  */                                            << 
171 int mfill_atomic_install_pte(pmd_t *dst_pmd,   << 
172                              struct vm_area_st << 
173                              unsigned long dst << 
174                              bool newly_alloca << 
175 {                                                  58 {
176         int ret;                               << 
177         struct mm_struct *dst_mm = dst_vma->vm << 
178         pte_t _dst_pte, *dst_pte;                  59         pte_t _dst_pte, *dst_pte;
179         bool writable = dst_vma->vm_flags & VM << 
180         bool vm_shared = dst_vma->vm_flags & V << 
181         spinlock_t *ptl;                           60         spinlock_t *ptl;
182         struct folio *folio = page_folio(page) !!  61         void *page_kaddr;
183         bool page_in_cache = folio_mapping(fol << 
184                                                << 
185         _dst_pte = mk_pte(page, dst_vma->vm_pa << 
186         _dst_pte = pte_mkdirty(_dst_pte);      << 
187         if (page_in_cache && !vm_shared)       << 
188                 writable = false;              << 
189         if (writable)                          << 
190                 _dst_pte = pte_mkwrite(_dst_pt << 
191         if (flags & MFILL_ATOMIC_WP)           << 
192                 _dst_pte = pte_mkuffd_wp(_dst_ << 
193                                                << 
194         ret = -EAGAIN;                         << 
195         dst_pte = pte_offset_map_lock(dst_mm,  << 
196         if (!dst_pte)                          << 
197                 goto out;                      << 
198                                                << 
199         if (mfill_file_over_size(dst_vma, dst_ << 
200                 ret = -EFAULT;                 << 
201                 goto out_unlock;               << 
202         }                                      << 
203                                                << 
204         ret = -EEXIST;                         << 
205         /*                                     << 
206          * We allow to overwrite a pte marker: << 
207          * registered, we firstly wr-protect a << 
208          * page backing it, then access the pa << 
209          */                                    << 
210         if (!pte_none_mostly(ptep_get(dst_pte) << 
211                 goto out_unlock;               << 
212                                                << 
213         if (page_in_cache) {                   << 
214                 /* Usually, cache pages are al << 
215                 if (newly_allocated)           << 
216                         folio_add_lru(folio);  << 
217                 folio_add_file_rmap_pte(folio, << 
218         } else {                               << 
219                 folio_add_new_anon_rmap(folio, << 
220                 folio_add_lru_vma(folio, dst_v << 
221         }                                      << 
222                                                << 
223         /*                                     << 
224          * Must happen after rmap, as mm_count << 
225          * PageAnon()), which is set by __page << 
226          */                                    << 
227         inc_mm_counter(dst_mm, mm_counter(foli << 
228                                                << 
229         set_pte_at(dst_mm, dst_addr, dst_pte,  << 
230                                                << 
231         /* No need to invalidate - it was non- << 
232         update_mmu_cache(dst_vma, dst_addr, ds << 
233         ret = 0;                               << 
234 out_unlock:                                    << 
235         pte_unmap_unlock(dst_pte, ptl);        << 
236 out:                                           << 
237         return ret;                            << 
238 }                                              << 
239                                                << 
240 static int mfill_atomic_pte_copy(pmd_t *dst_pm << 
241                                  struct vm_are << 
242                                  unsigned long << 
243                                  unsigned long << 
244                                  uffd_flags_t  << 
245                                  struct folio  << 
246 {                                              << 
247         void *kaddr;                           << 
248         int ret;                                   62         int ret;
249         struct folio *folio;                   !!  63         struct page *page;
                                                   >>  64         pgoff_t offset, max_off;
                                                   >>  65         struct inode *inode;
250                                                    66 
251         if (!*foliop) {                        !!  67         if (!*pagep) {
252                 ret = -ENOMEM;                     68                 ret = -ENOMEM;
253                 folio = vma_alloc_folio(GFP_HI !!  69                 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
254                                         dst_ad !!  70                 if (!page)
255                 if (!folio)                    << 
256                         goto out;                  71                         goto out;
257                                                    72 
258                 kaddr = kmap_local_folio(folio !!  73                 page_kaddr = kmap_atomic(page);
259                 /*                             !!  74                 ret = copy_from_user(page_kaddr,
260                  * The read mmap_lock is held  !!  75                                      (const void __user *) src_addr,
261                  * mmap_lock being read recurs << 
262                  * possible if a writer has ta << 
263                  *                             << 
264                  * process A thread 1 takes re << 
265                  * process A thread 2 calls mm << 
266                  * process B thread 1 takes pa << 
267                  * process B thread 2 calls mm << 
268                  * process A thread 1 blocks t << 
269                  * process B thread 1 blocks t << 
270                  *                             << 
271                  * Disable page faults to prev << 
272                  * and retry the copy outside  << 
273                  */                            << 
274                 pagefault_disable();           << 
275                 ret = copy_from_user(kaddr, (c << 
276                                      PAGE_SIZE     76                                      PAGE_SIZE);
277                 pagefault_enable();            !!  77                 kunmap_atomic(page_kaddr);
278                 kunmap_local(kaddr);           << 
279                                                    78 
280                 /* fallback to copy_from_user      79                 /* fallback to copy_from_user outside mmap_lock */
281                 if (unlikely(ret)) {               80                 if (unlikely(ret)) {
282                         ret = -ENOENT;             81                         ret = -ENOENT;
283                         *foliop = folio;       !!  82                         *pagep = page;
284                         /* don't free the page     83                         /* don't free the page */
285                         goto out;                  84                         goto out;
286                 }                                  85                 }
287                                                    86 
288                 flush_dcache_folio(folio);     !!  87                 flush_dcache_page(page);
289         } else {                                   88         } else {
290                 folio = *foliop;               !!  89                 page = *pagep;
291                 *foliop = NULL;                !!  90                 *pagep = NULL;
292         }                                          91         }
293                                                    92 
294         /*                                         93         /*
295          * The memory barrier inside __folio_m !!  94          * The memory barrier inside __SetPageUptodate makes sure that
296          * preceding stores to the page conten     95          * preceding stores to the page contents become visible before
297          * the set_pte_at() write.                 96          * the set_pte_at() write.
298          */                                        97          */
299         __folio_mark_uptodate(folio);          !!  98         __SetPageUptodate(page);
300                                                    99 
301         ret = -ENOMEM;                            100         ret = -ENOMEM;
302         if (mem_cgroup_charge(folio, dst_vma-> !! 101         if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
303                 goto out_release;              << 
304                                                << 
305         ret = mfill_atomic_install_pte(dst_pmd << 
306                                        &folio- << 
307         if (ret)                               << 
308                 goto out_release;                 102                 goto out_release;
309 out:                                           << 
310         return ret;                            << 
311 out_release:                                   << 
312         folio_put(folio);                      << 
313         goto out;                              << 
314 }                                              << 
315                                                << 
316 static int mfill_atomic_pte_zeroed_folio(pmd_t << 
317                                          struc << 
318                                          unsig << 
319 {                                              << 
320         struct folio *folio;                   << 
321         int ret = -ENOMEM;                     << 
322                                                << 
323         folio = vma_alloc_zeroed_movable_folio << 
324         if (!folio)                            << 
325                 return ret;                    << 
326                                                   103 
327         if (mem_cgroup_charge(folio, dst_vma-> !! 104         _dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
328                 goto out_put;                  !! 105         if (dst_vma->vm_flags & VM_WRITE) {
329                                                !! 106                 if (wp_copy)
330         /*                                     !! 107                         _dst_pte = pte_mkuffd_wp(_dst_pte);
331          * The memory barrier inside __folio_m !! 108                 else
332          * zeroing out the folio become visibl !! 109                         _dst_pte = pte_mkwrite(_dst_pte);
333          * using set_pte_at(). See do_anonymou !! 110         }
334          */                                    << 
335         __folio_mark_uptodate(folio);          << 
336                                                << 
337         ret = mfill_atomic_install_pte(dst_pmd << 
338                                        &folio- << 
339         if (ret)                               << 
340                 goto out_put;                  << 
341                                                << 
342         return 0;                              << 
343 out_put:                                       << 
344         folio_put(folio);                      << 
345         return ret;                            << 
346 }                                              << 
347                                                << 
348 static int mfill_atomic_pte_zeropage(pmd_t *ds << 
349                                      struct vm << 
350                                      unsigned  << 
351 {                                              << 
352         pte_t _dst_pte, *dst_pte;              << 
353         spinlock_t *ptl;                       << 
354         int ret;                               << 
355                                                << 
356         if (mm_forbids_zeropage(dst_vma->vm_mm << 
357                 return mfill_atomic_pte_zeroed << 
358                                                   111 
359         _dst_pte = pte_mkspecial(pfn_pte(my_ze !! 112         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
360                                          dst_v !! 113         if (dst_vma->vm_file) {
361         ret = -EAGAIN;                         !! 114                 /* the shmem MAP_PRIVATE case requires checking the i_size */
362         dst_pte = pte_offset_map_lock(dst_vma- !! 115                 inode = dst_vma->vm_file->f_inode;
363         if (!dst_pte)                          !! 116                 offset = linear_page_index(dst_vma, dst_addr);
364                 goto out;                      !! 117                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
365         if (mfill_file_over_size(dst_vma, dst_ << 
366                 ret = -EFAULT;                    118                 ret = -EFAULT;
367                 goto out_unlock;               !! 119                 if (unlikely(offset >= max_off))
                                                   >> 120                         goto out_release_uncharge_unlock;
368         }                                         121         }
369         ret = -EEXIST;                            122         ret = -EEXIST;
370         if (!pte_none(ptep_get(dst_pte)))      !! 123         if (!pte_none(*dst_pte))
371                 goto out_unlock;               !! 124                 goto out_release_uncharge_unlock;
372         set_pte_at(dst_vma->vm_mm, dst_addr, d << 
373         /* No need to invalidate - it was non- << 
374         update_mmu_cache(dst_vma, dst_addr, ds << 
375         ret = 0;                               << 
376 out_unlock:                                    << 
377         pte_unmap_unlock(dst_pte, ptl);        << 
378 out:                                           << 
379         return ret;                            << 
380 }                                              << 
381                                                   125 
382 /* Handles UFFDIO_CONTINUE for all shmem VMAs  !! 126         inc_mm_counter(dst_mm, MM_ANONPAGES);
383 static int mfill_atomic_pte_continue(pmd_t *ds !! 127         page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
384                                      struct vm !! 128         lru_cache_add_inactive_or_unevictable(page, dst_vma);
385                                      unsigned  << 
386                                      uffd_flag << 
387 {                                              << 
388         struct inode *inode = file_inode(dst_v << 
389         pgoff_t pgoff = linear_page_index(dst_ << 
390         struct folio *folio;                   << 
391         struct page *page;                     << 
392         int ret;                               << 
393                                                   129 
394         ret = shmem_get_folio(inode, pgoff, 0, !! 130         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
395         /* Our caller expects us to return -EF << 
396         if (ret == -ENOENT)                    << 
397                 ret = -EFAULT;                 << 
398         if (ret)                               << 
399                 goto out;                      << 
400         if (!folio) {                          << 
401                 ret = -EFAULT;                 << 
402                 goto out;                      << 
403         }                                      << 
404                                                << 
405         page = folio_file_page(folio, pgoff);  << 
406         if (PageHWPoison(page)) {              << 
407                 ret = -EIO;                    << 
408                 goto out_release;              << 
409         }                                      << 
410                                                   131 
411         ret = mfill_atomic_install_pte(dst_pmd !! 132         /* No need to invalidate - it was non-present before */
412                                        page, f !! 133         update_mmu_cache(dst_vma, dst_addr, dst_pte);
413         if (ret)                               << 
414                 goto out_release;              << 
415                                                   134 
416         folio_unlock(folio);                   !! 135         pte_unmap_unlock(dst_pte, ptl);
417         ret = 0;                                  136         ret = 0;
418 out:                                              137 out:
419         return ret;                               138         return ret;
                                                   >> 139 out_release_uncharge_unlock:
                                                   >> 140         pte_unmap_unlock(dst_pte, ptl);
420 out_release:                                      141 out_release:
421         folio_unlock(folio);                   !! 142         put_page(page);
422         folio_put(folio);                      << 
423         goto out;                                 143         goto out;
424 }                                                 144 }
425                                                   145 
426 /* Handles UFFDIO_POISON for all non-hugetlb V !! 146 static int mfill_zeropage_pte(struct mm_struct *dst_mm,
427 static int mfill_atomic_pte_poison(pmd_t *dst_ !! 147                               pmd_t *dst_pmd,
428                                    struct vm_a !! 148                               struct vm_area_struct *dst_vma,
429                                    unsigned lo !! 149                               unsigned long dst_addr)
430                                    uffd_flags_ << 
431 {                                                 150 {
432         int ret;                               << 
433         struct mm_struct *dst_mm = dst_vma->vm << 
434         pte_t _dst_pte, *dst_pte;                 151         pte_t _dst_pte, *dst_pte;
435         spinlock_t *ptl;                          152         spinlock_t *ptl;
                                                   >> 153         int ret;
                                                   >> 154         pgoff_t offset, max_off;
                                                   >> 155         struct inode *inode;
436                                                   156 
437         _dst_pte = make_pte_marker(PTE_MARKER_ !! 157         _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
438         ret = -EAGAIN;                         !! 158                                          dst_vma->vm_page_prot));
439         dst_pte = pte_offset_map_lock(dst_mm,     159         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
440         if (!dst_pte)                          !! 160         if (dst_vma->vm_file) {
441                 goto out;                      !! 161                 /* the shmem MAP_PRIVATE case requires checking the i_size */
442                                                !! 162                 inode = dst_vma->vm_file->f_inode;
443         if (mfill_file_over_size(dst_vma, dst_ !! 163                 offset = linear_page_index(dst_vma, dst_addr);
                                                   >> 164                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
444                 ret = -EFAULT;                    165                 ret = -EFAULT;
445                 goto out_unlock;               !! 166                 if (unlikely(offset >= max_off))
                                                   >> 167                         goto out_unlock;
446         }                                         168         }
447                                                << 
448         ret = -EEXIST;                            169         ret = -EEXIST;
449         /* Refuse to overwrite any PTE, even a !! 170         if (!pte_none(*dst_pte))
450         if (!pte_none(ptep_get(dst_pte)))      << 
451                 goto out_unlock;                  171                 goto out_unlock;
452                                                << 
453         set_pte_at(dst_mm, dst_addr, dst_pte,     172         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
454                                                << 
455         /* No need to invalidate - it was non-    173         /* No need to invalidate - it was non-present before */
456         update_mmu_cache(dst_vma, dst_addr, ds    174         update_mmu_cache(dst_vma, dst_addr, dst_pte);
457         ret = 0;                                  175         ret = 0;
458 out_unlock:                                       176 out_unlock:
459         pte_unmap_unlock(dst_pte, ptl);           177         pte_unmap_unlock(dst_pte, ptl);
460 out:                                           << 
461         return ret;                               178         return ret;
462 }                                                 179 }
463                                                   180 
464 static pmd_t *mm_alloc_pmd(struct mm_struct *m    181 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
465 {                                                 182 {
466         pgd_t *pgd;                               183         pgd_t *pgd;
467         p4d_t *p4d;                               184         p4d_t *p4d;
468         pud_t *pud;                               185         pud_t *pud;
469                                                   186 
470         pgd = pgd_offset(mm, address);            187         pgd = pgd_offset(mm, address);
471         p4d = p4d_alloc(mm, pgd, address);        188         p4d = p4d_alloc(mm, pgd, address);
472         if (!p4d)                                 189         if (!p4d)
473                 return NULL;                      190                 return NULL;
474         pud = pud_alloc(mm, p4d, address);        191         pud = pud_alloc(mm, p4d, address);
475         if (!pud)                                 192         if (!pud)
476                 return NULL;                      193                 return NULL;
477         /*                                        194         /*
478          * Note that we didn't run this becaus    195          * Note that we didn't run this because the pmd was
479          * missing, the *pmd may be already es    196          * missing, the *pmd may be already established and in
480          * turn it may also be a trans_huge_pm    197          * turn it may also be a trans_huge_pmd.
481          */                                       198          */
482         return pmd_alloc(mm, pud, address);       199         return pmd_alloc(mm, pud, address);
483 }                                                 200 }
484                                                   201 
485 #ifdef CONFIG_HUGETLB_PAGE                        202 #ifdef CONFIG_HUGETLB_PAGE
486 /*                                                203 /*
487  * mfill_atomic processing for HUGETLB vmas.   !! 204  * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
488  * called with either vma-lock or mmap_lock he !! 205  * called with mmap_lock held, it will release mmap_lock before returning.
489  * before returning.                           << 
490  */                                               206  */
491 static __always_inline ssize_t mfill_atomic_hu !! 207 static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
492                                                << 
493                                                   208                                               struct vm_area_struct *dst_vma,
494                                                   209                                               unsigned long dst_start,
495                                                   210                                               unsigned long src_start,
496                                                   211                                               unsigned long len,
497                                                !! 212                                               bool *mmap_changing,
                                                   >> 213                                               bool zeropage)
498 {                                                 214 {
499         struct mm_struct *dst_mm = dst_vma->vm !! 215         int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
                                                   >> 216         int vm_shared = dst_vma->vm_flags & VM_SHARED;
500         ssize_t err;                              217         ssize_t err;
501         pte_t *dst_pte;                           218         pte_t *dst_pte;
502         unsigned long src_addr, dst_addr;         219         unsigned long src_addr, dst_addr;
503         long copied;                              220         long copied;
504         struct folio *folio;                   !! 221         struct page *page;
505         unsigned long vma_hpagesize;              222         unsigned long vma_hpagesize;
506         pgoff_t idx;                              223         pgoff_t idx;
507         u32 hash;                                 224         u32 hash;
508         struct address_space *mapping;            225         struct address_space *mapping;
509                                                   226 
510         /*                                        227         /*
511          * There is no default zero huge page     228          * There is no default zero huge page for all huge page sizes as
512          * supported by hugetlb.  A PMD_SIZE h    229          * supported by hugetlb.  A PMD_SIZE huge pages may exist as used
513          * by THP.  Since we can not reliably     230          * by THP.  Since we can not reliably insert a zero page, this
514          * feature is not supported.              231          * feature is not supported.
515          */                                       232          */
516         if (uffd_flags_mode_is(flags, MFILL_AT !! 233         if (zeropage) {
517                 up_read(&ctx->map_changing_loc !! 234                 mmap_read_unlock(dst_mm);
518                 uffd_mfill_unlock(dst_vma);    << 
519                 return -EINVAL;                   235                 return -EINVAL;
520         }                                         236         }
521                                                   237 
522         src_addr = src_start;                     238         src_addr = src_start;
523         dst_addr = dst_start;                     239         dst_addr = dst_start;
524         copied = 0;                               240         copied = 0;
525         folio = NULL;                          !! 241         page = NULL;
526         vma_hpagesize = vma_kernel_pagesize(ds    242         vma_hpagesize = vma_kernel_pagesize(dst_vma);
527                                                   243 
528         /*                                        244         /*
529          * Validate alignment based on huge pa    245          * Validate alignment based on huge page size
530          */                                       246          */
531         err = -EINVAL;                            247         err = -EINVAL;
532         if (dst_start & (vma_hpagesize - 1) ||    248         if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
533                 goto out_unlock;                  249                 goto out_unlock;
534                                                   250 
535 retry:                                            251 retry:
536         /*                                        252         /*
537          * On routine entry dst_vma is set.  I    253          * On routine entry dst_vma is set.  If we had to drop mmap_lock and
538          * retry, dst_vma will be set to NULL     254          * retry, dst_vma will be set to NULL and we must lookup again.
539          */                                       255          */
540         if (!dst_vma) {                           256         if (!dst_vma) {
541                 dst_vma = uffd_mfill_lock(dst_ << 
542                 if (IS_ERR(dst_vma)) {         << 
543                         err = PTR_ERR(dst_vma) << 
544                         goto out;              << 
545                 }                              << 
546                                                << 
547                 err = -ENOENT;                    257                 err = -ENOENT;
548                 if (!is_vm_hugetlb_page(dst_vm !! 258                 dst_vma = find_dst_vma(dst_mm, dst_start, len);
549                         goto out_unlock_vma;   !! 259                 if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
                                                   >> 260                         goto out_unlock;
550                                                   261 
551                 err = -EINVAL;                    262                 err = -EINVAL;
552                 if (vma_hpagesize != vma_kerne    263                 if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
553                         goto out_unlock_vma;   !! 264                         goto out_unlock;
554                                                   265 
555                 /*                             !! 266                 vm_shared = dst_vma->vm_flags & VM_SHARED;
556                  * If memory mappings are chan !! 267         }
557                  * operation (e.g. mremap) run !! 268 
558                  * request the user to retry l !! 269         /*
559                  */                            !! 270          * If not shared, ensure the dst_vma has a anon_vma.
560                 down_read(&ctx->map_changing_l !! 271          */
561                 err = -EAGAIN;                 !! 272         err = -ENOMEM;
562                 if (atomic_read(&ctx->mmap_cha !! 273         if (!vm_shared) {
                                                   >> 274                 if (unlikely(anon_vma_prepare(dst_vma)))
563                         goto out_unlock;          275                         goto out_unlock;
564         }                                         276         }
565                                                   277 
566         while (src_addr < src_start + len) {      278         while (src_addr < src_start + len) {
                                                   >> 279                 pte_t dst_pteval;
                                                   >> 280 
567                 BUG_ON(dst_addr >= dst_start +    281                 BUG_ON(dst_addr >= dst_start + len);
568                                                   282 
569                 /*                                283                 /*
570                  * Serialize via vma_lock and  !! 284                  * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
571                  * vma_lock ensures the dst_pt !! 285                  * i_mmap_rwsem ensures the dst_pte remains valid even
572                  * in the case of shared pmds.    286                  * in the case of shared pmds.  fault mutex prevents
573                  * races with other faulting t    287                  * races with other faulting threads.
574                  */                               288                  */
575                 idx = linear_page_index(dst_vm << 
576                 mapping = dst_vma->vm_file->f_    289                 mapping = dst_vma->vm_file->f_mapping;
                                                   >> 290                 i_mmap_lock_read(mapping);
                                                   >> 291                 idx = linear_page_index(dst_vma, dst_addr);
577                 hash = hugetlb_fault_mutex_has    292                 hash = hugetlb_fault_mutex_hash(mapping, idx);
578                 mutex_lock(&hugetlb_fault_mute    293                 mutex_lock(&hugetlb_fault_mutex_table[hash]);
579                 hugetlb_vma_lock_read(dst_vma) << 
580                                                   294 
581                 err = -ENOMEM;                    295                 err = -ENOMEM;
582                 dst_pte = huge_pte_alloc(dst_m !! 296                 dst_pte = huge_pte_alloc(dst_mm, dst_addr, vma_hpagesize);
583                 if (!dst_pte) {                   297                 if (!dst_pte) {
584                         hugetlb_vma_unlock_rea << 
585                         mutex_unlock(&hugetlb_    298                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                                                   >> 299                         i_mmap_unlock_read(mapping);
586                         goto out_unlock;          300                         goto out_unlock;
587                 }                                 301                 }
588                                                   302 
589                 if (!uffd_flags_mode_is(flags, !! 303                 err = -EEXIST;
590                     !huge_pte_none_mostly(huge !! 304                 dst_pteval = huge_ptep_get(dst_pte);
591                         err = -EEXIST;         !! 305                 if (!huge_pte_none(dst_pteval)) {
592                         hugetlb_vma_unlock_rea << 
593                         mutex_unlock(&hugetlb_    306                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                                                   >> 307                         i_mmap_unlock_read(mapping);
594                         goto out_unlock;          308                         goto out_unlock;
595                 }                                 309                 }
596                                                   310 
597                 err = hugetlb_mfill_atomic_pte !! 311                 err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
598                                                !! 312                                                 dst_addr, src_addr, &page);
599                                                   313 
600                 hugetlb_vma_unlock_read(dst_vm << 
601                 mutex_unlock(&hugetlb_fault_mu    314                 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                                                   >> 315                 i_mmap_unlock_read(mapping);
                                                   >> 316                 vm_alloc_shared = vm_shared;
602                                                   317 
603                 cond_resched();                   318                 cond_resched();
604                                                   319 
605                 if (unlikely(err == -ENOENT))     320                 if (unlikely(err == -ENOENT)) {
606                         up_read(&ctx->map_chan !! 321                         mmap_read_unlock(dst_mm);
607                         uffd_mfill_unlock(dst_ !! 322                         BUG_ON(!page);
608                         BUG_ON(!folio);        << 
609                                                   323 
610                         err = copy_folio_from_ !! 324                         err = copy_huge_page_from_user(page,
611                                                !! 325                                                 (const void __user *)src_addr,
                                                   >> 326                                                 vma_hpagesize / PAGE_SIZE,
                                                   >> 327                                                 true);
612                         if (unlikely(err)) {      328                         if (unlikely(err)) {
613                                 err = -EFAULT;    329                                 err = -EFAULT;
614                                 goto out;         330                                 goto out;
615                         }                         331                         }
                                                   >> 332                         mmap_read_lock(dst_mm);
                                                   >> 333                         /*
                                                   >> 334                          * If memory mappings are changing because of non-cooperative
                                                   >> 335                          * operation (e.g. mremap) running in parallel, bail out and
                                                   >> 336                          * request the user to retry later
                                                   >> 337                          */
                                                   >> 338                         if (mmap_changing && READ_ONCE(*mmap_changing)) {
                                                   >> 339                                 err = -EAGAIN;
                                                   >> 340                                 break;
                                                   >> 341                         }
616                                                   342 
617                         dst_vma = NULL;           343                         dst_vma = NULL;
618                         goto retry;               344                         goto retry;
619                 } else                            345                 } else
620                         BUG_ON(folio);         !! 346                         BUG_ON(page);
621                                                   347 
622                 if (!err) {                       348                 if (!err) {
623                         dst_addr += vma_hpages    349                         dst_addr += vma_hpagesize;
624                         src_addr += vma_hpages    350                         src_addr += vma_hpagesize;
625                         copied += vma_hpagesiz    351                         copied += vma_hpagesize;
626                                                   352 
627                         if (fatal_signal_pendi    353                         if (fatal_signal_pending(current))
628                                 err = -EINTR;     354                                 err = -EINTR;
629                 }                                 355                 }
630                 if (err)                          356                 if (err)
631                         break;                    357                         break;
632         }                                         358         }
633                                                   359 
634 out_unlock:                                       360 out_unlock:
635         up_read(&ctx->map_changing_lock);      !! 361         mmap_read_unlock(dst_mm);
636 out_unlock_vma:                                << 
637         uffd_mfill_unlock(dst_vma);            << 
638 out:                                              362 out:
639         if (folio)                             !! 363         if (page) {
640                 folio_put(folio);              !! 364                 /*
                                                   >> 365                  * We encountered an error and are about to free a newly
                                                   >> 366                  * allocated huge page.
                                                   >> 367                  *
                                                   >> 368                  * Reservation handling is very subtle, and is different for
                                                   >> 369                  * private and shared mappings.  See the routine
                                                   >> 370                  * restore_reserve_on_error for details.  Unfortunately, we
                                                   >> 371                  * can not call restore_reserve_on_error now as it would
                                                   >> 372                  * require holding mmap_lock.
                                                   >> 373                  *
                                                   >> 374                  * If a reservation for the page existed in the reservation
                                                   >> 375                  * map of a private mapping, the map was modified to indicate
                                                   >> 376                  * the reservation was consumed when the page was allocated.
                                                   >> 377                  * We clear the PagePrivate flag now so that the global
                                                   >> 378                  * reserve count will not be incremented in free_huge_page.
                                                   >> 379                  * The reservation map will still indicate the reservation
                                                   >> 380                  * was consumed and possibly prevent later page allocation.
                                                   >> 381                  * This is better than leaking a global reservation.  If no
                                                   >> 382                  * reservation existed, it is still safe to clear PagePrivate
                                                   >> 383                  * as no adjustments to reservation counts were made during
                                                   >> 384                  * allocation.
                                                   >> 385                  *
                                                   >> 386                  * The reservation map for shared mappings indicates which
                                                   >> 387                  * pages have reservations.  When a huge page is allocated
                                                   >> 388                  * for an address with a reservation, no change is made to
                                                   >> 389                  * the reserve map.  In this case PagePrivate will be set
                                                   >> 390                  * to indicate that the global reservation count should be
                                                   >> 391                  * incremented when the page is freed.  This is the desired
                                                   >> 392                  * behavior.  However, when a huge page is allocated for an
                                                   >> 393                  * address without a reservation a reservation entry is added
                                                   >> 394                  * to the reservation map, and PagePrivate will not be set.
                                                   >> 395                  * When the page is freed, the global reserve count will NOT
                                                   >> 396                  * be incremented and it will appear as though we have leaked
                                                   >> 397                  * reserved page.  In this case, set PagePrivate so that the
                                                   >> 398                  * global reserve count will be incremented to match the
                                                   >> 399                  * reservation map entry which was created.
                                                   >> 400                  *
                                                   >> 401                  * Note that vm_alloc_shared is based on the flags of the vma
                                                   >> 402                  * for which the page was originally allocated.  dst_vma could
                                                   >> 403                  * be different or NULL on error.
                                                   >> 404                  */
                                                   >> 405                 if (vm_alloc_shared)
                                                   >> 406                         SetPagePrivate(page);
                                                   >> 407                 else
                                                   >> 408                         ClearPagePrivate(page);
                                                   >> 409                 put_page(page);
                                                   >> 410         }
641         BUG_ON(copied < 0);                       411         BUG_ON(copied < 0);
642         BUG_ON(err > 0);                          412         BUG_ON(err > 0);
643         BUG_ON(!copied && !err);                  413         BUG_ON(!copied && !err);
644         return copied ? copied : err;             414         return copied ? copied : err;
645 }                                                 415 }
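
For orientation, the hugetlb branch that ends here is what services UFFDIO_COPY on a registered hugetlbfs range. A minimal user-space sketch of that call, assuming uffd is a userfaultfd already registered in MISSING mode over a hugetlbfs mapping and hpage_size is that mapping's huge page size (the destination address and length must be aligned to it):

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <stdint.h>
    #include <stddef.h>

    /*
     * Resolve one missing huge page by copying a fully populated staging
     * buffer into place.  dst and len must be huge-page aligned or the
     * kernel rejects the request.
     */
    static int resolve_hugetlb_fault(int uffd, void *dst, void *src,
                                     size_t hpage_size)
    {
            struct uffdio_copy copy = {
                    .dst  = (uintptr_t)dst,
                    .src  = (uintptr_t)src,
                    .len  = hpage_size,
                    .mode = 0,
            };

            if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
                    return -1;      /* copy.copy reports partial progress */
            return 0;
    }
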
646 #else /* !CONFIG_HUGETLB_PAGE */                  416 #else /* !CONFIG_HUGETLB_PAGE */
647 /* fail at build time if gcc attempts to use t    417 /* fail at build time if gcc attempts to use this */
648 extern ssize_t mfill_atomic_hugetlb(struct use !! 418 extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
649                                     struct vm_ !! 419                                       struct vm_area_struct *dst_vma,
650                                     unsigned l !! 420                                       unsigned long dst_start,
651                                     unsigned l !! 421                                       unsigned long src_start,
652                                     unsigned l !! 422                                       unsigned long len,
653                                     uffd_flags !! 423                                       bool *mmap_changing,
                                                   >> 424                                       bool zeropage);
654 #endif /* CONFIG_HUGETLB_PAGE */                  425 #endif /* CONFIG_HUGETLB_PAGE */
655                                                   426 
656 static __always_inline ssize_t mfill_atomic_pt !! 427 static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                                                   >> 428                                                 pmd_t *dst_pmd,
657                                                   429                                                 struct vm_area_struct *dst_vma,
658                                                   430                                                 unsigned long dst_addr,
659                                                   431                                                 unsigned long src_addr,
660                                                !! 432                                                 struct page **page,
661                                                !! 433                                                 bool zeropage,
                                                   >> 434                                                 bool wp_copy)
662 {                                                 435 {
663         ssize_t err;                              436         ssize_t err;
664                                                   437 
665         if (uffd_flags_mode_is(flags, MFILL_AT << 
666                 return mfill_atomic_pte_contin << 
667                                                << 
668         } else if (uffd_flags_mode_is(flags, M << 
669                 return mfill_atomic_pte_poison << 
670                                                << 
671         }                                      << 
672                                                << 
673         /*                                        438         /*
674          * The normal page fault path for a sh    439          * The normal page fault path for a shmem will invoke the
675          * fault, fill the hole in the file an    440          * fault, fill the hole in the file and COW it right away. The
676          * result generates plain anonymous me    441          * result generates plain anonymous memory. So when we are
 677          * asked to fill a hole in a MAP_PRIVA    442          * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
678          * generate anonymous memory directly     443          * generate anonymous memory directly without actually filling
679          * the hole. For the MAP_PRIVATE case     444          * the hole. For the MAP_PRIVATE case the robustness check
680          * only happens in the pagetable (to v    445          * only happens in the pagetable (to verify it's still none)
681          * and not in the radix tree.             446          * and not in the radix tree.
682          */                                       447          */
683         if (!(dst_vma->vm_flags & VM_SHARED))     448         if (!(dst_vma->vm_flags & VM_SHARED)) {
684                 if (uffd_flags_mode_is(flags,  !! 449                 if (!zeropage)
685                         err = mfill_atomic_pte !! 450                         err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
686                                                !! 451                                                dst_addr, src_addr, page,
687                                                !! 452                                                wp_copy);
688                 else                              453                 else
689                         err = mfill_atomic_pte !! 454                         err = mfill_zeropage_pte(dst_mm, dst_pmd,
690                                                   455                                                  dst_vma, dst_addr);
691         } else {                                  456         } else {
692                 err = shmem_mfill_atomic_pte(d !! 457                 VM_WARN_ON_ONCE(wp_copy);
693                                              d !! 458                 if (!zeropage)
694                                              f !! 459                         err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
                                                   >> 460                                                      dst_vma, dst_addr,
                                                   >> 461                                                      src_addr, page);
                                                   >> 462                 else
                                                   >> 463                         err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd,
                                                   >> 464                                                        dst_vma, dst_addr);
695         }                                         465         }
696                                                   466 
697         return err;                               467         return err;
698 }                                                 468 }
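
The MAP_PRIVATE shmem behaviour described in the comment above can be seen from user space: resolving the hole with UFFDIO_COPY installs plain anonymous memory in the private mapping and leaves the backing file untouched. A rough sketch, assuming uffd has already completed the UFFDIO_API handshake, memfd is a memfd that has already been sized, and 4 KiB pages:

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <string.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Map a memfd privately, register it, and pre-fill its first page. */
    static void *setup_private_shmem(int uffd, int memfd, size_t len)
    {
            void *area = mmap(NULL, len, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE, memfd, 0);
            if (area == MAP_FAILED)
                    return NULL;

            struct uffdio_register reg = {
                    .range = { .start = (uintptr_t)area, .len = len },
                    .mode  = UFFDIO_REGISTER_MODE_MISSING,
            };
            if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1)
                    return NULL;

            /* Filling the hole creates anonymous memory, not file pages. */
            static char staging[4096];      /* assumes 4 KiB pages */
            memset(staging, 0xaa, sizeof(staging));
            struct uffdio_copy copy = {
                    .dst = (uintptr_t)area,
                    .src = (uintptr_t)staging,
                    .len = sizeof(staging),
            };
            if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
                    return NULL;
            return area;
    }
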
699                                                   469 
700 static __always_inline ssize_t mfill_atomic(st !! 470 static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
701                                             un !! 471                                               unsigned long dst_start,
702                                             un !! 472                                               unsigned long src_start,
703                                             un !! 473                                               unsigned long len,
704                                             uf !! 474                                               bool zeropage,
                                                   >> 475                                               bool *mmap_changing,
                                                   >> 476                                               __u64 mode)
705 {                                                 477 {
706         struct mm_struct *dst_mm = ctx->mm;    << 
707         struct vm_area_struct *dst_vma;           478         struct vm_area_struct *dst_vma;
708         ssize_t err;                              479         ssize_t err;
709         pmd_t *dst_pmd;                           480         pmd_t *dst_pmd;
710         unsigned long src_addr, dst_addr;         481         unsigned long src_addr, dst_addr;
711         long copied;                              482         long copied;
712         struct folio *folio;                   !! 483         struct page *page;
                                                   >> 484         bool wp_copy;
713                                                   485 
714         /*                                        486         /*
715          * Sanitize the command parameters:       487          * Sanitize the command parameters:
716          */                                       488          */
717         BUG_ON(dst_start & ~PAGE_MASK);           489         BUG_ON(dst_start & ~PAGE_MASK);
718         BUG_ON(len & ~PAGE_MASK);                 490         BUG_ON(len & ~PAGE_MASK);
719                                                   491 
720         /* Does the address range wrap, or is     492         /* Does the address range wrap, or is the span zero-sized? */
721         BUG_ON(src_start + len <= src_start);     493         BUG_ON(src_start + len <= src_start);
722         BUG_ON(dst_start + len <= dst_start);     494         BUG_ON(dst_start + len <= dst_start);
723                                                   495 
724         src_addr = src_start;                     496         src_addr = src_start;
725         dst_addr = dst_start;                     497         dst_addr = dst_start;
726         copied = 0;                               498         copied = 0;
727         folio = NULL;                          !! 499         page = NULL;
728 retry:                                            500 retry:
729         /*                                     !! 501         mmap_read_lock(dst_mm);
730          * Make sure the vma is not shared, th << 
731          * both valid and fully within a singl << 
732          */                                    << 
733         dst_vma = uffd_mfill_lock(dst_mm, dst_ << 
734         if (IS_ERR(dst_vma)) {                 << 
735                 err = PTR_ERR(dst_vma);        << 
736                 goto out;                      << 
737         }                                      << 
738                                                   502 
739         /*                                        503         /*
740          * If memory mappings are changing bec    504          * If memory mappings are changing because of non-cooperative
741          * operation (e.g. mremap) running in     505          * operation (e.g. mremap) running in parallel, bail out and
742          * request the user to retry later        506          * request the user to retry later
743          */                                       507          */
744         down_read(&ctx->map_changing_lock);    << 
745         err = -EAGAIN;                            508         err = -EAGAIN;
746         if (atomic_read(&ctx->mmap_changing))  !! 509         if (mmap_changing && READ_ONCE(*mmap_changing))
                                                   >> 510                 goto out_unlock;
                                                   >> 511 
                                                   >> 512         /*
                                                   >> 513          * Make sure the vma is not shared, that the dst range is
                                                   >> 514          * both valid and fully within a single existing vma.
                                                   >> 515          */
                                                   >> 516         err = -ENOENT;
                                                   >> 517         dst_vma = find_dst_vma(dst_mm, dst_start, len);
                                                   >> 518         if (!dst_vma)
747                 goto out_unlock;                  519                 goto out_unlock;
748                                                   520 
749         err = -EINVAL;                            521         err = -EINVAL;
750         /*                                        522         /*
751          * shmem_zero_setup is invoked in mmap    523          * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
752          * it will overwrite vm_ops, so vma_is    524          * it will overwrite vm_ops, so vma_is_anonymous must return false.
753          */                                       525          */
754         if (WARN_ON_ONCE(vma_is_anonymous(dst_    526         if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
755             dst_vma->vm_flags & VM_SHARED))       527             dst_vma->vm_flags & VM_SHARED))
756                 goto out_unlock;                  528                 goto out_unlock;
757                                                   529 
758         /*                                        530         /*
759          * validate 'mode' now that we know th    531          * validate 'mode' now that we know the dst_vma: don't allow
760          * a wrprotect copy if the userfaultfd    532          * a wrprotect copy if the userfaultfd didn't register as WP.
761          */                                       533          */
762         if ((flags & MFILL_ATOMIC_WP) && !(dst !! 534         wp_copy = mode & UFFDIO_COPY_MODE_WP;
                                                   >> 535         if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
763                 goto out_unlock;                  536                 goto out_unlock;
764                                                   537 
765         /*                                        538         /*
766          * If this is a HUGETLB vma, pass off     539          * If this is a HUGETLB vma, pass off to appropriate routine
767          */                                       540          */
768         if (is_vm_hugetlb_page(dst_vma))          541         if (is_vm_hugetlb_page(dst_vma))
769                 return  mfill_atomic_hugetlb(c !! 542                 return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
770                                              s !! 543                                                src_start, len, mmap_changing,
                                                   >> 544                                                zeropage);
771                                                   545 
772         if (!vma_is_anonymous(dst_vma) && !vma    546         if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
773                 goto out_unlock;                  547                 goto out_unlock;
774         if (!vma_is_shmem(dst_vma) &&          !! 548 
775             uffd_flags_mode_is(flags, MFILL_AT !! 549         /*
                                                   >> 550          * Ensure the dst_vma has an anon_vma or this page
                                                   >> 551          * would get a NULL anon_vma when moved in the
                                                   >> 552          * dst_vma.
                                                   >> 553          */
                                                   >> 554         err = -ENOMEM;
                                                   >> 555         if (!(dst_vma->vm_flags & VM_SHARED) &&
                                                   >> 556             unlikely(anon_vma_prepare(dst_vma)))
776                 goto out_unlock;                  557                 goto out_unlock;
777                                                   558 
778         while (src_addr < src_start + len) {      559         while (src_addr < src_start + len) {
779                 pmd_t dst_pmdval;                 560                 pmd_t dst_pmdval;
780                                                   561 
781                 BUG_ON(dst_addr >= dst_start +    562                 BUG_ON(dst_addr >= dst_start + len);
782                                                   563 
783                 dst_pmd = mm_alloc_pmd(dst_mm,    564                 dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
784                 if (unlikely(!dst_pmd)) {         565                 if (unlikely(!dst_pmd)) {
785                         err = -ENOMEM;            566                         err = -ENOMEM;
786                         break;                    567                         break;
787                 }                                 568                 }
788                                                   569 
789                 dst_pmdval = pmdp_get_lockless !! 570                 dst_pmdval = pmd_read_atomic(dst_pmd);
790                 if (unlikely(pmd_none(dst_pmdv << 
791                     unlikely(__pte_alloc(dst_m << 
792                         err = -ENOMEM;         << 
793                         break;                 << 
794                 }                              << 
795                 dst_pmdval = pmdp_get_lockless << 
796                 /*                                571                 /*
797                  * If the dst_pmd is THP don't !! 572                  * If the dst_pmd is mapped as THP don't
798                  * (This includes the case whe !! 573                  * override it and just be strict.
799                  * changed back to none after  << 
800                  */                               574                  */
801                 if (unlikely(!pmd_present(dst_ !! 575                 if (unlikely(pmd_trans_huge(dst_pmdval))) {
802                              pmd_devmap(dst_pm << 
803                         err = -EEXIST;            576                         err = -EEXIST;
804                         break;                    577                         break;
805                 }                                 578                 }
806                 if (unlikely(pmd_bad(dst_pmdva !! 579                 if (unlikely(pmd_none(dst_pmdval)) &&
                                                   >> 580                     unlikely(__pte_alloc(dst_mm, dst_pmd))) {
                                                   >> 581                         err = -ENOMEM;
                                                   >> 582                         break;
                                                   >> 583                 }
                                                   >> 584                 /* If a huge pmd materialized from under us, fail */
                                                   >> 585                 if (unlikely(pmd_trans_huge(*dst_pmd))) {
807                         err = -EFAULT;            586                         err = -EFAULT;
808                         break;                    587                         break;
809                 }                                 588                 }
810                 /*                             << 
811                  * For shmem mappings, khugepa << 
812                  * tables under us; pte_offset << 
813                  */                            << 
814                                                   589 
815                 err = mfill_atomic_pte(dst_pmd !! 590                 BUG_ON(pmd_none(*dst_pmd));
816                                        src_add !! 591                 BUG_ON(pmd_trans_huge(*dst_pmd));
                                                   >> 592 
                                                   >> 593                 err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                                   >> 594                                        src_addr, &page, zeropage, wp_copy);
817                 cond_resched();                   595                 cond_resched();
818                                                   596 
819                 if (unlikely(err == -ENOENT))     597                 if (unlikely(err == -ENOENT)) {
820                         void *kaddr;           !! 598                         void *page_kaddr;
821                                                   599 
822                         up_read(&ctx->map_chan !! 600                         mmap_read_unlock(dst_mm);
823                         uffd_mfill_unlock(dst_ !! 601                         BUG_ON(!page);
824                         BUG_ON(!folio);        << 
825                                                   602 
826                         kaddr = kmap_local_fol !! 603                         page_kaddr = kmap(page);
827                         err = copy_from_user(k !! 604                         err = copy_from_user(page_kaddr,
828                                              (    605                                              (const void __user *) src_addr,
829                                              P    606                                              PAGE_SIZE);
830                         kunmap_local(kaddr);   !! 607                         kunmap(page);
831                         if (unlikely(err)) {      608                         if (unlikely(err)) {
832                                 err = -EFAULT;    609                                 err = -EFAULT;
833                                 goto out;         610                                 goto out;
834                         }                         611                         }
835                         flush_dcache_folio(fol !! 612                         flush_dcache_page(page);
836                         goto retry;               613                         goto retry;
837                 } else                            614                 } else
838                         BUG_ON(folio);         !! 615                         BUG_ON(page);
839                                                   616 
840                 if (!err) {                       617                 if (!err) {
841                         dst_addr += PAGE_SIZE;    618                         dst_addr += PAGE_SIZE;
842                         src_addr += PAGE_SIZE;    619                         src_addr += PAGE_SIZE;
843                         copied += PAGE_SIZE;      620                         copied += PAGE_SIZE;
844                                                   621 
845                         if (fatal_signal_pendi    622                         if (fatal_signal_pending(current))
846                                 err = -EINTR;     623                                 err = -EINTR;
847                 }                                 624                 }
848                 if (err)                          625                 if (err)
849                         break;                    626                         break;
850         }                                         627         }
851                                                   628 
852 out_unlock:                                       629 out_unlock:
853         up_read(&ctx->map_changing_lock);      !! 630         mmap_read_unlock(dst_mm);
854         uffd_mfill_unlock(dst_vma);            << 
855 out:                                              631 out:
856         if (folio)                             !! 632         if (page)
857                 folio_put(folio);              !! 633                 put_page(page);
858         BUG_ON(copied < 0);                       634         BUG_ON(copied < 0);
859         BUG_ON(err > 0);                          635         BUG_ON(err > 0);
860         BUG_ON(!copied && !err);                  636         BUG_ON(!copied && !err);
861         return copied ? copied : err;             637         return copied ? copied : err;
862 }                                                 638 }
863                                                   639 
864 ssize_t mfill_atomic_copy(struct userfaultfd_c !! 640 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
865                           unsigned long src_st !! 641                      unsigned long src_start, unsigned long len,
866                           uffd_flags_t flags)  !! 642                      bool *mmap_changing, __u64 mode)
867 {                                              << 
868         return mfill_atomic(ctx, dst_start, sr << 
869                             uffd_flags_set_mod << 
870 }                                              << 
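
Note the return convention carried up from mfill_atomic(): the caller gets the number of bytes filled, or a negative error only if nothing was filled, and user space sees the same thing through the copy field of the UFFDIO_COPY argument. A hedged user-space wrapper built on that convention (a real caller would probably bound the EAGAIN retries):

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <errno.h>
    #include <stddef.h>

    /*
     * Retry UFFDIO_COPY after partial progress or EAGAIN.  Even when the
     * ioctl fails part-way, uffdio_copy.copy holds the bytes already
     * copied, mirroring "return copied ? copied : err" above.
     */
    static int uffd_copy_all(int uffd, unsigned long dst, unsigned long src,
                             size_t len)
    {
            size_t done = 0;

            while (done < len) {
                    struct uffdio_copy copy = {
                            .dst  = dst + done,
                            .src  = src + done,
                            .len  = len - done,
                            .mode = 0,
                    };
                    if (ioctl(uffd, UFFDIO_COPY, &copy) == 0)
                            done = len;             /* whole request copied */
                    else if (copy.copy > 0)
                            done += copy.copy;      /* partial, retry rest */
                    else if (errno != EAGAIN)
                            return -1;              /* hard error */
            }
            return 0;
    }
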
871                                                << 
872 ssize_t mfill_atomic_zeropage(struct userfault << 
873                               unsigned long st << 
874                               unsigned long le << 
875 {                                              << 
876         return mfill_atomic(ctx, start, 0, len << 
877                             uffd_flags_set_mod << 
878 }                                              << 
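
mfill_atomic_zeropage() is the same machinery with the zeropage mode set; the user-space counterpart is UFFDIO_ZEROPAGE over a page-aligned, still-unpopulated range. A minimal sketch, assuming uffd is registered in MISSING mode over the range:

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <stddef.h>

    /* Fill a registered, unpopulated range with zero pages. */
    static int uffd_zero_range(int uffd, unsigned long start, size_t len)
    {
            struct uffdio_zeropage zp = {
                    .range = { .start = start, .len = len },
                    .mode  = 0,
            };

            if (ioctl(uffd, UFFDIO_ZEROPAGE, &zp) == -1)
                    return -1;      /* zp.zeropage reports partial progress */
            return 0;
    }
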
879                                                << 
880 ssize_t mfill_atomic_continue(struct userfault << 
881                               unsigned long le << 
882 {                                              << 
883                                                << 
884         /*                                     << 
885          * A caller might reasonably assume th << 
886          * smp_wmb() to ensure that any writes << 
887          * the thread doing the UFFDIO_CONTINU << 
888          * subsequent loads from the page thro << 
889          */                                    << 
890         smp_wmb();                             << 
891                                                << 
892         return mfill_atomic(ctx, start, 0, len << 
893                             uffd_flags_set_mod << 
894 }                                              << 
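
The smp_wmb() above is a publication barrier for minor-fault handling: user space writes the page contents through a second, non-registered mapping of the same file and then issues UFFDIO_CONTINUE, and the barrier ensures those writes are visible before the new PTE is. A sketch of that sequence, assuming an area registered with UFFDIO_REGISTER_MODE_MINOR and alias pointing at the aliased copy of the faulting page:

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <string.h>
    #include <stddef.h>

    /*
     * Minor-fault resolution: populate the page via the non-registered
     * alias mapping, then ask the kernel to map the existing page cache
     * page at the faulting address.  The kernel's smp_wmb() orders the
     * stores below before the PTE becomes visible.
     */
    static int uffd_continue_page(int uffd, char *alias,
                                  unsigned long fault_addr,
                                  size_t page_size, const void *data)
    {
            memcpy(alias, data, page_size); /* write contents via the alias */

            struct uffdio_continue cont = {
                    .range = { .start = fault_addr, .len = page_size },
                    .mode  = 0,
            };
            return ioctl(uffd, UFFDIO_CONTINUE, &cont);
    }
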
895                                                << 
896 ssize_t mfill_atomic_poison(struct userfaultfd << 
897                             unsigned long len, << 
898 {                                                 643 {
899         return mfill_atomic(ctx, start, 0, len !! 644         return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
900                             uffd_flags_set_mod !! 645                               mmap_changing, mode);
901 }                                                 646 }
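
mfill_atomic_poison() backs UFFDIO_POISON, which installs poison markers so that later faults in the range raise SIGBUS instead of ever mapping memory (useful, for example, when migrating a VM whose source pages were hardware-poisoned). A sketch of the call; treat the exact uapi struct and flag names as an assumption:

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <stddef.h>

    /* Mark a range as poisoned: subsequent accesses deliver SIGBUS. */
    static int uffd_poison_range(int uffd, unsigned long start, size_t len)
    {
            struct uffdio_poison poison = {
                    .range = { .start = start, .len = len },
                    .mode  = 0,
            };
            return ioctl(uffd, UFFDIO_POISON, &poison);
    }
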
902                                                   647 
903 long uffd_wp_range(struct vm_area_struct *dst_ !! 648 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
904                    unsigned long start, unsign !! 649                        unsigned long len, bool *mmap_changing)
905 {                                                 650 {
906         unsigned int mm_cp_flags;              !! 651         return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0);
907         struct mmu_gather tlb;                 << 
908         long ret;                              << 
909                                                << 
910         VM_WARN_ONCE(start < dst_vma->vm_start << 
911                         "The address range exc << 
912         if (enable_wp)                         << 
913                 mm_cp_flags = MM_CP_UFFD_WP;   << 
914         else                                   << 
915                 mm_cp_flags = MM_CP_UFFD_WP_RE << 
916                                                << 
917         /*                                     << 
918          * vma->vm_page_prot already reflects  << 
919          * VMA (see userfaultfd_set_vm_flags() << 
920          * to be write-protected as default wh << 
921          * Try upgrading write permissions man << 
922          */                                    << 
923         if (!enable_wp && vma_wants_manual_pte << 
924                 mm_cp_flags |= MM_CP_TRY_CHANG << 
925         tlb_gather_mmu(&tlb, dst_vma->vm_mm);  << 
926         ret = change_protection(&tlb, dst_vma, << 
927         tlb_finish_mmu(&tlb);                  << 
928                                                << 
929         return ret;                            << 
930 }                                                 652 }
931                                                   653 
932 int mwriteprotect_range(struct userfaultfd_ctx !! 654 int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
933                         unsigned long len, boo !! 655                         unsigned long len, bool enable_wp, bool *mmap_changing)
934 {                                                 656 {
935         struct mm_struct *dst_mm = ctx->mm;    << 
936         unsigned long end = start + len;       << 
937         unsigned long _start, _end;            << 
938         struct vm_area_struct *dst_vma;           657         struct vm_area_struct *dst_vma;
939         unsigned long page_mask;               !! 658         pgprot_t newprot;
940         long err;                              !! 659         int err;
941         VMA_ITERATOR(vmi, dst_mm, start);      << 
942                                                   660 
943         /*                                        661         /*
944          * Sanitize the command parameters:       662          * Sanitize the command parameters:
945          */                                       663          */
946         BUG_ON(start & ~PAGE_MASK);               664         BUG_ON(start & ~PAGE_MASK);
947         BUG_ON(len & ~PAGE_MASK);                 665         BUG_ON(len & ~PAGE_MASK);
948                                                   666 
949         /* Does the address range wrap, or is     667         /* Does the address range wrap, or is the span zero-sized? */
950         BUG_ON(start + len <= start);             668         BUG_ON(start + len <= start);
951                                                   669 
952         mmap_read_lock(dst_mm);                   670         mmap_read_lock(dst_mm);
953                                                   671 
954         /*                                        672         /*
955          * If memory mappings are changing bec    673          * If memory mappings are changing because of non-cooperative
956          * operation (e.g. mremap) running in     674          * operation (e.g. mremap) running in parallel, bail out and
957          * request the user to retry later        675          * request the user to retry later
958          */                                       676          */
959         down_read(&ctx->map_changing_lock);    << 
960         err = -EAGAIN;                            677         err = -EAGAIN;
961         if (atomic_read(&ctx->mmap_changing))  !! 678         if (mmap_changing && READ_ONCE(*mmap_changing))
962                 goto out_unlock;                  679                 goto out_unlock;
963                                                   680 
964         err = -ENOENT;                            681         err = -ENOENT;
965         for_each_vma_range(vmi, dst_vma, end)  !! 682         dst_vma = find_dst_vma(dst_mm, start, len);
966                                                << 
967                 if (!userfaultfd_wp(dst_vma))  << 
968                         err = -ENOENT;         << 
969                         break;                 << 
970                 }                              << 
971                                                << 
972                 if (is_vm_hugetlb_page(dst_vma << 
973                         err = -EINVAL;         << 
974                         page_mask = vma_kernel << 
975                         if ((start & page_mask << 
976                                 break;         << 
977                 }                              << 
978                                                << 
979                 _start = max(dst_vma->vm_start << 
980                 _end = min(dst_vma->vm_end, en << 
981                                                << 
982                 err = uffd_wp_range(dst_vma, _ << 
983                                                << 
984                 /* Return 0 on success, <0 on  << 
985                 if (err < 0)                   << 
986                         break;                 << 
987                 err = 0;                       << 
988         }                                      << 
989 out_unlock:                                    << 
990         up_read(&ctx->map_changing_lock);      << 
991         mmap_read_unlock(dst_mm);              << 
992         return err;                            << 
993 }                                              << 
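
mwriteprotect_range() backs UFFDIO_WRITEPROTECT, and as the enable_wp parameter suggests, the same ioctl both arms and disarms write protection on a range registered with UFFDIO_REGISTER_MODE_WP. A minimal user-space sketch:

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <stdbool.h>
    #include <stddef.h>

    /* Toggle userfaultfd write-protection over a WP-registered range. */
    static int uffd_wp(int uffd, unsigned long start, size_t len, bool enable)
    {
            struct uffdio_writeprotect wp = {
                    .range = { .start = start, .len = len },
                    .mode  = enable ? UFFDIO_WRITEPROTECT_MODE_WP : 0,
            };
            return ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);
    }
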
994                                                << 
995                                                << 
996 void double_pt_lock(spinlock_t *ptl1,          << 
997                     spinlock_t *ptl2)          << 
998         __acquires(ptl1)                       << 
999         __acquires(ptl2)                       << 
1000 {                                             << 
1001         if (ptl1 > ptl2)                      << 
1002                 swap(ptl1, ptl2);             << 
1003         /* lock in virtual address order to a << 
1004         spin_lock(ptl1);                      << 
1005         if (ptl1 != ptl2)                     << 
1006                 spin_lock_nested(ptl2, SINGLE << 
1007         else                                  << 
1008                 __acquire(ptl2);              << 
1009 }                                             << 
1010                                               << 
1011 void double_pt_unlock(spinlock_t *ptl1,       << 
1012                       spinlock_t *ptl2)       << 
1013         __releases(ptl1)                      << 
1014         __releases(ptl2)                      << 
1015 {                                             << 
1016         spin_unlock(ptl1);                    << 
1017         if (ptl1 != ptl2)                     << 
1018                 spin_unlock(ptl2);            << 
1019         else                                  << 
1020                 __release(ptl2);              << 
1021 }                                             << 
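
double_pt_lock()/double_pt_unlock() use the usual deadlock-avoidance trick of always acquiring the two page-table locks in a fixed (address) order and tolerating the aliased case. The same pattern in plain user-space code, purely as an illustrative sketch with hypothetical helper names:

    #include <pthread.h>

    /*
     * Acquire two mutexes in a stable (address) order so concurrent callers
     * passing them in either order can never deadlock; handle a == b.
     */
    static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            if (a > b) {
                    pthread_mutex_t *tmp = a;
                    a = b;
                    b = tmp;
            }
            pthread_mutex_lock(a);
            if (a != b)
                    pthread_mutex_lock(b);
    }

    static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            pthread_mutex_unlock(a);
            if (a != b)
                    pthread_mutex_unlock(b);
    }
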
1022                                               << 
1023                                               << 
1024 static int move_present_pte(struct mm_struct  << 
1025                             struct vm_area_st << 
1026                             struct vm_area_st << 
1027                             unsigned long dst << 
1028                             pte_t *dst_pte, p << 
1029                             pte_t orig_dst_pt << 
1030                             spinlock_t *dst_p << 
1031                             struct folio *src << 
1032 {                                             << 
1033         int err = 0;                          << 
1034                                               << 
1035         double_pt_lock(dst_ptl, src_ptl);     << 
1036                                               << 
1037         if (!pte_same(ptep_get(src_pte), orig << 
1038             !pte_same(ptep_get(dst_pte), orig << 
1039                 err = -EAGAIN;                << 
1040                 goto out;                     << 
1041         }                                     << 
1042         if (folio_test_large(src_folio) ||    << 
1043             folio_maybe_dma_pinned(src_folio) << 
1044             !PageAnonExclusive(&src_folio->pa << 
1045                 err = -EBUSY;                 << 
1046                 goto out;                     << 
1047         }                                     << 
1048                                               << 
1049         orig_src_pte = ptep_clear_flush(src_v << 
1050         /* Folio got pinned from under us. Pu << 
1051         if (folio_maybe_dma_pinned(src_folio) << 
1052                 set_pte_at(mm, src_addr, src_ << 
1053                 err = -EBUSY;                 << 
1054                 goto out;                     << 
1055         }                                     << 
1056                                               << 
1057         folio_move_anon_rmap(src_folio, dst_v << 
1058         src_folio->index = linear_page_index( << 
1059                                               << 
1060         orig_dst_pte = mk_pte(&src_folio->pag << 
1061         /* Follow mremap() behavior and treat << 
1062         orig_dst_pte = pte_mkwrite(pte_mkdirt << 
1063                                               << 
1064         set_pte_at(mm, dst_addr, dst_pte, ori << 
1065 out:                                          << 
1066         double_pt_unlock(dst_ptl, src_ptl);   << 
1067         return err;                           << 
1068 }                                             << 
1069                                               << 
1070 static int move_swap_pte(struct mm_struct *mm << 
1071                          unsigned long dst_ad << 
1072                          pte_t *dst_pte, pte_ << 
1073                          pte_t orig_dst_pte,  << 
1074                          spinlock_t *dst_ptl, << 
1075 {                                             << 
1076         if (!pte_swp_exclusive(orig_src_pte)) << 
1077                 return -EBUSY;                << 
1078                                               << 
1079         double_pt_lock(dst_ptl, src_ptl);     << 
1080                                               << 
1081         if (!pte_same(ptep_get(src_pte), orig << 
1082             !pte_same(ptep_get(dst_pte), orig << 
1083                 double_pt_unlock(dst_ptl, src << 
1084                 return -EAGAIN;               << 
1085         }                                     << 
1086                                               << 
1087         orig_src_pte = ptep_get_and_clear(mm, << 
1088         set_pte_at(mm, dst_addr, dst_pte, ori << 
1089         double_pt_unlock(dst_ptl, src_ptl);   << 
1090                                               << 
1091         return 0;                             << 
1092 }                                             << 
1093                                               << 
1094 static int move_zeropage_pte(struct mm_struct << 
1095                              struct vm_area_s << 
1096                              struct vm_area_s << 
1097                              unsigned long ds << 
1098                              pte_t *dst_pte,  << 
1099                              pte_t orig_dst_p << 
1100                              spinlock_t *dst_ << 
1101 {                                             << 
1102         pte_t zero_pte;                       << 
1103                                               << 
1104         double_pt_lock(dst_ptl, src_ptl);     << 
1105         if (!pte_same(ptep_get(src_pte), orig << 
1106             !pte_same(ptep_get(dst_pte), orig << 
1107                 double_pt_unlock(dst_ptl, src << 
1108                 return -EAGAIN;               << 
1109         }                                     << 
1110                                               << 
1111         zero_pte = pte_mkspecial(pfn_pte(my_z << 
1112                                          dst_ << 
1113         ptep_clear_flush(src_vma, src_addr, s << 
1114         set_pte_at(mm, dst_addr, dst_pte, zer << 
1115         double_pt_unlock(dst_ptl, src_ptl);   << 
1116                                               << 
1117         return 0;                             << 
1118 }                                             << 
1119                                               << 
1120                                               << 
1121 /*                                            << 
1122  * The mmap_lock for reading is held by the c << 
1123  * from src_pmd to dst_pmd if possible, and r << 
1124  * in moving the page.                        << 
1125  */                                           << 
1126 static int move_pages_pte(struct mm_struct *m << 
1127                           struct vm_area_stru << 
1128                           struct vm_area_stru << 
1129                           unsigned long dst_a << 
1130                           __u64 mode)         << 
1131 {                                             << 
1132         swp_entry_t entry;                    << 
1133         pte_t orig_src_pte, orig_dst_pte;     << 
1134         pte_t src_folio_pte;                  << 
1135         spinlock_t *src_ptl, *dst_ptl;        << 
1136         pte_t *src_pte = NULL;                << 
1137         pte_t *dst_pte = NULL;                << 
1138                                               << 
1139         struct folio *src_folio = NULL;       << 
1140         struct anon_vma *src_anon_vma = NULL; << 
1141         struct mmu_notifier_range range;      << 
1142         int err = 0;                          << 
1143                                               << 
1144         flush_cache_range(src_vma, src_addr,  << 
1145         mmu_notifier_range_init(&range, MMU_N << 
1146                                 src_addr, src << 
1147         mmu_notifier_invalidate_range_start(& << 
1148 retry:                                        << 
1149         dst_pte = pte_offset_map_nolock(mm, d << 
1150                                               << 
1151         /* Retry if a huge pmd materialized f << 
1152         if (unlikely(!dst_pte)) {             << 
1153                 err = -EAGAIN;                << 
1154                 goto out;                     << 
1155         }                                     << 
1156                                               << 
1157         src_pte = pte_offset_map_nolock(mm, s << 
1158                                               << 
1159         /*                                    << 
1160          * We held the mmap_lock for reading  << 
1161          * can zap transparent huge pages und << 
1162          * transparent huge page fault can es << 
1163          * transparent huge pages under us.   << 
1164          */                                   << 
1165         if (unlikely(!src_pte)) {             << 
1166                 err = -EAGAIN;                << 
1167                 goto out;                     << 
1168         }                                     << 
1169                                               << 
1170         /* Sanity checks before the operation << 
1171         if (WARN_ON_ONCE(pmd_none(*dst_pmd))  << 
1172             WARN_ON_ONCE(pmd_trans_huge(*dst_ << 
1173                 err = -EINVAL;                << 
1174                 goto out;                     << 
1175         }                                     << 
1176                                               << 
1177         spin_lock(dst_ptl);                   << 
1178         orig_dst_pte = ptep_get(dst_pte);     << 
1179         spin_unlock(dst_ptl);                 << 
1180         if (!pte_none(orig_dst_pte)) {        << 
1181                 err = -EEXIST;                << 
1182                 goto out;                     << 
1183         }                                     << 
1184                                               << 
1185         spin_lock(src_ptl);                   << 
1186         orig_src_pte = ptep_get(src_pte);     << 
1187         spin_unlock(src_ptl);                 << 
1188         if (pte_none(orig_src_pte)) {         << 
1189                 if (!(mode & UFFDIO_MOVE_MODE << 
1190                         err = -ENOENT;        << 
1191                 else /* nothing to do to move << 
1192                         err = 0;              << 
1193                 goto out;                     << 
1194         }                                     << 
1195                                               << 
1196         /* If PTE changed after we locked the << 
1197         if (src_folio && unlikely(!pte_same(s << 
1198                 err = -EAGAIN;                << 
1199                 goto out;                     << 
1200         }                                     << 
1201                                               << 
1202         if (pte_present(orig_src_pte)) {      << 
1203                 if (is_zero_pfn(pte_pfn(orig_ << 
1204                         err = move_zeropage_p << 
1205                                               << 
1206                                               << 
1207                                               << 
1208                         goto out;             << 
1209                 }                             << 
1210                                               << 
1211                 /*                            << 
1212                  * Pin and lock both source f << 
1213                  * RCU read section, we can't << 
1214                  * unmap the ptes, obtain the << 
1215                  */                           << 
1216                 if (!src_folio) {             << 
1217                         struct folio *folio;  << 
1218                                               << 
1219                         /*                    << 
1220                          * Pin the page while << 
1221                          * page isn't freed u << 
1222                          */                   << 
1223                         spin_lock(src_ptl);   << 
1224                         if (!pte_same(orig_sr << 
1225                                 spin_unlock(s << 
1226                                 err = -EAGAIN << 
1227                                 goto out;     << 
1228                         }                     << 
1229                                               << 
1230                         folio = vm_normal_fol << 
1231                         if (!folio || !PageAn << 
1232                                 spin_unlock(s << 
1233                                 err = -EBUSY; << 
1234                                 goto out;     << 
1235                         }                     << 
1236                                               << 
1237                         folio_get(folio);     << 
1238                         src_folio = folio;    << 
1239                         src_folio_pte = orig_ << 
1240                         spin_unlock(src_ptl); << 
1241                                               << 
1242                         if (!folio_trylock(sr << 
1243                                 pte_unmap(&or << 
1244                                 pte_unmap(&or << 
1245                                 src_pte = dst << 
1246                                 /* now we can << 
1247                                 folio_lock(sr << 
1248                                 goto retry;   << 
1249                         }                     << 
1250                                               << 
1251                         if (WARN_ON_ONCE(!fol << 
1252                                 err = -EBUSY; << 
1253                                 goto out;     << 
1254                         }                     << 
1255                 }                             << 
1256                                               << 
1257                 /* at this point we have src_ << 
1258                 if (folio_test_large(src_foli << 
1259                         /* split_folio() can  << 
1260                         pte_unmap(&orig_src_p << 
1261                         pte_unmap(&orig_dst_p << 
1262                         src_pte = dst_pte = N << 
1263                         err = split_folio(src << 
1264                         if (err)              << 
1265                                 goto out;     << 
1266                         /* have to reacquire  << 
1267                         folio_unlock(src_foli << 
1268                         folio_put(src_folio); << 
1269                         src_folio = NULL;     << 
1270                         goto retry;           << 
1271                 }                             << 
1272                                               << 
1273                 if (!src_anon_vma) {          << 
1274                         /*                    << 
1275                          * folio_referenced w << 
1276                          * without the folio  << 
1277                          * the anon_vma lock, << 
1278                          */                   << 
1279                         src_anon_vma = folio_ << 
1280                         if (!src_anon_vma) {  << 
1281                                 /* page was u << 
1282                                 err = -EAGAIN << 
1283                                 goto out;     << 
1284                         }                     << 
1285                         if (!anon_vma_trylock << 
1286                                 pte_unmap(&or << 
1287                                 pte_unmap(&or << 
1288                                 src_pte = dst << 
1289                                 /* now we can << 
1290                                 anon_vma_lock << 
1291                                 goto retry;   << 
1292                         }                     << 
1293                 }                             << 
1294                                               << 
1295                 err = move_present_pte(mm,  d << 
1296                                        dst_ad << 
1297                                        orig_d << 
1298                                        dst_pt << 
1299         } else {                              << 
1300                 entry = pte_to_swp_entry(orig << 
1301                 if (non_swap_entry(entry)) {  << 
1302                         if (is_migration_entr << 
1303                                 pte_unmap(&or << 
1304                                 pte_unmap(&or << 
1305                                 src_pte = dst << 
1306                                 migration_ent << 
1307                                 err = -EAGAIN << 
1308                         } else                << 
1309                                 err = -EFAULT << 
1310                         goto out;             << 
1311                 }                             << 
1312                                               << 
1313                 err = move_swap_pte(mm, dst_a << 
1314                                     dst_pte,  << 
1315                                     orig_dst_ << 
1316                                     dst_ptl,  << 
1317         }                                     << 
1318                                               << 
1319 out:                                          << 
1320         if (src_anon_vma) {                   << 
1321                 anon_vma_unlock_write(src_ano << 
1322                 put_anon_vma(src_anon_vma);   << 
1323         }                                     << 
1324         if (src_folio) {                      << 
1325                 folio_unlock(src_folio);      << 
1326                 folio_put(src_folio);         << 
1327         }                                     << 
1328         if (dst_pte)                          << 
1329                 pte_unmap(dst_pte);           << 
1330         if (src_pte)                          << 
1331                 pte_unmap(src_pte);           << 
1332         mmu_notifier_invalidate_range_end(&ra << 
1333                                               << 
1334         return err;                           << 
1335 }                                             << 
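
move_pages_pte() is the per-PTE worker behind UFFDIO_MOVE, which remaps existing anonymous pages from a source range into the userfaultfd-registered destination instead of copying them. A user-space sketch of the ioctl; the struct and flag names follow the 6.8+ uapi, so treat the exact names as an assumption:

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <stddef.h>

    /*
     * Move (remap) already-populated anonymous pages from src to dst in the
     * same mm.  ALLOW_SRC_HOLES tolerates unpopulated source pages, which
     * matches the pte_none() handling in move_pages_pte() above.
     */
    static long uffd_move_range(int uffd, unsigned long dst, unsigned long src,
                                size_t len)
    {
            struct uffdio_move mv = {
                    .dst  = dst,
                    .src  = src,
                    .len  = len,
                    .mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES,
            };

            if (ioctl(uffd, UFFDIO_MOVE, &mv) == -1)
                    return -1;      /* mv.move reports bytes moved so far */
            return mv.move;
    }
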
1336                                               << 
1337 #ifdef CONFIG_TRANSPARENT_HUGEPAGE            << 
1338 static inline bool move_splits_huge_pmd(unsig << 
1339                                         unsig << 
1340                                         unsig << 
1341 {                                             << 
1342         return (src_addr & ~HPAGE_PMD_MASK) | << 
1343                 src_end - src_addr < HPAGE_PM << 
1344 }                                             << 
1345 #else                                         << 
1346 static inline bool move_splits_huge_pmd(unsig << 
1347                                         unsig << 
1348                                         unsig << 
1349 {                                             << 
1350         /* This is unreachable anyway, just t << 
1351         return false;                         << 
1352 }                                             << 
1353 #endif                                        << 
1354                                               << 
1355 static inline bool vma_move_compatible(struct << 
1356 {                                             << 
1357         return !(vma->vm_flags & (VM_PFNMAP | << 
1358                                   VM_MIXEDMAP << 
1359 }                                             << 
1360                                               << 
1361 static int validate_move_areas(struct userfau << 
1362                                struct vm_area << 
1363                                struct vm_area << 
1364 {                                             << 
1365         /* Only allow moving if both have the << 
1366         if ((src_vma->vm_flags & VM_ACCESS_FL << 
1367             pgprot_val(src_vma->vm_page_prot) << 
1368                 return -EINVAL;               << 
1369                                               << 
1370         /* Only allow moving if both are mloc << 
1371         if ((src_vma->vm_flags & VM_LOCKED) ! << 
1372                 return -EINVAL;               << 
1373                                               << 
1374         /*                                    << 
1375          * For now, we keep it simple and onl << 
1376          * Access flags are equal, therefore  << 
1377          */                                   << 
1378         if (!(src_vma->vm_flags & VM_WRITE))  << 
1379                 return -EINVAL;               << 
1380                                               << 
1381         /* Check if vma flags indicate conten << 
1382         if (!vma_move_compatible(src_vma) ||  << 
1383                 return -EINVAL;               << 
1384                                               << 
1385         /* Ensure dst_vma is registered in uf << 
1386         if (!dst_vma->vm_userfaultfd_ctx.ctx  << 
1387             dst_vma->vm_userfaultfd_ctx.ctx ! << 
1388                 return -EINVAL;               << 
1389                                               << 
1390         /* Only allow moving across anonymous << 
1391         if (!vma_is_anonymous(src_vma) || !vm << 
1392                 return -EINVAL;               << 
1393                                               << 
1394         return 0;                             << 
1395 }                                             << 

static __always_inline
int find_vmas_mm_locked(struct mm_struct *mm,
                        unsigned long dst_start,
                        unsigned long src_start,
                        struct vm_area_struct **dst_vmap,
                        struct vm_area_struct **src_vmap)
{
        struct vm_area_struct *vma;

        mmap_assert_locked(mm);
        vma = find_vma_and_prepare_anon(mm, dst_start);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        *dst_vmap = vma;
        /* Skip finding src_vma if src_start is in dst_vma */
        if (src_start >= vma->vm_start && src_start < vma->vm_end)
                goto out_success;

        vma = vma_lookup(mm, src_start);
        if (!vma)
                return -ENOENT;
out_success:
        *src_vmap = vma;
        return 0;
}

#ifdef CONFIG_PER_VMA_LOCK
static int uffd_move_lock(struct mm_struct *mm,
                          unsigned long dst_start,
                          unsigned long src_start,
                          struct vm_area_struct **dst_vmap,
                          struct vm_area_struct **src_vmap)
{
        struct vm_area_struct *vma;
        int err;

        vma = uffd_lock_vma(mm, dst_start);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        *dst_vmap = vma;
        /*
         * Skip finding src_vma if src_start is in dst_vma. This also ensures
         * that we don't lock the same vma twice.
         */
        if (src_start >= vma->vm_start && src_start < vma->vm_end) {
                *src_vmap = vma;
                return 0;
        }

        /*
         * Using uffd_lock_vma() to get src_vma can lead to following deadlock:
         *
         * Thread1                              Thread2
         * -------                              -------
         * vma_start_read(dst_vma)
         *                                      mmap_write_lock(mm)
         *                                      vma_start_write(src_vma)
         * vma_start_read(src_vma)
         * mmap_read_lock(mm)
         *                                      vma_start_write(dst_vma)
         */
        *src_vmap = lock_vma_under_rcu(mm, src_start);
        if (likely(*src_vmap))
                return 0;

        /* Undo any locking and retry in mmap_lock critical section */
        vma_end_read(*dst_vmap);

        mmap_read_lock(mm);
        err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
        if (!err) {
                /*
                 * See comment in uffd_lock_vma() as to why not using
                 * vma_start_read() here.
                 */
                down_read(&(*dst_vmap)->vm_lock->lock);
                if (*dst_vmap != *src_vmap)
                        down_read_nested(&(*src_vmap)->vm_lock->lock,
                                         SINGLE_DEPTH_NESTING);
        }
        mmap_read_unlock(mm);
        return err;
}

static void uffd_move_unlock(struct vm_area_struct *dst_vma,
                             struct vm_area_struct *src_vma)
{
        vma_end_read(src_vma);
        if (src_vma != dst_vma)
                vma_end_read(dst_vma);
}

#else

static int uffd_move_lock(struct mm_struct *mm,
                          unsigned long dst_start,
                          unsigned long src_start,
                          struct vm_area_struct **dst_vmap,
                          struct vm_area_struct **src_vmap)
{
        int err;

        mmap_read_lock(mm);
        err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
        if (err)
                mmap_read_unlock(mm);
        return err;
}

static void uffd_move_unlock(struct vm_area_struct *dst_vma,
                             struct vm_area_struct *src_vma)
{
        mmap_assert_locked(src_vma->vm_mm);
        mmap_read_unlock(dst_vma->vm_mm);
}
#endif

/**
 * move_pages - move arbitrary anonymous pages of an existing vma
 * @ctx: pointer to the userfaultfd context
 * @dst_start: start of the destination virtual memory range
 * @src_start: start of the source virtual memory range
 * @len: length of the virtual memory range
 * @mode: flags from uffdio_move.mode
 *
 * It will either use the mmap_lock in read mode or per-vma locks.
 *
 * move_pages() remaps arbitrary anonymous pages atomically in zero
 * copy. It only works on non shared anonymous pages because those can
 * be relocated without generating non linear anon_vmas in the rmap
 * code.
 *
 * It provides a zero copy mechanism to handle userspace page faults.
 * The source vma pages should have mapcount == 1, which can be
 * enforced by using madvise(MADV_DONTFORK) on the src vma.
 *
 * The thread receiving the page during the userland page fault
 * will receive the faulting page in the source vma through the network,
 * storage or any other I/O device (MADV_DONTFORK in the source vma
 * keeps move_pages() from failing with -EBUSY if the process forks
 * before move_pages() is called), then it will call move_pages() to map
 * the page in the faulting address in the destination vma.
 *
 * This userfaultfd command works purely via pagetables, so it's the
 * most efficient way to move physical non shared anonymous pages
 * across different virtual addresses. Unlike mremap()/mmap()/munmap()
 * it does not create any new vmas. The mapping in the destination
 * address is atomic.
 *
 * It only works if the vma protection bits are identical from the
 * source and destination vma.
 *
 * It can remap non shared anonymous pages within the same vma too.
 *
 * If the source virtual memory range has any unmapped holes, or if
 * the destination virtual memory range is not a whole unmapped hole,
 * move_pages() will fail respectively with -ENOENT or -EEXIST. This
 * provides a very strict behavior to avoid any chance of memory
 * corruption going unnoticed if there are userland race conditions.
 * Only one thread should resolve the userland page fault at any given
 * time for any given faulting address. This means that if two threads
 * try to both call move_pages() on the same destination address at the
 * same time, the second thread will get an explicit error from this
 * command.
 *
 * The command retval will be "len" if successful. The command however
 * can be interrupted by fatal signals or errors. If interrupted it
 * will return the number of bytes successfully remapped before the
 * interruption if any, or the negative error if none. It will never
 * return zero. Either it will return an error or an amount of bytes
 * successfully moved. If the retval reports a "short" remap, the
 * move_pages() command should be repeated by userland with
 * src+retval, dst+retval, len-retval if it wants to know about the
 * error that interrupted it.
 *
 * The UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES flag can be specified to
 * prevent -ENOENT errors from materializing if there are holes in the
 * source virtual range that is being remapped. The holes will be
 * accounted as successfully remapped in the retval of the command.
 * This is mostly useful to remap hugepage naturally aligned virtual
 * regions without knowing if there are transparent hugepages in the
 * regions or not, but preventing the risk of having to split the
 * hugepmd during the remap.
 *
 * If there's any rmap walk that is taking the anon_vma locks without
 * first obtaining the folio lock (the only current instance is
 * folio_referenced), they will have to verify if the folio->mapping
 * has changed after taking the anon_vma lock. If it changed they
 * should release the lock and retry obtaining a new anon_vma, because
 * it means the anon_vma was changed by move_pages() before the lock
 * could be obtained. This is the only additional complexity added to
 * the rmap code to provide this anonymous page remapping functionality.
 */
ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
                   unsigned long src_start, unsigned long len, __u64 mode)
{
        struct mm_struct *mm = ctx->mm;
        struct vm_area_struct *src_vma, *dst_vma;
        unsigned long src_addr, dst_addr;
        pmd_t *src_pmd, *dst_pmd;
        long err = -EINVAL;
        ssize_t moved = 0;

        /* Sanitize the command parameters. */
        if (WARN_ON_ONCE(src_start & ~PAGE_MASK) ||
            WARN_ON_ONCE(dst_start & ~PAGE_MASK) ||
            WARN_ON_ONCE(len & ~PAGE_MASK))
                goto out;

        /* Does the address range wrap, or is the span zero-sized? */
        if (WARN_ON_ONCE(src_start + len <= src_start) ||
            WARN_ON_ONCE(dst_start + len <= dst_start))
                goto out;

        err = uffd_move_lock(mm, dst_start, src_start, &dst_vma, &src_vma);
        if (err)
                goto out;

        /* Re-check after taking map_changing_lock */
        err = -EAGAIN;
        down_read(&ctx->map_changing_lock);
        if (likely(atomic_read(&ctx->mmap_changing)))
                goto out_unlock;
        /*
         * Make sure the vma is not shared, that the src and dst remap
         * ranges are both valid and fully within a single existing
         * vma.
         */
        err = -EINVAL;
        if (src_vma->vm_flags & VM_SHARED)
                goto out_unlock;
        if (src_start + len > src_vma->vm_end)
                goto out_unlock;

        if (dst_vma->vm_flags & VM_SHARED)
                goto out_unlock;
        if (dst_start + len > dst_vma->vm_end)
                goto out_unlock;

        err = validate_move_areas(ctx, src_vma, dst_vma);
        if (err)
                goto out_unlock;

        for (src_addr = src_start, dst_addr = dst_start;
             src_addr < src_start + len;) {
                spinlock_t *ptl;
                pmd_t dst_pmdval;
                unsigned long step_size;

                /*
                 * Below works because anonymous area would not have a
                 * transparent huge PUD. If file-backed support is added,
                 * that case would need to be handled here.
                 */
                src_pmd = mm_find_pmd(mm, src_addr);
                if (unlikely(!src_pmd)) {
                        if (!(mode & UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES)) {
                                err = -ENOENT;
                                break;
                        }
                        src_pmd = mm_alloc_pmd(mm, src_addr);
                        if (unlikely(!src_pmd)) {
                                err = -ENOMEM;
                                break;
                        }
                }
                dst_pmd = mm_alloc_pmd(mm, dst_addr);
                if (unlikely(!dst_pmd)) {
                        err = -ENOMEM;
                        break;
                }

                dst_pmdval = pmdp_get_lockless(dst_pmd);
                /*
                 * If the dst_pmd is mapped as THP don't override it and just
                 * be strict. If dst_pmd changes into THP after this check, the
                 * move_pages_huge_pmd() will detect the change and retry
                 * while move_pages_pte() will detect the change and fail.
                 */
                if (unlikely(pmd_trans_huge(dst_pmdval))) {
                        err = -EEXIST;
                        break;
                }

                ptl = pmd_trans_huge_lock(src_pmd, src_vma);
                if (ptl) {
                        if (pmd_devmap(*src_pmd)) {
                                spin_unlock(ptl);
                                err = -ENOENT;
                                break;
                        }

                        /* Check if we can move the pmd without splitting it. */
                        if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||
                            !pmd_none(dst_pmdval)) {
                                struct folio *folio = pmd_folio(*src_pmd);

                                if (!folio || (!is_huge_zero_folio(folio) &&
                                               !PageAnonExclusive(&folio->page))) {
                                        spin_unlock(ptl);
                                        err = -EBUSY;
                                        break;
                                }

                                spin_unlock(ptl);
                                split_huge_pmd(src_vma, src_pmd, src_addr);
                                /* The folio will be split by move_pages_pte() */
                                continue;
                        }

                        err = move_pages_huge_pmd(mm, dst_pmd, src_pmd,
                                                  dst_pmdval, dst_vma, src_vma,
                                                  dst_addr, src_addr);
                        step_size = HPAGE_PMD_SIZE;
                } else {
                        if (pmd_none(*src_pmd)) {
                                if (!(mode & UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES)) {
                                        err = -ENOENT;
                                        break;
                                }
                                if (unlikely(__pte_alloc(mm, src_pmd))) {
                                        err = -ENOMEM;
                                        break;
                                }
                        }

                        if (unlikely(pte_alloc(mm, dst_pmd))) {
                                err = -ENOMEM;
                                break;
                        }

                        err = move_pages_pte(mm, dst_pmd, src_pmd,
                                             dst_vma, src_vma,
                                             dst_addr, src_addr, mode);
                        step_size = PAGE_SIZE;
                }

                cond_resched();

                if (fatal_signal_pending(current)) {
                        /* Do not override an error */
                        if (!err || err == -EAGAIN)
                                err = -EINTR;
                        break;
                }

                if (err) {
                        if (err == -EAGAIN)
                                continue;
                        break;
                }

                /* Proceed to the next page */
                dst_addr += step_size;
                src_addr += step_size;
                moved += step_size;
        }

out_unlock:
        up_read(&ctx->map_changing_lock);
        uffd_move_unlock(dst_vma, src_vma);
out:
        VM_WARN_ON(moved < 0);
        VM_WARN_ON(err > 0);
        VM_WARN_ON(!moved && !err);
        return moved ? moved : err;
}
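
The retry contract described above (repeat with src+retval, dst+retval, len-retval after a short move) maps onto a small loop on the userspace side. The sketch below is illustrative only: it assumes a userfaultfd descriptor whose destination range is already registered, the wrapper name is hypothetical, and error handling is abbreviated.

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Hypothetical wrapper: move [src, src + len) onto [dst, dst + len),
 * retrying after a short move as the kernel-doc above suggests. */
static int do_uffdio_move(int uffd, uint64_t dst, uint64_t src, uint64_t len)
{
        while (len) {
                struct uffdio_move move = {
                        .dst = dst,
                        .src = src,
                        .len = len,
                        .mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES,
                };

                if (ioctl(uffd, UFFDIO_MOVE, &move) == 0)
                        return 0;       /* whole range moved */
                if (move.move <= 0)
                        return -1;      /* hard error, inspect errno */
                /* Short move: skip past the bytes that were already moved. */
                dst += move.move;
                src += move.move;
                len -= move.move;
        }
        return 0;
}

Leaving mode as 0 instead of UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES would restore the strict -ENOENT behavior for holes in the source range.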

static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
                                     vm_flags_t flags)
{
        const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP;

        vm_flags_reset(vma, flags);
        /*
         * For shared mappings, we want to enable writenotify while
         * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply
         * recalculate vma->vm_page_prot whenever userfaultfd-wp changes.
         */
        if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed)
                vma_set_page_prot(vma);
}

static void userfaultfd_set_ctx(struct vm_area_struct *vma,
                                struct userfaultfd_ctx *ctx,
                                unsigned long flags)
{
        vma_start_write(vma);
        vma->vm_userfaultfd_ctx = (struct vm_userfaultfd_ctx){ctx};
        userfaultfd_set_vm_flags(vma,
                                 (vma->vm_flags & ~__VM_UFFD_FLAGS) | flags);
}

void userfaultfd_reset_ctx(struct vm_area_struct *vma)
{
        userfaultfd_set_ctx(vma, NULL, 0);
}

struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi,
                                             struct vm_area_struct *prev,
                                             struct vm_area_struct *vma,
                                             unsigned long start,
                                             unsigned long end)
{
        struct vm_area_struct *ret;

        /* Reset ptes for the whole vma range if wr-protected */
        if (userfaultfd_wp(vma))
                uffd_wp_range(vma, start, end - start, false);

        ret = vma_modify_flags_uffd(vmi, prev, vma, start, end,
                                    vma->vm_flags & ~__VM_UFFD_FLAGS,
                                    NULL_VM_UFFD_CTX);

        /*
         * In the vma_merge() successful mprotect-like case 8:
         * the next vma was merged into the current one and
         * the current one has not been updated yet.
         */
        if (!IS_ERR(ret))
                userfaultfd_reset_ctx(ret);

        return ret;
}

/* Assumes mmap write lock taken, and mm_struct pinned. */
int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
                               struct vm_area_struct *vma,
                               unsigned long vm_flags,
                               unsigned long start, unsigned long end,
                               bool wp_async)
{
        VMA_ITERATOR(vmi, ctx->mm, start);
        struct vm_area_struct *prev = vma_prev(&vmi);
        unsigned long vma_end;
        unsigned long new_flags;

        if (vma->vm_start < start)
                prev = vma;

        for_each_vma_range(vmi, vma, end) {
                cond_resched();

                BUG_ON(!vma_can_userfault(vma, vm_flags, wp_async));
                BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
                       vma->vm_userfaultfd_ctx.ctx != ctx);
                WARN_ON(!(vma->vm_flags & VM_MAYWRITE));

                /*
                 * Nothing to do: this vma is already registered into this
                 * userfaultfd and with the right tracking mode too.
                 */
                if (vma->vm_userfaultfd_ctx.ctx == ctx &&
                    (vma->vm_flags & vm_flags) == vm_flags)
                        goto skip;

                if (vma->vm_start > start)
                        start = vma->vm_start;
                vma_end = min(end, vma->vm_end);

                new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
                vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end,
                                            new_flags,
                                            (struct vm_userfaultfd_ctx){ctx});
                if (IS_ERR(vma))
                        return PTR_ERR(vma);

                /*
                 * In the vma_merge() successful mprotect-like case 8:
                 * the next vma was merged into the current one and
                 * the current one has not been updated yet.
                 */
                userfaultfd_set_ctx(vma, ctx, vm_flags);

                if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
                        hugetlb_unshare_all_pmds(vma);

skip:
                prev = vma;
                start = vma->vm_end;
        }

        return 0;
}
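
userfaultfd_register_range() is reached from the UFFDIO_REGISTER ioctl path in fs/userfaultfd.c. For orientation, a minimal userspace sketch of a registration that ends up in the loop above might look as follows; the helper name and the missing-mode choice are illustrative, and depending on the vm.unprivileged_userfaultfd sysctl an unprivileged caller may also need the UFFD_USER_MODE_ONLY flag.

#include <linux/userfaultfd.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>

/* Hypothetical helper: create a userfaultfd and register [addr, addr + len)
 * for missing-page tracking, which drives the per-vma loop above. */
static int register_uffd_range(void *addr, uint64_t len)
{
        int uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        struct uffdio_api api = { .api = UFFD_API, .features = 0 };
        struct uffdio_register reg = {
                .range = { .start = (uint64_t)(uintptr_t)addr, .len = len },
                .mode = UFFDIO_REGISTER_MODE_MISSING,
        };

        if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api) < 0 ||
            ioctl(uffd, UFFDIO_REGISTER, &reg) < 0) {
                if (uffd >= 0)
                        close(uffd);
                return -1;
        }
        return uffd;
}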

void userfaultfd_release_new(struct userfaultfd_ctx *ctx)
{
        struct mm_struct *mm = ctx->mm;
        struct vm_area_struct *vma;
        VMA_ITERATOR(vmi, mm, 0);

        /* the various vma->vm_userfaultfd_ctx still points to it */
        mmap_write_lock(mm);
        for_each_vma(vmi, vma) {
                if (vma->vm_userfaultfd_ctx.ctx == ctx)
                        userfaultfd_reset_ctx(vma);
        }
        mmap_write_unlock(mm);
}

void userfaultfd_release_all(struct mm_struct *mm,
                             struct userfaultfd_ctx *ctx)
{
        struct vm_area_struct *vma, *prev;
        VMA_ITERATOR(vmi, mm, 0);

        if (!mmget_not_zero(mm))
                return;

        /*
         * Flush page faults out of all CPUs. NOTE: all page faults
         * must be retried without returning VM_FAULT_SIGBUS if
         * userfaultfd_ctx_get() succeeds but vma->vm_userfaultfd_ctx
         * changes while handle_userfault released the mmap_lock. So
         * it's critical that released is set to true (above), before
         * taking the mmap_lock for writing.
         */
        mmap_write_lock(mm);
        prev = NULL;
        for_each_vma(vmi, vma) {
                cond_resched();
                BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
                       !!(vma->vm_flags & __VM_UFFD_FLAGS));
                if (vma->vm_userfaultfd_ctx.ctx != ctx) {
                        prev = vma;
                        continue;
                }

                vma = userfaultfd_clear_vma(&vmi, prev, vma,
                                            vma->vm_start, vma->vm_end);
                prev = vma;
        }
        mmap_write_unlock(mm);
        mmput(mm);
}
