
TOMOYO Linux Cross Reference
Linux/mm/userfaultfd.c


Diff markup

Differences between /mm/userfaultfd.c (Version linux-6.12-rc7) and /mm/userfaultfd.c (Version linux-5.17.15)
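
Both versions shown below implement the kernel side of the userfaultfd fill ioctls (UFFDIO_COPY, UFFDIO_ZEROPAGE, UFFDIO_CONTINUE and, on the linux-6.12-rc7 side, UFFDIO_POISON). For orientation, the following is a minimal userspace sketch, not taken from this file: the names area and src are illustrative and most error handling is omitted. It registers an anonymous mapping and pre-fills it with UFFDIO_COPY, the request that, for anonymous memory, ends up in mfill_atomic_pte_copy() in linux-6.12-rc7 (mcopy_atomic_pte() in linux-5.17.15). On kernels where vm.unprivileged_userfaultfd is 0, the userfaultfd() call may additionally need the UFFD_USER_MODE_ONLY flag or CAP_SYS_PTRACE.

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	/* Open a userfaultfd and complete the UFFDIO_API handshake. */
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	struct uffdio_api api = { .api = UFFD_API };
	ioctl(uffd, UFFDIO_API, &api);

	/* Anonymous mapping whose missing pages we will fill ourselves. */
	char *area = mmap(NULL, page, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)area, .len = page },
		.mode  = UFFDIO_REGISTER_MODE_MISSING,
	};
	ioctl(uffd, UFFDIO_REGISTER, &reg);

	/* Source buffer whose contents UFFDIO_COPY installs atomically. */
	char *src = mmap(NULL, page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	memset(src, 0x5a, page);

	/*
	 * A real monitor would poll()/read() struct uffd_msg events from
	 * uffd in another thread and answer each fault with this ioctl;
	 * here the page is simply pre-filled before it is touched.
	 */
	struct uffdio_copy copy = {
		.dst  = (unsigned long)area,
		.src  = (unsigned long)src,
		.len  = page,
		.mode = 0,
	};
	if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
		perror("UFFDIO_COPY");

	printf("first byte: 0x%x\n", area[0]);
	return 0;
}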


  1 // SPDX-License-Identifier: GPL-2.0-only            1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*                                                  2 /*
  3  *  mm/userfaultfd.c                                3  *  mm/userfaultfd.c
  4  *                                                  4  *
  5  *  Copyright (C) 2015  Red Hat, Inc.               5  *  Copyright (C) 2015  Red Hat, Inc.
  6  */                                                 6  */
  7                                                     7 
  8 #include <linux/mm.h>                               8 #include <linux/mm.h>
  9 #include <linux/sched/signal.h>                     9 #include <linux/sched/signal.h>
 10 #include <linux/pagemap.h>                         10 #include <linux/pagemap.h>
 11 #include <linux/rmap.h>                            11 #include <linux/rmap.h>
 12 #include <linux/swap.h>                            12 #include <linux/swap.h>
 13 #include <linux/swapops.h>                         13 #include <linux/swapops.h>
 14 #include <linux/userfaultfd_k.h>                   14 #include <linux/userfaultfd_k.h>
 15 #include <linux/mmu_notifier.h>                    15 #include <linux/mmu_notifier.h>
 16 #include <linux/hugetlb.h>                         16 #include <linux/hugetlb.h>
 17 #include <linux/shmem_fs.h>                        17 #include <linux/shmem_fs.h>
 18 #include <asm/tlbflush.h>                          18 #include <asm/tlbflush.h>
 19 #include <asm/tlb.h>                           << 
 20 #include "internal.h"                              19 #include "internal.h"
 21                                                    20 
 22 static __always_inline                             21 static __always_inline
 23 bool validate_dst_vma(struct vm_area_struct *d !!  22 struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
                                                   >>  23                                     unsigned long dst_start,
                                                   >>  24                                     unsigned long len)
 24 {                                                  25 {
 25         /* Make sure that the dst range is ful !!  26         /*
 26         if (dst_end > dst_vma->vm_end)         !!  27          * Make sure that the dst range is both valid and fully within a
 27                 return false;                  !!  28          * single existing vma.
                                                   >>  29          */
                                                   >>  30         struct vm_area_struct *dst_vma;
                                                   >>  31 
                                                   >>  32         dst_vma = find_vma(dst_mm, dst_start);
                                                   >>  33         if (!dst_vma)
                                                   >>  34                 return NULL;
                                                   >>  35 
                                                   >>  36         if (dst_start < dst_vma->vm_start ||
                                                   >>  37             dst_start + len > dst_vma->vm_end)
                                                   >>  38                 return NULL;
 28                                                    39 
 29         /*                                         40         /*
 30          * Check the vma is registered in uffd     41          * Check the vma is registered in uffd, this is required to
 31          * enforce the VM_MAYWRITE check done      42          * enforce the VM_MAYWRITE check done at uffd registration
 32          * time.                                   43          * time.
 33          */                                        44          */
 34         if (!dst_vma->vm_userfaultfd_ctx.ctx)      45         if (!dst_vma->vm_userfaultfd_ctx.ctx)
 35                 return false;                  !!  46                 return NULL;
 36                                                << 
 37         return true;                           << 
 38 }                                              << 
 39                                                << 
 40 static __always_inline                         << 
 41 struct vm_area_struct *find_vma_and_prepare_an << 
 42                                                << 
 43 {                                              << 
 44         struct vm_area_struct *vma;            << 
 45                                                << 
 46         mmap_assert_locked(mm);                << 
 47         vma = vma_lookup(mm, addr);            << 
 48         if (!vma)                              << 
 49                 vma = ERR_PTR(-ENOENT);        << 
 50         else if (!(vma->vm_flags & VM_SHARED)  << 
 51                  unlikely(anon_vma_prepare(vma << 
 52                 vma = ERR_PTR(-ENOMEM);        << 
 53                                                << 
 54         return vma;                            << 
 55 }                                              << 
 56                                                << 
 57 #ifdef CONFIG_PER_VMA_LOCK                     << 
 58 /*                                             << 
 59  * uffd_lock_vma() - Lookup and lock vma corre << 
 60  * @mm: mm to search vma in.                   << 
 61  * @address: address that the vma should conta << 
 62  *                                             << 
 63  * Should be called without holding mmap_lock. << 
 64  *                                             << 
 65  * Return: A locked vma containing @address, - << 
 66  * -ENOMEM if anon_vma couldn't be allocated.  << 
 67  */                                            << 
 68 static struct vm_area_struct *uffd_lock_vma(st << 
 69                                        unsigne << 
 70 {                                              << 
 71         struct vm_area_struct *vma;            << 
 72                                                << 
 73         vma = lock_vma_under_rcu(mm, address); << 
 74         if (vma) {                             << 
 75                 /*                             << 
 76                  * We know we're going to need << 
 77                  * that early.                 << 
 78                  */                            << 
 79                 if (!(vma->vm_flags & VM_SHARE << 
 80                         vma_end_read(vma);     << 
 81                 else                           << 
 82                         return vma;            << 
 83         }                                      << 
 84                                                << 
 85         mmap_read_lock(mm);                    << 
 86         vma = find_vma_and_prepare_anon(mm, ad << 
 87         if (!IS_ERR(vma)) {                    << 
 88                 /*                             << 
 89                  * We cannot use vma_start_rea << 
 90                  * false locked (see comment i << 
 91                  * can avoid that by directly  << 
 92                  * mmap_lock, which guarantees << 
 93                  * vma for write (vma_start_wr << 
 94                  */                            << 
 95                 down_read(&vma->vm_lock->lock) << 
 96         }                                      << 
 97                                                << 
 98         mmap_read_unlock(mm);                  << 
 99         return vma;                            << 
100 }                                              << 
101                                                << 
102 static struct vm_area_struct *uffd_mfill_lock( << 
103                                                << 
104                                                << 
105 {                                              << 
106         struct vm_area_struct *dst_vma;        << 
107                                                << 
108         dst_vma = uffd_lock_vma(dst_mm, dst_st << 
109         if (IS_ERR(dst_vma) || validate_dst_vm << 
110                 return dst_vma;                << 
111                                                << 
112         vma_end_read(dst_vma);                 << 
113         return ERR_PTR(-ENOENT);               << 
114 }                                              << 
115                                                << 
116 static void uffd_mfill_unlock(struct vm_area_s << 
117 {                                              << 
118         vma_end_read(vma);                     << 
119 }                                              << 
120                                                << 
121 #else                                          << 
122                                                << 
123 static struct vm_area_struct *uffd_mfill_lock( << 
124                                                << 
125                                                << 
126 {                                              << 
127         struct vm_area_struct *dst_vma;        << 
128                                                << 
129         mmap_read_lock(dst_mm);                << 
130         dst_vma = find_vma_and_prepare_anon(ds << 
131         if (IS_ERR(dst_vma))                   << 
132                 goto out_unlock;               << 
133                                                << 
134         if (validate_dst_vma(dst_vma, dst_star << 
135                 return dst_vma;                << 
136                                                    47 
137         dst_vma = ERR_PTR(-ENOENT);            << 
138 out_unlock:                                    << 
139         mmap_read_unlock(dst_mm);              << 
140         return dst_vma;                            48         return dst_vma;
141 }                                                  49 }
142                                                    50 
143 static void uffd_mfill_unlock(struct vm_area_s << 
144 {                                              << 
145         mmap_read_unlock(vma->vm_mm);          << 
146 }                                              << 
147 #endif                                         << 
148                                                << 
149 /* Check if dst_addr is outside of file's size << 
150 static bool mfill_file_over_size(struct vm_are << 
151                                  unsigned long << 
152 {                                              << 
153         struct inode *inode;                   << 
154         pgoff_t offset, max_off;               << 
155                                                << 
156         if (!dst_vma->vm_file)                 << 
157                 return false;                  << 
158                                                << 
159         inode = dst_vma->vm_file->f_inode;     << 
160         offset = linear_page_index(dst_vma, ds << 
161         max_off = DIV_ROUND_UP(i_size_read(ino << 
162         return offset >= max_off;              << 
163 }                                              << 
164                                                << 
165 /*                                                 51 /*
166  * Install PTEs, to map dst_addr (within dst_v     52  * Install PTEs, to map dst_addr (within dst_vma) to page.
167  *                                                 53  *
168  * This function handles both MCOPY_ATOMIC_NOR     54  * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
169  * and anon, and for both shared and private V     55  * and anon, and for both shared and private VMAs.
170  */                                                56  */
171 int mfill_atomic_install_pte(pmd_t *dst_pmd,   !!  57 int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
172                              struct vm_area_st     58                              struct vm_area_struct *dst_vma,
173                              unsigned long dst     59                              unsigned long dst_addr, struct page *page,
174                              bool newly_alloca !!  60                              bool newly_allocated, bool wp_copy)
175 {                                                  61 {
176         int ret;                                   62         int ret;
177         struct mm_struct *dst_mm = dst_vma->vm << 
178         pte_t _dst_pte, *dst_pte;                  63         pte_t _dst_pte, *dst_pte;
179         bool writable = dst_vma->vm_flags & VM     64         bool writable = dst_vma->vm_flags & VM_WRITE;
180         bool vm_shared = dst_vma->vm_flags & V     65         bool vm_shared = dst_vma->vm_flags & VM_SHARED;
                                                   >>  66         bool page_in_cache = page->mapping;
181         spinlock_t *ptl;                           67         spinlock_t *ptl;
182         struct folio *folio = page_folio(page) !!  68         struct inode *inode;
183         bool page_in_cache = folio_mapping(fol !!  69         pgoff_t offset, max_off;
184                                                    70 
185         _dst_pte = mk_pte(page, dst_vma->vm_pa     71         _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
186         _dst_pte = pte_mkdirty(_dst_pte);          72         _dst_pte = pte_mkdirty(_dst_pte);
187         if (page_in_cache && !vm_shared)           73         if (page_in_cache && !vm_shared)
188                 writable = false;                  74                 writable = false;
189         if (writable)                          !!  75 
190                 _dst_pte = pte_mkwrite(_dst_pt !!  76         /*
191         if (flags & MFILL_ATOMIC_WP)           !!  77          * Always mark a PTE as write-protected when needed, regardless of
                                                   >>  78          * VM_WRITE, which the user might change.
                                                   >>  79          */
                                                   >>  80         if (wp_copy)
192                 _dst_pte = pte_mkuffd_wp(_dst_     81                 _dst_pte = pte_mkuffd_wp(_dst_pte);
                                                   >>  82         else if (writable)
                                                   >>  83                 _dst_pte = pte_mkwrite(_dst_pte);
193                                                    84 
194         ret = -EAGAIN;                         << 
195         dst_pte = pte_offset_map_lock(dst_mm,      85         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
196         if (!dst_pte)                          << 
197                 goto out;                      << 
198                                                    86 
199         if (mfill_file_over_size(dst_vma, dst_ !!  87         if (vma_is_shmem(dst_vma)) {
                                                   >>  88                 /* serialize against truncate with the page table lock */
                                                   >>  89                 inode = dst_vma->vm_file->f_inode;
                                                   >>  90                 offset = linear_page_index(dst_vma, dst_addr);
                                                   >>  91                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
200                 ret = -EFAULT;                     92                 ret = -EFAULT;
201                 goto out_unlock;               !!  93                 if (unlikely(offset >= max_off))
                                                   >>  94                         goto out_unlock;
202         }                                          95         }
203                                                    96 
204         ret = -EEXIST;                             97         ret = -EEXIST;
205         /*                                     !!  98         if (!pte_none(*dst_pte))
206          * We allow to overwrite a pte marker: << 
207          * registered, we firstly wr-protect a << 
208          * page backing it, then access the pa << 
209          */                                    << 
210         if (!pte_none_mostly(ptep_get(dst_pte) << 
211                 goto out_unlock;                   99                 goto out_unlock;
212                                                   100 
213         if (page_in_cache) {                   !! 101         if (page_in_cache)
214                 /* Usually, cache pages are al !! 102                 page_add_file_rmap(page, false);
215                 if (newly_allocated)           !! 103         else
216                         folio_add_lru(folio);  !! 104                 page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
217                 folio_add_file_rmap_pte(folio, << 
218         } else {                               << 
219                 folio_add_new_anon_rmap(folio, << 
220                 folio_add_lru_vma(folio, dst_v << 
221         }                                      << 
222                                                   105 
223         /*                                        106         /*
224          * Must happen after rmap, as mm_count    107          * Must happen after rmap, as mm_counter() checks mapping (via
225          * PageAnon()), which is set by __page    108          * PageAnon()), which is set by __page_set_anon_rmap().
226          */                                       109          */
227         inc_mm_counter(dst_mm, mm_counter(foli !! 110         inc_mm_counter(dst_mm, mm_counter(page));
                                                   >> 111 
                                                   >> 112         if (newly_allocated)
                                                   >> 113                 lru_cache_add_inactive_or_unevictable(page, dst_vma);
228                                                   114 
229         set_pte_at(dst_mm, dst_addr, dst_pte,     115         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
230                                                   116 
231         /* No need to invalidate - it was non-    117         /* No need to invalidate - it was non-present before */
232         update_mmu_cache(dst_vma, dst_addr, ds    118         update_mmu_cache(dst_vma, dst_addr, dst_pte);
233         ret = 0;                                  119         ret = 0;
234 out_unlock:                                       120 out_unlock:
235         pte_unmap_unlock(dst_pte, ptl);           121         pte_unmap_unlock(dst_pte, ptl);
236 out:                                           << 
237         return ret;                               122         return ret;
238 }                                                 123 }
239                                                   124 
240 static int mfill_atomic_pte_copy(pmd_t *dst_pm !! 125 static int mcopy_atomic_pte(struct mm_struct *dst_mm,
241                                  struct vm_are !! 126                             pmd_t *dst_pmd,
242                                  unsigned long !! 127                             struct vm_area_struct *dst_vma,
243                                  unsigned long !! 128                             unsigned long dst_addr,
244                                  uffd_flags_t  !! 129                             unsigned long src_addr,
245                                  struct folio  !! 130                             struct page **pagep,
                                                   >> 131                             bool wp_copy)
246 {                                                 132 {
247         void *kaddr;                           !! 133         void *page_kaddr;
248         int ret;                                  134         int ret;
249         struct folio *folio;                   !! 135         struct page *page;
250                                                   136 
251         if (!*foliop) {                        !! 137         if (!*pagep) {
252                 ret = -ENOMEM;                    138                 ret = -ENOMEM;
253                 folio = vma_alloc_folio(GFP_HI !! 139                 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
254                                         dst_ad !! 140                 if (!page)
255                 if (!folio)                    << 
256                         goto out;                 141                         goto out;
257                                                   142 
258                 kaddr = kmap_local_folio(folio !! 143                 page_kaddr = kmap_atomic(page);
259                 /*                             !! 144                 ret = copy_from_user(page_kaddr,
260                  * The read mmap_lock is held  !! 145                                      (const void __user *) src_addr,
261                  * mmap_lock being read recurs << 
262                  * possible if a writer has ta << 
263                  *                             << 
264                  * process A thread 1 takes re << 
265                  * process A thread 2 calls mm << 
266                  * process B thread 1 takes pa << 
267                  * process B thread 2 calls mm << 
268                  * process A thread 1 blocks t << 
269                  * process B thread 1 blocks t << 
270                  *                             << 
271                  * Disable page faults to prev << 
272                  * and retry the copy outside  << 
273                  */                            << 
274                 pagefault_disable();           << 
275                 ret = copy_from_user(kaddr, (c << 
276                                      PAGE_SIZE    146                                      PAGE_SIZE);
277                 pagefault_enable();            !! 147                 kunmap_atomic(page_kaddr);
278                 kunmap_local(kaddr);           << 
279                                                   148 
280                 /* fallback to copy_from_user     149                 /* fallback to copy_from_user outside mmap_lock */
281                 if (unlikely(ret)) {              150                 if (unlikely(ret)) {
282                         ret = -ENOENT;            151                         ret = -ENOENT;
283                         *foliop = folio;       !! 152                         *pagep = page;
284                         /* don't free the page    153                         /* don't free the page */
285                         goto out;                 154                         goto out;
286                 }                                 155                 }
287                                                   156 
288                 flush_dcache_folio(folio);     !! 157                 flush_dcache_page(page);
289         } else {                                  158         } else {
290                 folio = *foliop;               !! 159                 page = *pagep;
291                 *foliop = NULL;                !! 160                 *pagep = NULL;
292         }                                         161         }
293                                                   162 
294         /*                                        163         /*
295          * The memory barrier inside __folio_m !! 164          * The memory barrier inside __SetPageUptodate makes sure that
296          * preceding stores to the page conten    165          * preceding stores to the page contents become visible before
297          * the set_pte_at() write.                166          * the set_pte_at() write.
298          */                                       167          */
299         __folio_mark_uptodate(folio);          !! 168         __SetPageUptodate(page);
300                                                   169 
301         ret = -ENOMEM;                            170         ret = -ENOMEM;
302         if (mem_cgroup_charge(folio, dst_vma-> !! 171         if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL))
303                 goto out_release;                 172                 goto out_release;
304                                                   173 
305         ret = mfill_atomic_install_pte(dst_pmd !! 174         ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
306                                        &folio- !! 175                                        page, true, wp_copy);
307         if (ret)                                  176         if (ret)
308                 goto out_release;                 177                 goto out_release;
309 out:                                              178 out:
310         return ret;                               179         return ret;
311 out_release:                                      180 out_release:
312         folio_put(folio);                      !! 181         put_page(page);
313         goto out;                                 182         goto out;
314 }                                                 183 }
315                                                   184 
316 static int mfill_atomic_pte_zeroed_folio(pmd_t !! 185 static int mfill_zeropage_pte(struct mm_struct *dst_mm,
317                                          struc !! 186                               pmd_t *dst_pmd,
318                                          unsig !! 187                               struct vm_area_struct *dst_vma,
319 {                                              !! 188                               unsigned long dst_addr)
320         struct folio *folio;                   << 
321         int ret = -ENOMEM;                     << 
322                                                << 
323         folio = vma_alloc_zeroed_movable_folio << 
324         if (!folio)                            << 
325                 return ret;                    << 
326                                                << 
327         if (mem_cgroup_charge(folio, dst_vma-> << 
328                 goto out_put;                  << 
329                                                << 
330         /*                                     << 
331          * The memory barrier inside __folio_m << 
332          * zeroing out the folio become visibl << 
333          * using set_pte_at(). See do_anonymou << 
334          */                                    << 
335         __folio_mark_uptodate(folio);          << 
336                                                << 
337         ret = mfill_atomic_install_pte(dst_pmd << 
338                                        &folio- << 
339         if (ret)                               << 
340                 goto out_put;                  << 
341                                                << 
342         return 0;                              << 
343 out_put:                                       << 
344         folio_put(folio);                      << 
345         return ret;                            << 
346 }                                              << 
347                                                << 
348 static int mfill_atomic_pte_zeropage(pmd_t *ds << 
349                                      struct vm << 
350                                      unsigned  << 
351 {                                                 189 {
352         pte_t _dst_pte, *dst_pte;                 190         pte_t _dst_pte, *dst_pte;
353         spinlock_t *ptl;                          191         spinlock_t *ptl;
354         int ret;                                  192         int ret;
355                                                !! 193         pgoff_t offset, max_off;
356         if (mm_forbids_zeropage(dst_vma->vm_mm !! 194         struct inode *inode;
357                 return mfill_atomic_pte_zeroed << 
358                                                   195 
359         _dst_pte = pte_mkspecial(pfn_pte(my_ze    196         _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
360                                          dst_v    197                                          dst_vma->vm_page_prot));
361         ret = -EAGAIN;                         !! 198         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
362         dst_pte = pte_offset_map_lock(dst_vma- !! 199         if (dst_vma->vm_file) {
363         if (!dst_pte)                          !! 200                 /* the shmem MAP_PRIVATE case requires checking the i_size */
364                 goto out;                      !! 201                 inode = dst_vma->vm_file->f_inode;
365         if (mfill_file_over_size(dst_vma, dst_ !! 202                 offset = linear_page_index(dst_vma, dst_addr);
                                                   >> 203                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
366                 ret = -EFAULT;                    204                 ret = -EFAULT;
367                 goto out_unlock;               !! 205                 if (unlikely(offset >= max_off))
                                                   >> 206                         goto out_unlock;
368         }                                         207         }
369         ret = -EEXIST;                            208         ret = -EEXIST;
370         if (!pte_none(ptep_get(dst_pte)))      !! 209         if (!pte_none(*dst_pte))
371                 goto out_unlock;                  210                 goto out_unlock;
372         set_pte_at(dst_vma->vm_mm, dst_addr, d !! 211         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
373         /* No need to invalidate - it was non-    212         /* No need to invalidate - it was non-present before */
374         update_mmu_cache(dst_vma, dst_addr, ds    213         update_mmu_cache(dst_vma, dst_addr, dst_pte);
375         ret = 0;                                  214         ret = 0;
376 out_unlock:                                       215 out_unlock:
377         pte_unmap_unlock(dst_pte, ptl);           216         pte_unmap_unlock(dst_pte, ptl);
378 out:                                           << 
379         return ret;                               217         return ret;
380 }                                                 218 }
381                                                   219 
382 /* Handles UFFDIO_CONTINUE for all shmem VMAs     220 /* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
383 static int mfill_atomic_pte_continue(pmd_t *ds !! 221 static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
384                                      struct vm !! 222                                 pmd_t *dst_pmd,
385                                      unsigned  !! 223                                 struct vm_area_struct *dst_vma,
386                                      uffd_flag !! 224                                 unsigned long dst_addr,
                                                   >> 225                                 bool wp_copy)
387 {                                                 226 {
388         struct inode *inode = file_inode(dst_v    227         struct inode *inode = file_inode(dst_vma->vm_file);
389         pgoff_t pgoff = linear_page_index(dst_    228         pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
390         struct folio *folio;                   << 
391         struct page *page;                        229         struct page *page;
392         int ret;                                  230         int ret;
393                                                   231 
394         ret = shmem_get_folio(inode, pgoff, 0, !! 232         ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
395         /* Our caller expects us to return -EF << 
396         if (ret == -ENOENT)                    << 
397                 ret = -EFAULT;                 << 
398         if (ret)                                  233         if (ret)
399                 goto out;                         234                 goto out;
400         if (!folio) {                          !! 235         if (!page) {
401                 ret = -EFAULT;                    236                 ret = -EFAULT;
402                 goto out;                         237                 goto out;
403         }                                         238         }
404                                                   239 
405         page = folio_file_page(folio, pgoff);  << 
406         if (PageHWPoison(page)) {                 240         if (PageHWPoison(page)) {
407                 ret = -EIO;                       241                 ret = -EIO;
408                 goto out_release;                 242                 goto out_release;
409         }                                         243         }
410                                                   244 
411         ret = mfill_atomic_install_pte(dst_pmd !! 245         ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
412                                        page, f !! 246                                        page, false, wp_copy);
413         if (ret)                                  247         if (ret)
414                 goto out_release;                 248                 goto out_release;
415                                                   249 
416         folio_unlock(folio);                   !! 250         unlock_page(page);
417         ret = 0;                                  251         ret = 0;
418 out:                                              252 out:
419         return ret;                               253         return ret;
420 out_release:                                      254 out_release:
421         folio_unlock(folio);                   !! 255         unlock_page(page);
422         folio_put(folio);                      !! 256         put_page(page);
423         goto out;                                 257         goto out;
424 }                                                 258 }
425                                                   259 
426 /* Handles UFFDIO_POISON for all non-hugetlb V << 
427 static int mfill_atomic_pte_poison(pmd_t *dst_ << 
428                                    struct vm_a << 
429                                    unsigned lo << 
430                                    uffd_flags_ << 
431 {                                              << 
432         int ret;                               << 
433         struct mm_struct *dst_mm = dst_vma->vm << 
434         pte_t _dst_pte, *dst_pte;              << 
435         spinlock_t *ptl;                       << 
436                                                << 
437         _dst_pte = make_pte_marker(PTE_MARKER_ << 
438         ret = -EAGAIN;                         << 
439         dst_pte = pte_offset_map_lock(dst_mm,  << 
440         if (!dst_pte)                          << 
441                 goto out;                      << 
442                                                << 
443         if (mfill_file_over_size(dst_vma, dst_ << 
444                 ret = -EFAULT;                 << 
445                 goto out_unlock;               << 
446         }                                      << 
447                                                << 
448         ret = -EEXIST;                         << 
449         /* Refuse to overwrite any PTE, even a << 
450         if (!pte_none(ptep_get(dst_pte)))      << 
451                 goto out_unlock;               << 
452                                                << 
453         set_pte_at(dst_mm, dst_addr, dst_pte,  << 
454                                                << 
455         /* No need to invalidate - it was non- << 
456         update_mmu_cache(dst_vma, dst_addr, ds << 
457         ret = 0;                               << 
458 out_unlock:                                    << 
459         pte_unmap_unlock(dst_pte, ptl);        << 
460 out:                                           << 
461         return ret;                            << 
462 }                                              << 
463                                                << 
464 static pmd_t *mm_alloc_pmd(struct mm_struct *m    260 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
465 {                                                 261 {
466         pgd_t *pgd;                               262         pgd_t *pgd;
467         p4d_t *p4d;                               263         p4d_t *p4d;
468         pud_t *pud;                               264         pud_t *pud;
469                                                   265 
470         pgd = pgd_offset(mm, address);            266         pgd = pgd_offset(mm, address);
471         p4d = p4d_alloc(mm, pgd, address);        267         p4d = p4d_alloc(mm, pgd, address);
472         if (!p4d)                                 268         if (!p4d)
473                 return NULL;                      269                 return NULL;
474         pud = pud_alloc(mm, p4d, address);        270         pud = pud_alloc(mm, p4d, address);
475         if (!pud)                                 271         if (!pud)
476                 return NULL;                      272                 return NULL;
477         /*                                        273         /*
478          * Note that we didn't run this becaus    274          * Note that we didn't run this because the pmd was
479          * missing, the *pmd may be already es    275          * missing, the *pmd may be already established and in
480          * turn it may also be a trans_huge_pm    276          * turn it may also be a trans_huge_pmd.
481          */                                       277          */
482         return pmd_alloc(mm, pud, address);       278         return pmd_alloc(mm, pud, address);
483 }                                                 279 }
484                                                   280 
485 #ifdef CONFIG_HUGETLB_PAGE                        281 #ifdef CONFIG_HUGETLB_PAGE
486 /*                                                282 /*
487  * mfill_atomic processing for HUGETLB vmas.   !! 283  * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
488  * called with either vma-lock or mmap_lock he !! 284  * called with mmap_lock held, it will release mmap_lock before returning.
489  * before returning.                           << 
490  */                                               285  */
491 static __always_inline ssize_t mfill_atomic_hu !! 286 static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
492                                                << 
493                                                   287                                               struct vm_area_struct *dst_vma,
494                                                   288                                               unsigned long dst_start,
495                                                   289                                               unsigned long src_start,
496                                                   290                                               unsigned long len,
497                                                !! 291                                               enum mcopy_atomic_mode mode)
498 {                                                 292 {
499         struct mm_struct *dst_mm = dst_vma->vm !! 293         int vm_shared = dst_vma->vm_flags & VM_SHARED;
500         ssize_t err;                              294         ssize_t err;
501         pte_t *dst_pte;                           295         pte_t *dst_pte;
502         unsigned long src_addr, dst_addr;         296         unsigned long src_addr, dst_addr;
503         long copied;                              297         long copied;
504         struct folio *folio;                   !! 298         struct page *page;
505         unsigned long vma_hpagesize;              299         unsigned long vma_hpagesize;
506         pgoff_t idx;                              300         pgoff_t idx;
507         u32 hash;                                 301         u32 hash;
508         struct address_space *mapping;            302         struct address_space *mapping;
509                                                   303 
510         /*                                        304         /*
511          * There is no default zero huge page     305          * There is no default zero huge page for all huge page sizes as
512          * supported by hugetlb.  A PMD_SIZE h    306          * supported by hugetlb.  A PMD_SIZE huge pages may exist as used
513          * by THP.  Since we can not reliably     307          * by THP.  Since we can not reliably insert a zero page, this
514          * feature is not supported.              308          * feature is not supported.
515          */                                       309          */
516         if (uffd_flags_mode_is(flags, MFILL_AT !! 310         if (mode == MCOPY_ATOMIC_ZEROPAGE) {
517                 up_read(&ctx->map_changing_loc !! 311                 mmap_read_unlock(dst_mm);
518                 uffd_mfill_unlock(dst_vma);    << 
519                 return -EINVAL;                   312                 return -EINVAL;
520         }                                         313         }
521                                                   314 
522         src_addr = src_start;                     315         src_addr = src_start;
523         dst_addr = dst_start;                     316         dst_addr = dst_start;
524         copied = 0;                               317         copied = 0;
525         folio = NULL;                          !! 318         page = NULL;
526         vma_hpagesize = vma_kernel_pagesize(ds    319         vma_hpagesize = vma_kernel_pagesize(dst_vma);
527                                                   320 
528         /*                                        321         /*
529          * Validate alignment based on huge pa    322          * Validate alignment based on huge page size
530          */                                       323          */
531         err = -EINVAL;                            324         err = -EINVAL;
532         if (dst_start & (vma_hpagesize - 1) ||    325         if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
533                 goto out_unlock;                  326                 goto out_unlock;
534                                                   327 
535 retry:                                            328 retry:
536         /*                                        329         /*
537          * On routine entry dst_vma is set.  I    330          * On routine entry dst_vma is set.  If we had to drop mmap_lock and
538          * retry, dst_vma will be set to NULL     331          * retry, dst_vma will be set to NULL and we must lookup again.
539          */                                       332          */
540         if (!dst_vma) {                           333         if (!dst_vma) {
541                 dst_vma = uffd_mfill_lock(dst_ << 
542                 if (IS_ERR(dst_vma)) {         << 
543                         err = PTR_ERR(dst_vma) << 
544                         goto out;              << 
545                 }                              << 
546                                                << 
547                 err = -ENOENT;                    334                 err = -ENOENT;
548                 if (!is_vm_hugetlb_page(dst_vm !! 335                 dst_vma = find_dst_vma(dst_mm, dst_start, len);
549                         goto out_unlock_vma;   !! 336                 if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
                                                   >> 337                         goto out_unlock;
550                                                   338 
551                 err = -EINVAL;                    339                 err = -EINVAL;
552                 if (vma_hpagesize != vma_kerne    340                 if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
553                         goto out_unlock_vma;   !! 341                         goto out_unlock;
554                                                   342 
555                 /*                             !! 343                 vm_shared = dst_vma->vm_flags & VM_SHARED;
556                  * If memory mappings are chan !! 344         }
557                  * operation (e.g. mremap) run !! 345 
558                  * request the user to retry l !! 346         /*
559                  */                            !! 347          * If not shared, ensure the dst_vma has a anon_vma.
560                 down_read(&ctx->map_changing_l !! 348          */
561                 err = -EAGAIN;                 !! 349         err = -ENOMEM;
562                 if (atomic_read(&ctx->mmap_cha !! 350         if (!vm_shared) {
                                                   >> 351                 if (unlikely(anon_vma_prepare(dst_vma)))
563                         goto out_unlock;          352                         goto out_unlock;
564         }                                         353         }
565                                                   354 
566         while (src_addr < src_start + len) {      355         while (src_addr < src_start + len) {
567                 BUG_ON(dst_addr >= dst_start +    356                 BUG_ON(dst_addr >= dst_start + len);
568                                                   357 
569                 /*                                358                 /*
570                  * Serialize via vma_lock and  !! 359                  * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
571                  * vma_lock ensures the dst_pt !! 360                  * i_mmap_rwsem ensures the dst_pte remains valid even
572                  * in the case of shared pmds.    361                  * in the case of shared pmds.  fault mutex prevents
573                  * races with other faulting t    362                  * races with other faulting threads.
574                  */                               363                  */
575                 idx = linear_page_index(dst_vm << 
576                 mapping = dst_vma->vm_file->f_    364                 mapping = dst_vma->vm_file->f_mapping;
                                                   >> 365                 i_mmap_lock_read(mapping);
                                                   >> 366                 idx = linear_page_index(dst_vma, dst_addr);
577                 hash = hugetlb_fault_mutex_has    367                 hash = hugetlb_fault_mutex_hash(mapping, idx);
578                 mutex_lock(&hugetlb_fault_mute    368                 mutex_lock(&hugetlb_fault_mutex_table[hash]);
579                 hugetlb_vma_lock_read(dst_vma) << 
580                                                   369 
581                 err = -ENOMEM;                    370                 err = -ENOMEM;
582                 dst_pte = huge_pte_alloc(dst_m    371                 dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
583                 if (!dst_pte) {                   372                 if (!dst_pte) {
584                         hugetlb_vma_unlock_rea << 
585                         mutex_unlock(&hugetlb_    373                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                                                   >> 374                         i_mmap_unlock_read(mapping);
586                         goto out_unlock;          375                         goto out_unlock;
587                 }                                 376                 }
588                                                   377 
589                 if (!uffd_flags_mode_is(flags, !! 378                 if (mode != MCOPY_ATOMIC_CONTINUE &&
590                     !huge_pte_none_mostly(huge !! 379                     !huge_pte_none(huge_ptep_get(dst_pte))) {
591                         err = -EEXIST;            380                         err = -EEXIST;
592                         hugetlb_vma_unlock_rea << 
593                         mutex_unlock(&hugetlb_    381                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                                                   >> 382                         i_mmap_unlock_read(mapping);
594                         goto out_unlock;          383                         goto out_unlock;
595                 }                                 384                 }
596                                                   385 
597                 err = hugetlb_mfill_atomic_pte !! 386                 err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
598                                                !! 387                                                dst_addr, src_addr, mode, &page);
599                                                   388 
600                 hugetlb_vma_unlock_read(dst_vm << 
601                 mutex_unlock(&hugetlb_fault_mu    389                 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                                                   >> 390                 i_mmap_unlock_read(mapping);
602                                                   391 
603                 cond_resched();                   392                 cond_resched();
604                                                   393 
605                 if (unlikely(err == -ENOENT))     394                 if (unlikely(err == -ENOENT)) {
606                         up_read(&ctx->map_chan !! 395                         mmap_read_unlock(dst_mm);
607                         uffd_mfill_unlock(dst_ !! 396                         BUG_ON(!page);
608                         BUG_ON(!folio);        << 
609                                                   397 
610                         err = copy_folio_from_ !! 398                         err = copy_huge_page_from_user(page,
611                                                !! 399                                                 (const void __user *)src_addr,
                                                   >> 400                                                 vma_hpagesize / PAGE_SIZE,
                                                   >> 401                                                 true);
612                         if (unlikely(err)) {      402                         if (unlikely(err)) {
613                                 err = -EFAULT;    403                                 err = -EFAULT;
614                                 goto out;         404                                 goto out;
615                         }                         405                         }
                                                   >> 406                         mmap_read_lock(dst_mm);
616                                                   407 
617                         dst_vma = NULL;           408                         dst_vma = NULL;
618                         goto retry;               409                         goto retry;
619                 } else                            410                 } else
620                         BUG_ON(folio);         !! 411                         BUG_ON(page);
621                                                   412 
622                 if (!err) {                       413                 if (!err) {
623                         dst_addr += vma_hpages    414                         dst_addr += vma_hpagesize;
624                         src_addr += vma_hpages    415                         src_addr += vma_hpagesize;
625                         copied += vma_hpagesiz    416                         copied += vma_hpagesize;
626                                                   417 
627                         if (fatal_signal_pendi    418                         if (fatal_signal_pending(current))
628                                 err = -EINTR;     419                                 err = -EINTR;
629                 }                                 420                 }
630                 if (err)                          421                 if (err)
631                         break;                    422                         break;
632         }                                         423         }
633                                                   424 
634 out_unlock:                                       425 out_unlock:
635         up_read(&ctx->map_changing_lock);      !! 426         mmap_read_unlock(dst_mm);
636 out_unlock_vma:                                << 
637         uffd_mfill_unlock(dst_vma);            << 
638 out:                                              427 out:
639         if (folio)                             !! 428         if (page)
640                 folio_put(folio);              !! 429                 put_page(page);
641         BUG_ON(copied < 0);                       430         BUG_ON(copied < 0);
642         BUG_ON(err > 0);                          431         BUG_ON(err > 0);
643         BUG_ON(!copied && !err);                  432         BUG_ON(!copied && !err);
644         return copied ? copied : err;             433         return copied ? copied : err;
645 }                                                 434 }
646 #else /* !CONFIG_HUGETLB_PAGE */                  435 #else /* !CONFIG_HUGETLB_PAGE */
647 /* fail at build time if gcc attempts to use t    436 /* fail at build time if gcc attempts to use this */
648 extern ssize_t mfill_atomic_hugetlb(struct use !! 437 extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
649                                     struct vm_ !! 438                                       struct vm_area_struct *dst_vma,
650                                     unsigned l !! 439                                       unsigned long dst_start,
651                                     unsigned l !! 440                                       unsigned long src_start,
652                                     unsigned l !! 441                                       unsigned long len,
653                                     uffd_flags !! 442                                       enum mcopy_atomic_mode mode);
654 #endif /* CONFIG_HUGETLB_PAGE */                  443 #endif /* CONFIG_HUGETLB_PAGE */
655                                                   444 
656 static __always_inline ssize_t mfill_atomic_pt !! 445 static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                                                   >> 446                                                 pmd_t *dst_pmd,
657                                                   447                                                 struct vm_area_struct *dst_vma,
658                                                   448                                                 unsigned long dst_addr,
659                                                   449                                                 unsigned long src_addr,
660                                                !! 450                                                 struct page **page,
661                                                !! 451                                                 enum mcopy_atomic_mode mode,
                                                   >> 452                                                 bool wp_copy)
662 {                                                 453 {
663         ssize_t err;                              454         ssize_t err;
664                                                   455 
665         if (uffd_flags_mode_is(flags, MFILL_AT !! 456         if (mode == MCOPY_ATOMIC_CONTINUE) {
666                 return mfill_atomic_pte_contin !! 457                 return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
667                                                !! 458                                             wp_copy);
668         } else if (uffd_flags_mode_is(flags, M << 
669                 return mfill_atomic_pte_poison << 
670                                                << 
671         }                                         459         }
672                                                   460 
673         /*                                        461         /*
674          * The normal page fault path for a sh    462          * The normal page fault path for a shmem will invoke the
675          * fault, fill the hole in the file an    463          * fault, fill the hole in the file and COW it right away. The
676          * result generates plain anonymous me    464          * result generates plain anonymous memory. So when we are
677          * asked to fill a hole in a MAP_PRIV     465          * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
678          * generate anonymous memory directly     466          * generate anonymous memory directly without actually filling
679          * the hole. For the MAP_PRIVATE case     467          * the hole. For the MAP_PRIVATE case the robustness check
680          * only happens in the pagetable (to v    468          * only happens in the pagetable (to verify it's still none)
681          * and not in the radix tree.             469          * and not in the radix tree.
682          */                                       470          */
683         if (!(dst_vma->vm_flags & VM_SHARED))     471         if (!(dst_vma->vm_flags & VM_SHARED)) {
684                 if (uffd_flags_mode_is(flags,  !! 472                 if (mode == MCOPY_ATOMIC_NORMAL)
685                         err = mfill_atomic_pte !! 473                         err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
686                                                !! 474                                                dst_addr, src_addr, page,
687                                                !! 475                                                wp_copy);
688                 else                              476                 else
689                         err = mfill_atomic_pte !! 477                         err = mfill_zeropage_pte(dst_mm, dst_pmd,
690                                                   478                                                  dst_vma, dst_addr);
691         } else {                                  479         } else {
692                 err = shmem_mfill_atomic_pte(d !! 480                 VM_WARN_ON_ONCE(wp_copy);
                                                   >> 481                 err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
693                                              d    482                                              dst_addr, src_addr,
694                                              f !! 483                                              mode != MCOPY_ATOMIC_NORMAL,
                                                   >> 484                                              page);
695         }                                         485         }
696                                                   486 
697         return err;                               487         return err;
698 }                                                 488 }
699                                                   489 
700 static __always_inline ssize_t mfill_atomic(st !! 490 static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
701                                             un !! 491                                               unsigned long dst_start,
702                                             un !! 492                                               unsigned long src_start,
703                                             un !! 493                                               unsigned long len,
704                                             uf !! 494                                               enum mcopy_atomic_mode mcopy_mode,
                                                   >> 495                                               atomic_t *mmap_changing,
                                                   >> 496                                               __u64 mode)
705 {                                                 497 {
706         struct mm_struct *dst_mm = ctx->mm;    << 
707         struct vm_area_struct *dst_vma;           498         struct vm_area_struct *dst_vma;
708         ssize_t err;                              499         ssize_t err;
709         pmd_t *dst_pmd;                           500         pmd_t *dst_pmd;
710         unsigned long src_addr, dst_addr;         501         unsigned long src_addr, dst_addr;
711         long copied;                              502         long copied;
712         struct folio *folio;                   !! 503         struct page *page;
                                                   >> 504         bool wp_copy;
713                                                   505 
714         /*                                        506         /*
715          * Sanitize the command parameters:       507          * Sanitize the command parameters:
716          */                                       508          */
717         BUG_ON(dst_start & ~PAGE_MASK);           509         BUG_ON(dst_start & ~PAGE_MASK);
718         BUG_ON(len & ~PAGE_MASK);                 510         BUG_ON(len & ~PAGE_MASK);
719                                                   511 
720         /* Does the address range wrap, or is     512         /* Does the address range wrap, or is the span zero-sized? */
721         BUG_ON(src_start + len <= src_start);     513         BUG_ON(src_start + len <= src_start);
722         BUG_ON(dst_start + len <= dst_start);     514         BUG_ON(dst_start + len <= dst_start);
723                                                   515 
724         src_addr = src_start;                     516         src_addr = src_start;
725         dst_addr = dst_start;                     517         dst_addr = dst_start;
726         copied = 0;                               518         copied = 0;
727         folio = NULL;                          !! 519         page = NULL;
728 retry:                                            520 retry:
729         /*                                     !! 521         mmap_read_lock(dst_mm);
730          * Make sure the vma is not shared, th << 
731          * both valid and fully within a singl << 
732          */                                    << 
733         dst_vma = uffd_mfill_lock(dst_mm, dst_ << 
734         if (IS_ERR(dst_vma)) {                 << 
735                 err = PTR_ERR(dst_vma);        << 
736                 goto out;                      << 
737         }                                      << 
738                                                   522 
739         /*                                        523         /*
740          * If memory mappings are changing bec    524          * If memory mappings are changing because of non-cooperative
741          * operation (e.g. mremap) running in     525          * operation (e.g. mremap) running in parallel, bail out and
742          * request the user to retry later        526          * request the user to retry later
743          */                                       527          */
744         down_read(&ctx->map_changing_lock);    << 
745         err = -EAGAIN;                            528         err = -EAGAIN;
746         if (atomic_read(&ctx->mmap_changing))  !! 529         if (mmap_changing && atomic_read(mmap_changing))
                                                   >> 530                 goto out_unlock;
                                                   >> 531 
                                                   >> 532         /*
                                                   >> 533          * Make sure the vma is not shared, that the dst range is
                                                   >> 534          * both valid and fully within a single existing vma.
                                                   >> 535          */
                                                   >> 536         err = -ENOENT;
                                                   >> 537         dst_vma = find_dst_vma(dst_mm, dst_start, len);
                                                   >> 538         if (!dst_vma)
747                 goto out_unlock;                  539                 goto out_unlock;
748                                                   540 
749         err = -EINVAL;                            541         err = -EINVAL;
750         /*                                        542         /*
751          * shmem_zero_setup is invoked in mmap    543          * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
752          * it will overwrite vm_ops, so vma_is    544          * it will overwrite vm_ops, so vma_is_anonymous must return false.
753          */                                       545          */
754         if (WARN_ON_ONCE(vma_is_anonymous(dst_    546         if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
755             dst_vma->vm_flags & VM_SHARED))       547             dst_vma->vm_flags & VM_SHARED))
756                 goto out_unlock;                  548                 goto out_unlock;
757                                                   549 
758         /*                                        550         /*
759          * validate 'mode' now that we know th    551          * validate 'mode' now that we know the dst_vma: don't allow
760          * a wrprotect copy if the userfaultfd    552          * a wrprotect copy if the userfaultfd didn't register as WP.
761          */                                       553          */
762         if ((flags & MFILL_ATOMIC_WP) && !(dst !! 554         wp_copy = mode & UFFDIO_COPY_MODE_WP;
                                                   >> 555         if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
763                 goto out_unlock;                  556                 goto out_unlock;
764                                                   557 
765         /*                                        558         /*
766          * If this is a HUGETLB vma, pass off     559          * If this is a HUGETLB vma, pass off to appropriate routine
767          */                                       560          */
768         if (is_vm_hugetlb_page(dst_vma))          561         if (is_vm_hugetlb_page(dst_vma))
769                 return  mfill_atomic_hugetlb(c !! 562                 return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
770                                              s !! 563                                                 src_start, len, mcopy_mode);
771                                                   564 
772         if (!vma_is_anonymous(dst_vma) && !vma    565         if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
773                 goto out_unlock;                  566                 goto out_unlock;
774         if (!vma_is_shmem(dst_vma) &&          !! 567         if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
775             uffd_flags_mode_is(flags, MFILL_AT !! 568                 goto out_unlock;
                                                   >> 569 
                                                   >> 570         /*
                                                   >> 571          * Ensure the dst_vma has an anon_vma or this page
                                                   >> 572          * would get a NULL anon_vma when moved in the
                                                   >> 573          * dst_vma.
                                                   >> 574          */
                                                   >> 575         err = -ENOMEM;
                                                   >> 576         if (!(dst_vma->vm_flags & VM_SHARED) &&
                                                   >> 577             unlikely(anon_vma_prepare(dst_vma)))
776                 goto out_unlock;                  578                 goto out_unlock;
777                                                   579 
778         while (src_addr < src_start + len) {      580         while (src_addr < src_start + len) {
779                 pmd_t dst_pmdval;                 581                 pmd_t dst_pmdval;
780                                                   582 
781                 BUG_ON(dst_addr >= dst_start +    583                 BUG_ON(dst_addr >= dst_start + len);
782                                                   584 
783                 dst_pmd = mm_alloc_pmd(dst_mm,    585                 dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
784                 if (unlikely(!dst_pmd)) {         586                 if (unlikely(!dst_pmd)) {
785                         err = -ENOMEM;            587                         err = -ENOMEM;
786                         break;                    588                         break;
787                 }                                 589                 }
788                                                   590 
789                 dst_pmdval = pmdp_get_lockless !! 591                 dst_pmdval = pmd_read_atomic(dst_pmd);
790                 if (unlikely(pmd_none(dst_pmdv << 
791                     unlikely(__pte_alloc(dst_m << 
792                         err = -ENOMEM;         << 
793                         break;                 << 
794                 }                              << 
795                 dst_pmdval = pmdp_get_lockless << 
796                 /*                                592                 /*
797                  * If the dst_pmd is THP don't !! 593                  * If the dst_pmd is mapped as THP don't
798                  * (This includes the case whe !! 594                  * override it and just be strict.
799                  * changed back to none after  << 
800                  */                               595                  */
801                 if (unlikely(!pmd_present(dst_ !! 596                 if (unlikely(pmd_trans_huge(dst_pmdval))) {
802                              pmd_devmap(dst_pm << 
803                         err = -EEXIST;            597                         err = -EEXIST;
804                         break;                    598                         break;
805                 }                                 599                 }
806                 if (unlikely(pmd_bad(dst_pmdva !! 600                 if (unlikely(pmd_none(dst_pmdval)) &&
                                                   >> 601                     unlikely(__pte_alloc(dst_mm, dst_pmd))) {
                                                   >> 602                         err = -ENOMEM;
                                                   >> 603                         break;
                                                   >> 604                 }
                                                   >> 605                 /* If a huge pmd materialized from under us, fail */
                                                   >> 606                 if (unlikely(pmd_trans_huge(*dst_pmd))) {
807                         err = -EFAULT;            607                         err = -EFAULT;
808                         break;                    608                         break;
809                 }                                 609                 }
810                 /*                             << 
811                  * For shmem mappings, khugepa << 
812                  * tables under us; pte_offset << 
813                  */                            << 
814                                                   610 
815                 err = mfill_atomic_pte(dst_pmd !! 611                 BUG_ON(pmd_none(*dst_pmd));
816                                        src_add !! 612                 BUG_ON(pmd_trans_huge(*dst_pmd));
                                                   >> 613 
                                                   >> 614                 err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                                   >> 615                                        src_addr, &page, mcopy_mode, wp_copy);
817                 cond_resched();                   616                 cond_resched();
818                                                   617 
819                 if (unlikely(err == -ENOENT))     618                 if (unlikely(err == -ENOENT)) {
820                         void *kaddr;           !! 619                         void *page_kaddr;
821                                                   620 
822                         up_read(&ctx->map_chan !! 621                         mmap_read_unlock(dst_mm);
823                         uffd_mfill_unlock(dst_ !! 622                         BUG_ON(!page);
824                         BUG_ON(!folio);        << 
825                                                   623 
826                         kaddr = kmap_local_fol !! 624                         page_kaddr = kmap(page);
827                         err = copy_from_user(k !! 625                         err = copy_from_user(page_kaddr,
828                                              (    626                                              (const void __user *) src_addr,
829                                              P    627                                              PAGE_SIZE);
830                         kunmap_local(kaddr);   !! 628                         kunmap(page);
831                         if (unlikely(err)) {      629                         if (unlikely(err)) {
832                                 err = -EFAULT;    630                                 err = -EFAULT;
833                                 goto out;         631                                 goto out;
834                         }                         632                         }
835                         flush_dcache_folio(fol !! 633                         flush_dcache_page(page);
836                         goto retry;               634                         goto retry;
837                 } else                            635                 } else
838                         BUG_ON(folio);         !! 636                         BUG_ON(page);
839                                                   637 
840                 if (!err) {                       638                 if (!err) {
841                         dst_addr += PAGE_SIZE;    639                         dst_addr += PAGE_SIZE;
842                         src_addr += PAGE_SIZE;    640                         src_addr += PAGE_SIZE;
843                         copied += PAGE_SIZE;      641                         copied += PAGE_SIZE;
844                                                   642 
845                         if (fatal_signal_pendi    643                         if (fatal_signal_pending(current))
846                                 err = -EINTR;     644                                 err = -EINTR;
847                 }                                 645                 }
848                 if (err)                          646                 if (err)
849                         break;                    647                         break;
850         }                                         648         }
851                                                   649 
852 out_unlock:                                       650 out_unlock:
853         up_read(&ctx->map_changing_lock);      !! 651         mmap_read_unlock(dst_mm);
854         uffd_mfill_unlock(dst_vma);            << 
855 out:                                              652 out:
856         if (folio)                             !! 653         if (page)
857                 folio_put(folio);              !! 654                 put_page(page);
858         BUG_ON(copied < 0);                       655         BUG_ON(copied < 0);
859         BUG_ON(err > 0);                          656         BUG_ON(err > 0);
860         BUG_ON(!copied && !err);                  657         BUG_ON(!copied && !err);
861         return copied ? copied : err;             658         return copied ? copied : err;
862 }                                                 659 }
863                                                   660 
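
/*
 * A minimal userspace sketch (not kernel code): mfill_atomic() above is
 * the backend of the UFFDIO_COPY and UFFDIO_ZEROPAGE wrappers that
 * follow.  The usual consumer is a fault-handling thread that reads
 * page-fault events from the uffd and resolves them with one of those
 * ioctls.  Assumes a uffd already registered with
 * UFFDIO_REGISTER_MODE_MISSING and a page of prepared contents in
 * src_page; error handling is abbreviated and the names are
 * illustrative.
 */
#include <poll.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

static void handle_missing_faults(int uffd, void *src_page,
				  unsigned long page_size)
{
	for (;;) {
		struct pollfd pollfd = { .fd = uffd, .events = POLLIN };
		struct uffd_msg msg;
		struct uffdio_copy copy;

		if (poll(&pollfd, 1, -1) < 0)
			break;
		if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
			continue;
		if (msg.event != UFFD_EVENT_PAGEFAULT)
			continue;

		/* Resolve the faulting page with the prepared contents. */
		copy.dst = msg.arg.pagefault.address & ~(page_size - 1);
		copy.src = (unsigned long)src_page;
		copy.len = page_size;
		copy.mode = 0;
		ioctl(uffd, UFFDIO_COPY, &copy);
	}
}
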
864 ssize_t mfill_atomic_copy(struct userfaultfd_c !! 661 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
865                           unsigned long src_st !! 662                      unsigned long src_start, unsigned long len,
866                           uffd_flags_t flags)  !! 663                      atomic_t *mmap_changing, __u64 mode)
867 {                                                 664 {
868         return mfill_atomic(ctx, dst_start, sr !! 665         return __mcopy_atomic(dst_mm, dst_start, src_start, len,
869                             uffd_flags_set_mod !! 666                               MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
870 }                                              << 
871                                                << 
872 ssize_t mfill_atomic_zeropage(struct userfault << 
873                               unsigned long st << 
874                               unsigned long le << 
875 {                                              << 
876         return mfill_atomic(ctx, start, 0, len << 
877                             uffd_flags_set_mod << 
878 }                                              << 
879                                                << 
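
/*
 * A minimal userspace sketch (not kernel code): the zeropage flavour is
 * reached through UFFDIO_ZEROPAGE, which only takes a destination
 * range.  Assumes the standard UAPI and an already-registered uffd.
 */
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static int uffd_zero_range(int uffd, unsigned long start, unsigned long len)
{
	struct uffdio_zeropage zp = {
		.range = { .start = start, .len = len },
		.mode  = 0,
	};

	/* On success the whole range reads back as zeroes. */
	return ioctl(uffd, UFFDIO_ZEROPAGE, &zp);
}
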
880 ssize_t mfill_atomic_continue(struct userfault << 
881                               unsigned long le << 
882 {                                              << 
883                                                << 
884         /*                                     << 
885          * A caller might reasonably assume th << 
886          * smp_wmb() to ensure that any writes << 
887          * the thread doing the UFFDIO_CONTINU << 
888          * subsequent loads from the page thro << 
889          */                                    << 
890         smp_wmb();                             << 
891                                                << 
892         return mfill_atomic(ctx, start, 0, len << 
893                             uffd_flags_set_mod << 
894 }                                                 667 }
895                                                   668 
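
/*
 * A minimal userspace sketch (not kernel code): UFFDIO_CONTINUE
 * resolves minor faults by mapping the page that already exists in the
 * page cache.  The smp_wmb() above pairs with the faulting thread's
 * later loads, so userspace only has to finish writing the contents
 * through its other, non-registered mapping before issuing the ioctl.
 * Assumes the standard UAPI; the names are illustrative.
 */
#include <string.h>
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static int uffd_continue_page(int uffd, void *alias_page, const void *data,
			      unsigned long registered_addr,
			      unsigned long page_size)
{
	struct uffdio_continue cont = {
		.range = { .start = registered_addr, .len = page_size },
		.mode  = 0,
	};

	/* Populate the page cache via the second, non-registered mapping... */
	memcpy(alias_page, data, page_size);

	/* ...then ask the kernel to map that page at the registered address. */
	return ioctl(uffd, UFFDIO_CONTINUE, &cont);
}
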
896 ssize_t mfill_atomic_poison(struct userfaultfd !! 669 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
897                             unsigned long len, !! 670                        unsigned long len, atomic_t *mmap_changing)
898 {                                                 671 {
899         return mfill_atomic(ctx, start, 0, len !! 672         return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
900                             uffd_flags_set_mod !! 673                               mmap_changing, 0);
901 }                                                 674 }
902                                                   675 
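
/*
 * A minimal userspace sketch (not kernel code): UFFDIO_POISON installs
 * markers so that later access to the range fails as if the memory had
 * been poisoned, which is useful when the original contents are known
 * to be unrecoverable (for example after a failed postcopy migration).
 * Assumes a kernel and <linux/userfaultfd.h> that provide UFFDIO_POISON.
 */
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static int uffd_poison_range(int uffd, unsigned long start, unsigned long len)
{
	struct uffdio_poison poison = {
		.range = { .start = start, .len = len },
		.mode  = 0,
	};

	/* Later faults on the range deliver SIGBUS instead of data. */
	return ioctl(uffd, UFFDIO_POISON, &poison);
}
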
903 long uffd_wp_range(struct vm_area_struct *dst_ !! 676 ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
904                    unsigned long start, unsign !! 677                        unsigned long len, atomic_t *mmap_changing)
905 {                                                 678 {
906         unsigned int mm_cp_flags;              !! 679         return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
907         struct mmu_gather tlb;                 !! 680                               mmap_changing, 0);
908         long ret;                              << 
909                                                << 
910         VM_WARN_ONCE(start < dst_vma->vm_start << 
911                         "The address range exc << 
912         if (enable_wp)                         << 
913                 mm_cp_flags = MM_CP_UFFD_WP;   << 
914         else                                   << 
915                 mm_cp_flags = MM_CP_UFFD_WP_RE << 
916                                                << 
917         /*                                     << 
918          * vma->vm_page_prot already reflects  << 
919          * VMA (see userfaultfd_set_vm_flags() << 
920          * to be write-protected as default wh << 
921          * Try upgrading write permissions man << 
922          */                                    << 
923         if (!enable_wp && vma_wants_manual_pte << 
924                 mm_cp_flags |= MM_CP_TRY_CHANG << 
925         tlb_gather_mmu(&tlb, dst_vma->vm_mm);  << 
926         ret = change_protection(&tlb, dst_vma, << 
927         tlb_finish_mmu(&tlb);                  << 
928                                                << 
929         return ret;                            << 
930 }                                                 681 }
931                                                   682 
932 int mwriteprotect_range(struct userfaultfd_ctx !! 683 int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
933                         unsigned long len, boo !! 684                         unsigned long len, bool enable_wp,
                                                   >> 685                         atomic_t *mmap_changing)
934 {                                                 686 {
935         struct mm_struct *dst_mm = ctx->mm;    << 
936         unsigned long end = start + len;       << 
937         unsigned long _start, _end;            << 
938         struct vm_area_struct *dst_vma;           687         struct vm_area_struct *dst_vma;
939         unsigned long page_mask;               !! 688         pgprot_t newprot;
940         long err;                              !! 689         int err;
941         VMA_ITERATOR(vmi, dst_mm, start);      << 
942                                                   690 
943         /*                                        691         /*
944          * Sanitize the command parameters:       692          * Sanitize the command parameters:
945          */                                       693          */
946         BUG_ON(start & ~PAGE_MASK);               694         BUG_ON(start & ~PAGE_MASK);
947         BUG_ON(len & ~PAGE_MASK);                 695         BUG_ON(len & ~PAGE_MASK);
948                                                   696 
949         /* Does the address range wrap, or is     697         /* Does the address range wrap, or is the span zero-sized? */
950         BUG_ON(start + len <= start);             698         BUG_ON(start + len <= start);
951                                                   699 
952         mmap_read_lock(dst_mm);                   700         mmap_read_lock(dst_mm);
953                                                   701 
954         /*                                        702         /*
955          * If memory mappings are changing bec    703          * If memory mappings are changing because of non-cooperative
956          * operation (e.g. mremap) running in     704          * operation (e.g. mremap) running in parallel, bail out and
957          * request the user to retry later        705          * request the user to retry later
958          */                                       706          */
959         down_read(&ctx->map_changing_lock);    << 
960         err = -EAGAIN;                            707         err = -EAGAIN;
961         if (atomic_read(&ctx->mmap_changing))  !! 708         if (mmap_changing && atomic_read(mmap_changing))
962                 goto out_unlock;                  709                 goto out_unlock;
963                                                   710 
964         err = -ENOENT;                            711         err = -ENOENT;
965         for_each_vma_range(vmi, dst_vma, end)  !! 712         dst_vma = find_dst_vma(dst_mm, start, len);
966                                                << 
967                 if (!userfaultfd_wp(dst_vma))  << 
968                         err = -ENOENT;         << 
969                         break;                 << 
970                 }                              << 
971                                                << 
972                 if (is_vm_hugetlb_page(dst_vma << 
973                         err = -EINVAL;         << 
974                         page_mask = vma_kernel << 
975                         if ((start & page_mask << 
976                                 break;         << 
977                 }                              << 
978                                                << 
979                 _start = max(dst_vma->vm_start << 
980                 _end = min(dst_vma->vm_end, en << 
981                                                << 
982                 err = uffd_wp_range(dst_vma, _ << 
983                                                << 
984                 /* Return 0 on success, <0 on  << 
985                 if (err < 0)                   << 
986                         break;                 << 
987                 err = 0;                       << 
988         }                                      << 
989 out_unlock:                                    << 
990         up_read(&ctx->map_changing_lock);      << 
991         mmap_read_unlock(dst_mm);              << 
992         return err;                            << 
993 }                                              << 
994                                                << 
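
/*
 * A minimal userspace sketch (not kernel code): mwriteprotect_range()
 * above backs UFFDIO_WRITEPROTECT.  The usual round trip is to
 * write-protect a range, observe the write fault it triggers (reported
 * with UFFD_PAGEFAULT_FLAG_WP set), and then clear the protection to
 * let the faulting thread continue.  Assumes the standard UAPI and a
 * uffd registered with UFFDIO_REGISTER_MODE_WP; the helper name is
 * illustrative.
 */
#include <stdbool.h>
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static int uffd_set_wp(int uffd, unsigned long start, unsigned long len,
		       bool protect)
{
	struct uffdio_writeprotect wp = {
		.range = { .start = start, .len = len },
		.mode  = protect ? UFFDIO_WRITEPROTECT_MODE_WP : 0,
	};

	/* Clearing the protection also wakes any waiter blocked on the range. */
	return ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);
}
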
995                                                << 
996 void double_pt_lock(spinlock_t *ptl1,          << 
997                     spinlock_t *ptl2)          << 
998         __acquires(ptl1)                       << 
999         __acquires(ptl2)                       << 
1000 {                                             << 
1001         if (ptl1 > ptl2)                      << 
1002                 swap(ptl1, ptl2);             << 
1003         /* lock in virtual address order to a << 
1004         spin_lock(ptl1);                      << 
1005         if (ptl1 != ptl2)                     << 
1006                 spin_lock_nested(ptl2, SINGLE << 
1007         else                                  << 
1008                 __acquire(ptl2);              << 
1009 }                                             << 
1010                                               << 
1011 void double_pt_unlock(spinlock_t *ptl1,       << 
1012                       spinlock_t *ptl2)       << 
1013         __releases(ptl1)                      << 
1014         __releases(ptl2)                      << 
1015 {                                             << 
1016         spin_unlock(ptl1);                    << 
1017         if (ptl1 != ptl2)                     << 
1018                 spin_unlock(ptl2);            << 
1019         else                                  << 
1020                 __release(ptl2);              << 
1021 }                                             << 
1022                                               << 
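
/*
 * A plain-C sketch (not kernel code) of the idiom used by
 * double_pt_lock()/double_pt_unlock() above: take the two locks in a
 * fixed (address) order so that concurrent callers can never deadlock
 * on each other, and degenerate gracefully when both sides happen to
 * share a single lock.  pthread mutexes stand in for the page-table
 * spinlocks; the names are illustrative.
 */
#include <pthread.h>

static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a > b) {		/* impose one global order by address */
		pthread_mutex_t *tmp = a;

		a = b;
		b = tmp;
	}
	pthread_mutex_lock(a);
	if (a != b)		/* both sides may map to the same lock */
		pthread_mutex_lock(b);
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);
}
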
1023                                               << 
1024 static int move_present_pte(struct mm_struct  << 
1025                             struct vm_area_st << 
1026                             struct vm_area_st << 
1027                             unsigned long dst << 
1028                             pte_t *dst_pte, p << 
1029                             pte_t orig_dst_pt << 
1030                             spinlock_t *dst_p << 
1031                             struct folio *src << 
1032 {                                             << 
1033         int err = 0;                          << 
1034                                               << 
1035         double_pt_lock(dst_ptl, src_ptl);     << 
1036                                               << 
1037         if (!pte_same(ptep_get(src_pte), orig << 
1038             !pte_same(ptep_get(dst_pte), orig << 
1039                 err = -EAGAIN;                << 
1040                 goto out;                     << 
1041         }                                     << 
1042         if (folio_test_large(src_folio) ||    << 
1043             folio_maybe_dma_pinned(src_folio) << 
1044             !PageAnonExclusive(&src_folio->pa << 
1045                 err = -EBUSY;                 << 
1046                 goto out;                     << 
1047         }                                     << 
1048                                               << 
1049         orig_src_pte = ptep_clear_flush(src_v << 
1050         /* Folio got pinned from under us. Pu << 
1051         if (folio_maybe_dma_pinned(src_folio) << 
1052                 set_pte_at(mm, src_addr, src_ << 
1053                 err = -EBUSY;                 << 
1054                 goto out;                     << 
1055         }                                     << 
1056                                               << 
1057         folio_move_anon_rmap(src_folio, dst_v << 
1058         src_folio->index = linear_page_index( << 
1059                                               << 
1060         orig_dst_pte = mk_pte(&src_folio->pag << 
1061         /* Follow mremap() behavior and treat << 
1062         orig_dst_pte = pte_mkwrite(pte_mkdirt << 
1063                                               << 
1064         set_pte_at(mm, dst_addr, dst_pte, ori << 
1065 out:                                          << 
1066         double_pt_unlock(dst_ptl, src_ptl);   << 
1067         return err;                           << 
1068 }                                             << 
1069                                               << 
1070 static int move_swap_pte(struct mm_struct *mm << 
1071                          unsigned long dst_ad << 
1072                          pte_t *dst_pte, pte_ << 
1073                          pte_t orig_dst_pte,  << 
1074                          spinlock_t *dst_ptl, << 
1075 {                                             << 
1076         if (!pte_swp_exclusive(orig_src_pte)) << 
1077                 return -EBUSY;                << 
1078                                               << 
1079         double_pt_lock(dst_ptl, src_ptl);     << 
1080                                               << 
1081         if (!pte_same(ptep_get(src_pte), orig << 
1082             !pte_same(ptep_get(dst_pte), orig << 
1083                 double_pt_unlock(dst_ptl, src << 
1084                 return -EAGAIN;               << 
1085         }                                     << 
1086                                               << 
1087         orig_src_pte = ptep_get_and_clear(mm, << 
1088         set_pte_at(mm, dst_addr, dst_pte, ori << 
1089         double_pt_unlock(dst_ptl, src_ptl);   << 
1090                                               << 
1091         return 0;                             << 
1092 }                                             << 
1093                                               << 
1094 static int move_zeropage_pte(struct mm_struct << 
1095                              struct vm_area_s << 
1096                              struct vm_area_s << 
1097                              unsigned long ds << 
1098                              pte_t *dst_pte,  << 
1099                              pte_t orig_dst_p << 
1100                              spinlock_t *dst_ << 
1101 {                                             << 
1102         pte_t zero_pte;                       << 
1103                                               << 
1104         double_pt_lock(dst_ptl, src_ptl);     << 
1105         if (!pte_same(ptep_get(src_pte), orig << 
1106             !pte_same(ptep_get(dst_pte), orig << 
1107                 double_pt_unlock(dst_ptl, src << 
1108                 return -EAGAIN;               << 
1109         }                                     << 
1110                                               << 
1111         zero_pte = pte_mkspecial(pfn_pte(my_z << 
1112                                          dst_ << 
1113         ptep_clear_flush(src_vma, src_addr, s << 
1114         set_pte_at(mm, dst_addr, dst_pte, zer << 
1115         double_pt_unlock(dst_ptl, src_ptl);   << 
1116                                               << 
1117         return 0;                             << 
1118 }                                             << 
1119                                               << 
1120                                               << 
1121 /*                                            << 
1122  * The mmap_lock for reading is held by the c << 
1123  * from src_pmd to dst_pmd if possible, and r << 
1124  * in moving the page.                        << 
1125  */                                           << 
1126 static int move_pages_pte(struct mm_struct *m << 
1127                           struct vm_area_stru << 
1128                           struct vm_area_stru << 
1129                           unsigned long dst_a << 
1130                           __u64 mode)         << 
1131 {                                             << 
1132         swp_entry_t entry;                    << 
1133         pte_t orig_src_pte, orig_dst_pte;     << 
1134         pte_t src_folio_pte;                  << 
1135         spinlock_t *src_ptl, *dst_ptl;        << 
1136         pte_t *src_pte = NULL;                << 
1137         pte_t *dst_pte = NULL;                << 
1138                                               << 
1139         struct folio *src_folio = NULL;       << 
1140         struct anon_vma *src_anon_vma = NULL; << 
1141         struct mmu_notifier_range range;      << 
1142         int err = 0;                          << 
1143                                               << 
1144         flush_cache_range(src_vma, src_addr,  << 
1145         mmu_notifier_range_init(&range, MMU_N << 
1146                                 src_addr, src << 
1147         mmu_notifier_invalidate_range_start(& << 
1148 retry:                                        << 
1149         dst_pte = pte_offset_map_nolock(mm, d << 
1150                                               << 
1151         /* Retry if a huge pmd materialized f << 
1152         if (unlikely(!dst_pte)) {             << 
1153                 err = -EAGAIN;                << 
1154                 goto out;                     << 
1155         }                                     << 
1156                                               << 
1157         src_pte = pte_offset_map_nolock(mm, s << 
1158                                               << 
1159         /*                                    << 
1160          * We held the mmap_lock for reading  << 
1161          * can zap transparent huge pages und << 
1162          * transparent huge page fault can es << 
1163          * transparent huge pages under us.   << 
1164          */                                   << 
1165         if (unlikely(!src_pte)) {             << 
1166                 err = -EAGAIN;                << 
1167                 goto out;                     << 
1168         }                                     << 
1169                                               << 
1170         /* Sanity checks before the operation << 
1171         if (WARN_ON_ONCE(pmd_none(*dst_pmd))  << 
1172             WARN_ON_ONCE(pmd_trans_huge(*dst_ << 
1173                 err = -EINVAL;                << 
1174                 goto out;                     << 
1175         }                                     << 
1176                                               << 
1177         spin_lock(dst_ptl);                   << 
1178         orig_dst_pte = ptep_get(dst_pte);     << 
1179         spin_unlock(dst_ptl);                 << 
1180         if (!pte_none(orig_dst_pte)) {        << 
1181                 err = -EEXIST;                << 
1182                 goto out;                     << 
1183         }                                     << 
1184                                               << 
1185         spin_lock(src_ptl);                   << 
1186         orig_src_pte = ptep_get(src_pte);     << 
1187         spin_unlock(src_ptl);                 << 
1188         if (pte_none(orig_src_pte)) {         << 
1189                 if (!(mode & UFFDIO_MOVE_MODE << 
1190                         err = -ENOENT;        << 
1191                 else /* nothing to do to move << 
1192                         err = 0;              << 
1193                 goto out;                     << 
1194         }                                     << 
1195                                               << 
1196         /* If PTE changed after we locked the << 
1197         if (src_folio && unlikely(!pte_same(s << 
1198                 err = -EAGAIN;                << 
1199                 goto out;                     << 
1200         }                                     << 
1201                                               << 
1202         if (pte_present(orig_src_pte)) {      << 
1203                 if (is_zero_pfn(pte_pfn(orig_ << 
1204                         err = move_zeropage_p << 
1205                                               << 
1206                                               << 
1207                                               << 
1208                         goto out;             << 
1209                 }                             << 
1210                                               << 
1211                 /*                            << 
1212                  * Pin and lock both source f << 
1213                  * RCU read section, we can't << 
1214                  * unmap the ptes, obtain the << 
1215                  */                           << 
1216                 if (!src_folio) {             << 
1217                         struct folio *folio;  << 
1218                                               << 
1219                         /*                    << 
1220                          * Pin the page while << 
1221                          * page isn't freed u << 
1222                          */                   << 
1223                         spin_lock(src_ptl);   << 
1224                         if (!pte_same(orig_sr << 
1225                                 spin_unlock(s << 
1226                                 err = -EAGAIN << 
1227                                 goto out;     << 
1228                         }                     << 
1229                                               << 
1230                         folio = vm_normal_fol << 
1231                         if (!folio || !PageAn << 
1232                                 spin_unlock(s << 
1233                                 err = -EBUSY; << 
1234                                 goto out;     << 
1235                         }                     << 
1236                                               << 
1237                         folio_get(folio);     << 
1238                         src_folio = folio;    << 
1239                         src_folio_pte = orig_ << 
1240                         spin_unlock(src_ptl); << 
1241                                               << 
1242                         if (!folio_trylock(sr << 
1243                                 pte_unmap(&or << 
1244                                 pte_unmap(&or << 
1245                                 src_pte = dst << 
1246                                 /* now we can << 
1247                                 folio_lock(sr << 
1248                                 goto retry;   << 
1249                         }                     << 
1250                                               << 
1251                         if (WARN_ON_ONCE(!fol << 
1252                                 err = -EBUSY; << 
1253                                 goto out;     << 
1254                         }                     << 
1255                 }                             << 
1256                                               << 
1257                 /* at this point we have src_ << 
1258                 if (folio_test_large(src_foli << 
1259                         /* split_folio() can  << 
1260                         pte_unmap(&orig_src_p << 
1261                         pte_unmap(&orig_dst_p << 
1262                         src_pte = dst_pte = N << 
1263                         err = split_folio(src << 
1264                         if (err)              << 
1265                                 goto out;     << 
1266                         /* have to reacquire  << 
1267                         folio_unlock(src_foli << 
1268                         folio_put(src_folio); << 
1269                         src_folio = NULL;     << 
1270                         goto retry;           << 
1271                 }                             << 
1272                                               << 
1273                 if (!src_anon_vma) {          << 
1274                         /*                    << 
1275                          * folio_referenced w << 
1276                          * without the folio  << 
1277                          * the anon_vma lock, << 
1278                          */                   << 
1279                         src_anon_vma = folio_ << 
1280                         if (!src_anon_vma) {  << 
1281                                 /* page was u << 
1282                                 err = -EAGAIN << 
1283                                 goto out;     << 
1284                         }                     << 
1285                         if (!anon_vma_trylock << 
1286                                 pte_unmap(&or << 
1287                                 pte_unmap(&or << 
1288                                 src_pte = dst << 
1289                                 /* now we can << 
1290                                 anon_vma_lock << 
1291                                 goto retry;   << 
1292                         }                     << 
1293                 }                             << 
1294                                               << 
1295                 err = move_present_pte(mm,  d << 
1296                                        dst_ad << 
1297                                        orig_d << 
1298                                        dst_pt << 
1299         } else {                              << 
1300                 entry = pte_to_swp_entry(orig << 
1301                 if (non_swap_entry(entry)) {  << 
1302                         if (is_migration_entr << 
1303                                 pte_unmap(&or << 
1304                                 pte_unmap(&or << 
1305                                 src_pte = dst << 
1306                                 migration_ent << 
1307                                 err = -EAGAIN << 
1308                         } else                << 
1309                                 err = -EFAULT << 
1310                         goto out;             << 
1311                 }                             << 
1312                                               << 
1313                 err = move_swap_pte(mm, dst_a << 
1314                                     dst_pte,  << 
1315                                     orig_dst_ << 
1316                                     dst_ptl,  << 
1317         }                                     << 
1318                                               << 
1319 out:                                          << 
1320         if (src_anon_vma) {                   << 
1321                 anon_vma_unlock_write(src_ano << 
1322                 put_anon_vma(src_anon_vma);   << 
1323         }                                     << 
1324         if (src_folio) {                      << 
1325                 folio_unlock(src_folio);      << 
1326                 folio_put(src_folio);         << 
1327         }                                     << 
1328         if (dst_pte)                          << 
1329                 pte_unmap(dst_pte);           << 
1330         if (src_pte)                          << 
1331                 pte_unmap(src_pte);           << 
1332         mmu_notifier_invalidate_range_end(&ra << 
1333                                               << 
1334         return err;                           << 
1335 }                                             << 
1336                                               << 
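
/*
 * A minimal userspace sketch (not kernel code): move_pages_pte() above
 * is the per-PTE worker behind UFFDIO_MOVE, which remaps existing
 * anonymous pages from a source range into the registered destination
 * range instead of copying them.  Assumes a kernel and
 * <linux/userfaultfd.h> that provide UFFDIO_MOVE (advertised through
 * UFFD_FEATURE_MOVE); the helper name is illustrative.
 */
#include <errno.h>
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static long uffd_move_range(int uffd, unsigned long dst, unsigned long src,
			    unsigned long len)
{
	struct uffdio_move mv = {
		.dst  = dst,
		.src  = src,
		.len  = len,
		/* Skip holes in the source instead of failing with ENOENT. */
		.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES,
	};

	if (ioctl(uffd, UFFDIO_MOVE, &mv) == 0)
		return len;		/* the whole range was remapped */
	/* On failure, mv.move holds the bytes moved before the error, if any. */
	return mv.move > 0 ? mv.move : -errno;
}
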
1337 #ifdef CONFIG_TRANSPARENT_HUGEPAGE            << 
1338 static inline bool move_splits_huge_pmd(unsig << 
1339                                         unsig << 
1340                                         unsig << 
1341 {                                             << 
1342         return (src_addr & ~HPAGE_PMD_MASK) | << 
1343                 src_end - src_addr < HPAGE_PM << 
1344 }                                             << 
1345 #else                                         << 
1346 static inline bool move_splits_huge_pmd(unsig << 
1347                                         unsig << 
1348                                         unsig << 
1349 {                                             << 
1350         /* This is unreachable anyway, just t << 
1351         return false;                         << 
1352 }                                             << 
1353 #endif                                        << 
1354                                               << 
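
/*
 * A plain-C sketch (not kernel code) of the rule move_splits_huge_pmd()
 * above encodes: a huge PMD can only be moved whole when both the
 * source and destination addresses are PMD-aligned and at least one
 * full PMD of the range remains; anything else forces a split.  The
 * 2 MiB PMD size is an assumption (x86-64 with 4 KiB base pages).
 */
#include <assert.h>
#include <stdbool.h>

#define PMD_SZ	(2UL << 20)

static bool move_needs_split(unsigned long dst, unsigned long src,
			     unsigned long src_end)
{
	return (src & (PMD_SZ - 1)) || (dst & (PMD_SZ - 1)) ||
	       src_end - src < PMD_SZ;
}

int main(void)
{
	/* Fully aligned, full PMD remaining: no split needed. */
	assert(!move_needs_split(0x40000000UL, 0x80000000UL,
				 0x80000000UL + PMD_SZ));
	/* Unaligned destination: must split. */
	assert(move_needs_split(0x40001000UL, 0x80000000UL,
				0x80000000UL + PMD_SZ));
	/* Less than one PMD left in the source range: must split. */
	assert(move_needs_split(0x40000000UL, 0x80000000UL,
				0x80000000UL + 0x1000UL));
	return 0;
}
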
1355 static inline bool vma_move_compatible(struct << 
1356 {                                             << 
1357         return !(vma->vm_flags & (VM_PFNMAP | << 
1358                                   VM_MIXEDMAP << 
1359 }                                             << 
1360                                               << 
1361 static int validate_move_areas(struct userfau << 
1362                                struct vm_area << 
1363                                struct vm_area << 
1364 {                                             << 
1365         /* Only allow moving if both have the << 
1366         if ((src_vma->vm_flags & VM_ACCESS_FL << 
1367             pgprot_val(src_vma->vm_page_prot) << 
1368                 return -EINVAL;               << 
1369                                               << 
1370         /* Only allow moving if both are mloc << 
1371         if ((src_vma->vm_flags & VM_LOCKED) ! << 
1372                 return -EINVAL;               << 
1373                                               << 
1374         /*                                    << 
1375          * For now, we keep it simple and onl << 
1376          * Access flags are equal, therefore  << 
1377          */                                   << 
1378         if (!(src_vma->vm_flags & VM_WRITE))  << 
1379                 return -EINVAL;               << 
1380                                               << 
1381         /* Check if vma flags indicate conten << 
1382         if (!vma_move_compatible(src_vma) ||  << 
1383                 return -EINVAL;               << 
1384                                               << 
1385         /* Ensure dst_vma is registered in uf << 
1386         if (!dst_vma->vm_userfaultfd_ctx.ctx  << 
1387             dst_vma->vm_userfaultfd_ctx.ctx ! << 
1388                 return -EINVAL;               << 
1389                                               << 
1390         /* Only allow moving across anonymous << 
1391         if (!vma_is_anonymous(src_vma) || !vm << 
1392                 return -EINVAL;               << 
1393                                               << 
1394         return 0;                             << 
1395 }                                             << 
1396                                               << 
1397 static __always_inline                        << 
1398 int find_vmas_mm_locked(struct mm_struct *mm, << 
1399                         unsigned long dst_sta << 
1400                         unsigned long src_sta << 
1401                         struct vm_area_struct << 
1402                         struct vm_area_struct << 
1403 {                                             << 
1404         struct vm_area_struct *vma;           << 
1405                                               << 
1406         mmap_assert_locked(mm);               << 
1407         vma = find_vma_and_prepare_anon(mm, d << 
1408         if (IS_ERR(vma))                      << 
1409                 return PTR_ERR(vma);          << 
1410                                               << 
1411         *dst_vmap = vma;                      << 
1412         /* Skip finding src_vma if src_start  << 
1413         if (src_start >= vma->vm_start && src << 
1414                 goto out_success;             << 
1415                                               << 
1416         vma = vma_lookup(mm, src_start);      << 
1417         if (!vma)                             << 
1418                 return -ENOENT;               << 
1419 out_success:                                  << 
1420         *src_vmap = vma;                      << 
1421         return 0;                             << 
1422 }                                             << 
1423                                               << 
1424 #ifdef CONFIG_PER_VMA_LOCK                    << 
1425 static int uffd_move_lock(struct mm_struct *m << 
1426                           unsigned long dst_s << 
1427                           unsigned long src_s << 
1428                           struct vm_area_stru << 
1429                           struct vm_area_stru << 
1430 {                                             << 
1431         struct vm_area_struct *vma;           << 
1432         int err;                              << 
1433                                               << 
1434         vma = uffd_lock_vma(mm, dst_start);   << 
1435         if (IS_ERR(vma))                      << 
1436                 return PTR_ERR(vma);          << 
1437                                               << 
1438         *dst_vmap = vma;                      << 
1439         /*                                    << 
1440          * Skip finding src_vma if src_start  << 
1441          * that we don't lock the same vma tw << 
1442          */                                   << 
1443         if (src_start >= vma->vm_start && src << 
1444                 *src_vmap = vma;              << 
1445                 return 0;                     << 
1446         }                                     << 
1447                                               << 
1448         /*                                    << 
1449          * Using uffd_lock_vma() to get src_v << 
1450          *                                    << 
1451          * Thread1                            << 
1452          * -------                            << 
1453          * vma_start_read(dst_vma)            << 
1454          *                                    << 
1455          *                                    << 
1456          * vma_start_read(src_vma)            << 
1457          * mmap_read_lock(mm)                 << 
1458          *                                    << 
1459          */                                   << 
1460         *src_vmap = lock_vma_under_rcu(mm, sr << 
1461         if (likely(*src_vmap))                << 
1462                 return 0;                     << 
1463                                               << 
1464         /* Undo any locking and retry in mmap << 
1465         vma_end_read(*dst_vmap);              << 
1466                                               << 
1467         mmap_read_lock(mm);                   << 
1468         err = find_vmas_mm_locked(mm, dst_sta << 
1469         if (!err) {                           << 
1470                 /*                            << 
1471                  * See comment in uffd_lock_v << 
1472                  * vma_start_read() here.     << 
1473                  */                           << 
1474                 down_read(&(*dst_vmap)->vm_lo << 
1475                 if (*dst_vmap != *src_vmap)   << 
1476                         down_read_nested(&(*s << 
1477                                          SING << 
1478         }                                     << 
1479         mmap_read_unlock(mm);                 << 
1480         return err;                           << 
1481 }                                             << 
1482                                               << 
1483 static void uffd_move_unlock(struct vm_area_s << 
1484                              struct vm_area_s << 
1485 {                                             << 
1486         vma_end_read(src_vma);                << 
1487         if (src_vma != dst_vma)               << 
1488                 vma_end_read(dst_vma);        << 
1489 }                                             << 
1490                                               << 
1491 #else                                         << 
1492                                               << 
1493 static int uffd_move_lock(struct mm_struct *m << 
1494                           unsigned long dst_s << 
1495                           unsigned long src_s << 
1496                           struct vm_area_stru << 
1497                           struct vm_area_stru << 
1498 {                                             << 
1499         int err;                              << 
1500                                               << 
1501         mmap_read_lock(mm);                   << 
1502         err = find_vmas_mm_locked(mm, dst_sta << 
1503         if (err)                              << 
1504                 mmap_read_unlock(mm);         << 
1505         return err;                           << 
1506 }                                             << 
1507                                               << 
1508 static void uffd_move_unlock(struct vm_area_s << 
1509                              struct vm_area_s << 
1510 {                                             << 
1511         mmap_assert_locked(src_vma->vm_mm);   << 
1512         mmap_read_unlock(dst_vma->vm_mm);     << 
1513 }                                             << 
1514 #endif                                        << 
1515                                               << 
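The locking helpers above choose between per-VMA locks and the mmap_lock fallback; the comment in uffd_move_lock() explains why src_vma must be looked up with the non-blocking lock_vma_under_rcu() while dst_vma is already read-locked, and why a lookup failure drops both locks and retries under mmap_lock. Below is a minimal userspace analogy of that trylock-then-fall-back discipline, using ordinary pthread mutexes instead of the kernel's vma and mmap locks (all names are illustrative, not kernel API):

#include <pthread.h>

/* Plays the role of mmap_lock: a coarse lock serializing the slow path. */
static pthread_mutex_t coarse_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Acquire two fine-grained locks without risking an ABBA deadlock:
 * never block on the second lock while still holding the first.
 */
static void lock_pair(pthread_mutex_t *dst, pthread_mutex_t *src)
{
        pthread_mutex_lock(dst);
        if (dst == src || pthread_mutex_trylock(src) == 0)
                return;                         /* fast path: both held */

        pthread_mutex_unlock(dst);              /* undo before blocking */

        /* Slow path: serialize on the coarse lock, then take both. */
        pthread_mutex_lock(&coarse_lock);
        pthread_mutex_lock(dst);
        if (dst != src)
                pthread_mutex_lock(src);
        pthread_mutex_unlock(&coarse_lock);
}

The kernel version differs in that the vma locks are taken for read (shared) and the slow path revalidates both VMAs under mmap_lock, but the ordering discipline is the same.
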
1516 /**                                           << 
1517  * move_pages - move arbitrary anonymous page << 
1518  * @ctx: pointer to the userfaultfd context   << 
1519  * @dst_start: start of the destination virtu << 
1520  * @src_start: start of the source virtual me << 
1521  * @len: length of the virtual memory range   << 
1522  * @mode: flags from uffdio_move.mode         << 
1523  *                                            << 
1524  * It will either use the mmap_lock in read m << 
1525  *                                            << 
1526  * move_pages() remaps arbitrary anonymous pa << 
1527  * copy. It only works on non shared anonymou << 
1528  * be relocated without generating non linear << 
1529  * code.                                      << 
1530  *                                            << 
1531  * It provides a zero copy mechanism to handl << 
1532  * The source vma pages should have mapcount  << 
1533  * enforced by using madvise(MADV_DONTFORK) o << 
1534  *                                            << 
1535  * The thread receiving the page during the u << 
1536  * will receive the faulting page in the sour << 
1537  * storage or any other I/O device (MADV_DONT << 
1538  * prevents move_pages() from failing with -EBUSY if << 
1539  * move_pages() is called), then it will call << 
1540  * page in the faulting address in the destin << 
1541  *                                            << 
1542  * This userfaultfd command works purely via  << 
1543  * most efficient way to move physical non sh << 
1544  * across different virtual addresses. Unlike << 
1545  * it does not create any new vmas. The mappi << 
1546  * address is atomic.                         << 
1547  *                                            << 
1548  * It only works if the vma protection bits a << 
1549  * source and destination vma.                << 
1550  *                                            << 
1551  * It can remap non shared anonymous pages wi << 
1552  *                                            << 
1553  * If the source virtual memory range has any << 
1554  * the destination virtual memory range is no << 
1555  * move_pages() will fail respectively with - << 
1556  * provides a very strict behavior to avoid a << 
1557  * corruption going unnoticed if there are us << 
1558  * Only one thread should resolve the userlan << 
1559  * time for any given faulting address. This  << 
1560  * try to both call move_pages() on the same  << 
1561  * same time, the second thread will get an e << 
1562  * command.                                   << 
1563  *                                            << 
1564  * The command retval will return "len" if su << 
1565  * however can be interrupted by fatal signal << 
1566  * interrupted it will return the number of b << 
1567  * remapped before the interruption if any, o << 
1568  * none. It will never return zero. Either it << 
1569  * an amount of bytes successfully moved. If  << 
1570  * "short" remap, the move_pages() command sh << 
1571  * userland with src+retval, dst+retval, len-r << 
1572  * about the error that interrupted it.       << 
1573  *                                            << 
1574  * The UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES flag  << 
1575  * prevent -ENOENT errors from materializing if t << 
1576  * source virtual range that is being remappe << 
1577  * accounted as successfully remapped in the  << 
1578  * command. This is mostly useful to remap hu << 
1579  * virtual regions without knowing if there a << 
1580  * in the regions or not, but preventing the  << 
1581  * the hugepmd during the remap.              << 
1582  *                                            << 
1583  * If there's any rmap walk that is taking th << 
1584  * first obtaining the folio lock (the only c << 
1585  * folio_referenced), they will have to verif << 
1586  * has changed after taking the anon_vma lock << 
1587  * should release the lock and retry obtainin << 
1588  * it means the anon_vma was changed by move_ << 
1589  * could be obtained. This is the only additi << 
1590  * the rmap code to provide this anonymous pa << 
1591  */                                           << 
1592 ssize_t move_pages(struct userfaultfd_ctx *ct << 
1593                    unsigned long src_start, u << 
1594 {                                             << 
1595         struct mm_struct *mm = ctx->mm;       << 
1596         struct vm_area_struct *src_vma, *dst_ << 
1597         unsigned long src_addr, dst_addr;     << 
1598         pmd_t *src_pmd, *dst_pmd;             << 
1599         long err = -EINVAL;                   << 
1600         ssize_t moved = 0;                    << 
1601                                               << 
1602         /* Sanitize the command parameters. * << 
1603         if (WARN_ON_ONCE(src_start & ~PAGE_MA << 
1604             WARN_ON_ONCE(dst_start & ~PAGE_MA << 
1605             WARN_ON_ONCE(len & ~PAGE_MASK))   << 
1606                 goto out;                     << 
1607                                               << 
1608         /* Does the address range wrap, or is << 
1609         if (WARN_ON_ONCE(src_start + len <= s << 
1610             WARN_ON_ONCE(dst_start + len <= d << 
1611                 goto out;                     << 
1612                                               << 
1613         err = uffd_move_lock(mm, dst_start, s << 
1614         if (err)                              << 
1615                 goto out;                     << 
1616                                               << 
1617         /* Re-check after taking map_changing << 
1618         err = -EAGAIN;                        << 
1619         down_read(&ctx->map_changing_lock);   << 
1620         if (likely(atomic_read(&ctx->mmap_cha << 
1621                 goto out_unlock;              << 
1622         /*                                       713         /*
1623          * Make sure the vma is not shared, t !! 714          * Make sure the vma is not shared, that the dst range is
1624          * ranges are both valid and fully wi !! 715          * both valid and fully within a single existing vma.
1625          * vma.                               << 
1626          */                                      716          */
1627         err = -EINVAL;                        !! 717         if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
1628         if (src_vma->vm_flags & VM_SHARED)    << 
1629                 goto out_unlock;                 718                 goto out_unlock;
1630         if (src_start + len > src_vma->vm_end !! 719         if (!userfaultfd_wp(dst_vma))
1631                 goto out_unlock;                 720                 goto out_unlock;
1632                                               !! 721         if (!vma_is_anonymous(dst_vma))
1633         if (dst_vma->vm_flags & VM_SHARED)    << 
1634                 goto out_unlock;              << 
1635         if (dst_start + len > dst_vma->vm_end << 
1636                 goto out_unlock;              << 
1637                                               << 
1638         err = validate_move_areas(ctx, src_vm << 
1639         if (err)                              << 
1640                 goto out_unlock;                 722                 goto out_unlock;
1641                                                  723 
1642         for (src_addr = src_start, dst_addr = !! 724         if (enable_wp)
1643              src_addr < src_start + len;) {   !! 725                 newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
1644                 spinlock_t *ptl;              !! 726         else
1645                 pmd_t dst_pmdval;             !! 727                 newprot = vm_get_page_prot(dst_vma->vm_flags);
1646                 unsigned long step_size;      << 
1647                                               << 
1648                 /*                            << 
1649                  * Below works because anonym << 
1650                  * transparent huge PUD. If f << 
1651                  * that case would need to be << 
1652                  */                           << 
1653                 src_pmd = mm_find_pmd(mm, src << 
1654                 if (unlikely(!src_pmd)) {     << 
1655                         if (!(mode & UFFDIO_M << 
1656                                 err = -ENOENT << 
1657                                 break;        << 
1658                         }                     << 
1659                         src_pmd = mm_alloc_pm << 
1660                         if (unlikely(!src_pmd << 
1661                                 err = -ENOMEM << 
1662                                 break;        << 
1663                         }                     << 
1664                 }                             << 
1665                 dst_pmd = mm_alloc_pmd(mm, ds << 
1666                 if (unlikely(!dst_pmd)) {     << 
1667                         err = -ENOMEM;        << 
1668                         break;                << 
1669                 }                             << 
1670                                               << 
1671                 dst_pmdval = pmdp_get_lockles << 
1672                 /*                            << 
1673                  * If the dst_pmd is mapped a << 
1674                  * be strict. If dst_pmd chan << 
1675                  * move_pages_huge_pmd() will << 
1676                  * while move_pages_pte() wil << 
1677                  */                           << 
1678                 if (unlikely(pmd_trans_huge(d << 
1679                         err = -EEXIST;        << 
1680                         break;                << 
1681                 }                             << 
1682                                               << 
1683                 ptl = pmd_trans_huge_lock(src << 
1684                 if (ptl) {                    << 
1685                         if (pmd_devmap(*src_p << 
1686                                 spin_unlock(p << 
1687                                 err = -ENOENT << 
1688                                 break;        << 
1689                         }                     << 
1690                                               << 
1691                         /* Check if we can mo << 
1692                         if (move_splits_huge_ << 
1693                             !pmd_none(dst_pmd << 
1694                                 struct folio  << 
1695                                               << 
1696                                 if (!folio || << 
1697                                               << 
1698                                         spin_ << 
1699                                         err = << 
1700                                         break << 
1701                                 }             << 
1702                                               << 
1703                                 spin_unlock(p << 
1704                                 split_huge_pm << 
1705                                 /* The folio  << 
1706                                 continue;     << 
1707                         }                     << 
1708                                               << 
1709                         err = move_pages_huge << 
1710                                               << 
1711                                               << 
1712                         step_size = HPAGE_PMD << 
1713                 } else {                      << 
1714                         if (pmd_none(*src_pmd << 
1715                                 if (!(mode &  << 
1716                                         err = << 
1717                                         break << 
1718                                 }             << 
1719                                 if (unlikely( << 
1720                                         err = << 
1721                                         break << 
1722                                 }             << 
1723                         }                     << 
1724                                               << 
1725                         if (unlikely(pte_allo << 
1726                                 err = -ENOMEM << 
1727                                 break;        << 
1728                         }                     << 
1729                                               << 
1730                         err = move_pages_pte( << 
1731                                               << 
1732                                               << 
1733                         step_size = PAGE_SIZE << 
1734                 }                             << 
1735                                               << 
1736                 cond_resched();               << 
1737                                               << 
1738                 if (fatal_signal_pending(curr << 
1739                         /* Do not override an << 
1740                         if (!err || err == -E << 
1741                                 err = -EINTR; << 
1742                         break;                << 
1743                 }                             << 
1744                                               << 
1745                 if (err) {                    << 
1746                         if (err == -EAGAIN)   << 
1747                                 continue;     << 
1748                         break;                << 
1749                 }                             << 
1750                                                  728 
1751                 /* Proceed to the next page * !! 729         change_protection(dst_vma, start, start + len, newprot,
1752                 dst_addr += step_size;        !! 730                           enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
1753                 src_addr += step_size;        << 
1754                 moved += step_size;           << 
1755         }                                     << 
1756                                                  731 
                                                   >> 732         err = 0;
1757 out_unlock:                                      733 out_unlock:
1758         up_read(&ctx->map_changing_lock);     !! 734         mmap_read_unlock(dst_mm);
1759         uffd_move_unlock(dst_vma, src_vma);   !! 735         return err;
1760 out:                                          << 
1761         VM_WARN_ON(moved < 0);                << 
1762         VM_WARN_ON(err > 0);                  << 
1763         VM_WARN_ON(!moved && !err);           << 
1764         return moved ? moved : err;           << 
1765 }                                             << 
1766                                               << 
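For reference, a minimal userspace sketch of driving UFFDIO_MOVE according to the retry convention documented above (repeat with src+retval, dst+retval, len-retval after a short move). It assumes a kernel exposing UFFDIO_MOVE (6.8 or later) and a userfaultfd file descriptor that has already completed the UFFDIO_API handshake and registered the destination range; uffd_move() is a hypothetical helper, not part of the kernel or libc:

#include <errno.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

/* Move [src, src+len) onto [dst, dst+len), retrying short moves. */
static int uffd_move(int uffd, unsigned long src, unsigned long dst,
                     unsigned long len)
{
        struct uffdio_move mv;

        while (len) {
                mv.src  = src;
                mv.dst  = dst;
                mv.len  = len;
                mv.mode = 0;    /* or UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES */
                mv.move = 0;

                if (ioctl(uffd, UFFDIO_MOVE, &mv) == 0)
                        return 0;               /* whole range moved */

                if (mv.move > 0) {              /* short move: advance */
                        src += mv.move;
                        dst += mv.move;
                        len -= mv.move;
                        continue;
                }
                if (errno == EAGAIN)            /* mm changing, retry */
                        continue;
                return -1;                      /* mv.move holds -errno */
        }
        return 0;
}

On an uninterrupted success the ioctl returns 0; after a short move it fails with EAGAIN and the move field holds the bytes already moved, which is exactly the case the loop advances over.
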
1767 static void userfaultfd_set_vm_flags(struct v << 
1768                                      vm_flags << 
1769 {                                             << 
1770         const bool uffd_wp_changed = (vma->vm << 
1771                                               << 
1772         vm_flags_reset(vma, flags);           << 
1773         /*                                    << 
1774          * For shared mappings, we want to en << 
1775          * userfaultfd-wp is enabled (see vma << 
1776          * recalculate vma->vm_page_prot when << 
1777          */                                   << 
1778         if ((vma->vm_flags & VM_SHARED) && uf << 
1779                 vma_set_page_prot(vma);       << 
1780 }                                             << 
1781                                               << 
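The write-notify handling above is what makes userfaultfd-wp observable from userspace: once a range is registered with UFFDIO_REGISTER_MODE_WP, protection is toggled with UFFDIO_WRITEPROTECT. A minimal sketch (hypothetical helper name, error handling reduced to perror):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

/*
 * Write-protect (wp == 1) or resolve write-protection (wp == 0) on a
 * range that was registered with UFFDIO_REGISTER_MODE_WP.
 */
static int uffd_wp_range(int uffd, unsigned long start, unsigned long len,
                         int wp)
{
        struct uffdio_writeprotect prms = {
                .range = { .start = start, .len = len },
                .mode  = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0,
        };

        if (ioctl(uffd, UFFDIO_WRITEPROTECT, &prms) == -1) {
                perror("UFFDIO_WRITEPROTECT");
                return -1;
        }
        return 0;
}

Clearing the protection (mode without UFFDIO_WRITEPROTECT_MODE_WP) also wakes any thread blocked on the write-protect fault, unless UFFDIO_WRITEPROTECT_MODE_DONTWAKE is set.
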
1782 static void userfaultfd_set_ctx(struct vm_are << 
1783                                 struct userfa << 
1784                                 unsigned long << 
1785 {                                             << 
1786         vma_start_write(vma);                 << 
1787         vma->vm_userfaultfd_ctx = (struct vm_ << 
1788         userfaultfd_set_vm_flags(vma,         << 
1789                                  (vma->vm_fla << 
1790 }                                             << 
1791                                               << 
1792 void userfaultfd_reset_ctx(struct vm_area_str << 
1793 {                                             << 
1794         userfaultfd_set_ctx(vma, NULL, 0);    << 
1795 }                                             << 
1796                                               << 
1797 struct vm_area_struct *userfaultfd_clear_vma( << 
1798                                               << 
1799                                               << 
1800                                               << 
1801                                               << 
1802 {                                             << 
1803         struct vm_area_struct *ret;           << 
1804                                               << 
1805         /* Reset ptes for the whole vma range << 
1806         if (userfaultfd_wp(vma))              << 
1807                 uffd_wp_range(vma, start, end << 
1808                                               << 
1809         ret = vma_modify_flags_uffd(vmi, prev << 
1810                                     vma->vm_f << 
1811                                     NULL_VM_U << 
1812                                               << 
1813         /*                                    << 
1814          * In the vma_merge() successful mpro << 
1815          * the next vma was merged into the c << 
1816          * the current one has not been updat << 
1817          */                                   << 
1818         if (!IS_ERR(ret))                     << 
1819                 userfaultfd_reset_ctx(ret);   << 
1820                                               << 
1821         return ret;                           << 
1822 }                                             << 
1823                                               << 
1824 /* Assumes mmap write lock taken, and mm_stru << 
1825 int userfaultfd_register_range(struct userfau << 
1826                                struct vm_area << 
1827                                unsigned long  << 
1828                                unsigned long  << 
1829                                bool wp_async) << 
1830 {                                             << 
1831         VMA_ITERATOR(vmi, ctx->mm, start);    << 
1832         struct vm_area_struct *prev = vma_pre << 
1833         unsigned long vma_end;                << 
1834         unsigned long new_flags;              << 
1835                                               << 
1836         if (vma->vm_start < start)            << 
1837                 prev = vma;                   << 
1838                                               << 
1839         for_each_vma_range(vmi, vma, end) {   << 
1840                 cond_resched();               << 
1841                                               << 
1842                 BUG_ON(!vma_can_userfault(vma << 
1843                 BUG_ON(vma->vm_userfaultfd_ct << 
1844                        vma->vm_userfaultfd_ct << 
1845                 WARN_ON(!(vma->vm_flags & VM_ << 
1846                                               << 
1847                 /*                            << 
1848                  * Nothing to do: this vma is << 
1849                  * userfaultfd and with the r << 
1850                  */                           << 
1851                 if (vma->vm_userfaultfd_ctx.c << 
1852                     (vma->vm_flags & vm_flags << 
1853                         goto skip;            << 
1854                                               << 
1855                 if (vma->vm_start > start)    << 
1856                         start = vma->vm_start << 
1857                 vma_end = min(end, vma->vm_en << 
1858                                               << 
1859                 new_flags = (vma->vm_flags &  << 
1860                 vma = vma_modify_flags_uffd(& << 
1861                                             n << 
1862                                             ( << 
1863                 if (IS_ERR(vma))              << 
1864                         return PTR_ERR(vma);  << 
1865                                               << 
1866                 /*                            << 
1867                  * In the vma_merge() success << 
1868                  * the next vma was merged in << 
1869                  * the current one has not be << 
1870                  */                           << 
1871                 userfaultfd_set_ctx(vma, ctx, << 
1872                                               << 
1873                 if (is_vm_hugetlb_page(vma) & << 
1874                         hugetlb_unshare_all_p << 
1875                                               << 
1876 skip:                                         << 
1877                 prev = vma;                   << 
1878                 start = vma->vm_end;          << 
1879         }                                     << 
1880                                               << 
1881         return 0;                             << 
1882 }                                             << 
1883                                               << 
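userfaultfd_register_range() is the kernel half of UFFDIO_REGISTER; the matching userspace calls, sketched minimally (assumes the UFFDIO_API handshake already succeeded on uffd and that addr/len are page aligned; the helper names are illustrative):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

static int register_missing_and_wp(int uffd, void *addr, unsigned long len)
{
        struct uffdio_register reg = {
                .range = { .start = (unsigned long)addr, .len = len },
                .mode  = UFFDIO_REGISTER_MODE_MISSING | UFFDIO_REGISTER_MODE_WP,
        };

        if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1) {
                perror("UFFDIO_REGISTER");
                return -1;
        }
        /*
         * reg.ioctls now reports which ioctls (COPY, WRITEPROTECT, ...)
         * are usable on this range.
         */
        return 0;
}

static int unregister_range(int uffd, void *addr, unsigned long len)
{
        struct uffdio_range rng = {
                .start = (unsigned long)addr,
                .len   = len,
        };

        return ioctl(uffd, UFFDIO_UNREGISTER, &rng);
}

Unregistering a range, or closing the userfaultfd, drops the context again; that is what userfaultfd_clear_vma() and userfaultfd_release_all() above implement on the kernel side.
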
1884 void userfaultfd_release_new(struct userfault << 
1885 {                                             << 
1886         struct mm_struct *mm = ctx->mm;       << 
1887         struct vm_area_struct *vma;           << 
1888         VMA_ITERATOR(vmi, mm, 0);             << 
1889                                               << 
1890         /* the various vma->vm_userfaultfd_ct << 
1891         mmap_write_lock(mm);                  << 
1892         for_each_vma(vmi, vma) {              << 
1893                 if (vma->vm_userfaultfd_ctx.c << 
1894                         userfaultfd_reset_ctx << 
1895         }                                     << 
1896         mmap_write_unlock(mm);                << 
1897 }                                             << 
1898                                               << 
1899 void userfaultfd_release_all(struct mm_struct << 
1900                              struct userfault << 
1901 {                                             << 
1902         struct vm_area_struct *vma, *prev;    << 
1903         VMA_ITERATOR(vmi, mm, 0);             << 
1904                                               << 
1905         if (!mmget_not_zero(mm))              << 
1906                 return;                       << 
1907                                               << 
1908         /*                                    << 
1909          * Flush page faults out of all CPUs. << 
1910          * must be retried without returning  << 
1911          * userfaultfd_ctx_get() succeeds but << 
1912          * changes while handle_userfault rel << 
1913          * it's critical that released is set << 
1914          * taking the mmap_lock for writing.  << 
1915          */                                   << 
1916         mmap_write_lock(mm);                  << 
1917         prev = NULL;                          << 
1918         for_each_vma(vmi, vma) {              << 
1919                 cond_resched();               << 
1920                 BUG_ON(!!vma->vm_userfaultfd_ << 
1921                        !!(vma->vm_flags & __V << 
1922                 if (vma->vm_userfaultfd_ctx.c << 
1923                         prev = vma;           << 
1924                         continue;             << 
1925                 }                             << 
1926                                               << 
1927                 vma = userfaultfd_clear_vma(& << 
1928                                             v << 
1929                 prev = vma;                   << 
1930         }                                     << 
1931         mmap_write_unlock(mm);                << 
1932         mmput(mm);                            << 
1933 }                                                736 }
1934                                                  737 
