TOMOYO Linux Cross Reference
Linux/mm/userfaultfd.c

Diff markup

Differences between /mm/userfaultfd.c (Version linux-6.12-rc7) and /mm/userfaultfd.c (Version linux-4.19.323)
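
Both versions of this file implement the kernel side of userfaultfd's fill
ioctls: UFFDIO_COPY and UFFDIO_ZEROPAGE in v4.19, plus UFFDIO_CONTINUE and
UFFDIO_POISON on the v6.12 side. As orientation for the diff below, here is a
minimal userspace sketch of the UFFDIO_COPY round trip, assuming only the
standard <linux/userfaultfd.h> UAPI; the helper names (uffd_setup,
uffd_resolve) are illustrative and error handling is abbreviated.

    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Create a userfaultfd and register [addr, addr+len) for missing faults. */
    static int uffd_setup(void *addr, size_t len)
    {
            int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
            struct uffdio_api api = { .api = UFFD_API };
            struct uffdio_register reg = {
                    .range = { .start = (unsigned long)addr, .len = len },
                    .mode  = UFFDIO_REGISTER_MODE_MISSING,
            };

            if (uffd < 0)
                    return -1;
            if (ioctl(uffd, UFFDIO_API, &api) ||
                ioctl(uffd, UFFDIO_REGISTER, &reg)) {
                    close(uffd);
                    return -1;
            }
            return uffd;
    }

    /* Resolve one missing-page fault by copying a prepared source page in.
     * For anonymous memory this ends up in mfill_atomic_pte_copy() (v6.12)
     * or mcopy_atomic_pte() (v4.19) in the listing below. */
    static int uffd_resolve(int uffd, unsigned long fault_addr,
                            void *src_page, size_t page_size)
    {
            struct uffdio_copy copy = {
                    .dst  = fault_addr & ~(page_size - 1),  /* page-align dst */
                    .src  = (unsigned long)src_page,
                    .len  = page_size,
                    .mode = 0,                              /* 0 = wake the faulter */
            };

            return ioctl(uffd, UFFDIO_COPY, &copy);
    }

A monitor thread typically read()s struct uffd_msg events from the uffd and
calls uffd_resolve() with msg.arg.pagefault.address for each
UFFD_EVENT_PAGEFAULT it receives.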


  1 // SPDX-License-Identifier: GPL-2.0-only       << 
  2 /*                                                  1 /*
  3  *  mm/userfaultfd.c                                2  *  mm/userfaultfd.c
  4  *                                                  3  *
  5  *  Copyright (C) 2015  Red Hat, Inc.               4  *  Copyright (C) 2015  Red Hat, Inc.
                                                   >>   5  *
                                                   >>   6  *  This work is licensed under the terms of the GNU GPL, version 2. See
                                                   >>   7  *  the COPYING file in the top-level directory.
  6  */                                                 8  */
  7                                                     9 
  8 #include <linux/mm.h>                              10 #include <linux/mm.h>
  9 #include <linux/sched/signal.h>                    11 #include <linux/sched/signal.h>
 10 #include <linux/pagemap.h>                         12 #include <linux/pagemap.h>
 11 #include <linux/rmap.h>                            13 #include <linux/rmap.h>
 12 #include <linux/swap.h>                            14 #include <linux/swap.h>
 13 #include <linux/swapops.h>                         15 #include <linux/swapops.h>
 14 #include <linux/userfaultfd_k.h>                   16 #include <linux/userfaultfd_k.h>
 15 #include <linux/mmu_notifier.h>                    17 #include <linux/mmu_notifier.h>
 16 #include <linux/hugetlb.h>                         18 #include <linux/hugetlb.h>
 17 #include <linux/shmem_fs.h>                        19 #include <linux/shmem_fs.h>
 18 #include <asm/tlbflush.h>                          20 #include <asm/tlbflush.h>
 19 #include <asm/tlb.h>                           << 
 20 #include "internal.h"                              21 #include "internal.h"
 21                                                    22 
 22 static __always_inline                         !!  23 static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 23 bool validate_dst_vma(struct vm_area_struct *d !!  24                             pmd_t *dst_pmd,
 24 {                                              !!  25                             struct vm_area_struct *dst_vma,
 25         /* Make sure that the dst range is ful !!  26                             unsigned long dst_addr,
 26         if (dst_end > dst_vma->vm_end)         !!  27                             unsigned long src_addr,
 27                 return false;                  !!  28                             struct page **pagep)
 28                                                << 
 29         /*                                     << 
 30          * Check the vma is registered in uffd << 
 31          * enforce the VM_MAYWRITE check done  << 
 32          * time.                               << 
 33          */                                    << 
 34         if (!dst_vma->vm_userfaultfd_ctx.ctx)  << 
 35                 return false;                  << 
 36                                                << 
 37         return true;                           << 
 38 }                                              << 
 39                                                << 
 40 static __always_inline                         << 
 41 struct vm_area_struct *find_vma_and_prepare_an << 
 42                                                << 
 43 {                                              << 
 44         struct vm_area_struct *vma;            << 
 45                                                << 
 46         mmap_assert_locked(mm);                << 
 47         vma = vma_lookup(mm, addr);            << 
 48         if (!vma)                              << 
 49                 vma = ERR_PTR(-ENOENT);        << 
 50         else if (!(vma->vm_flags & VM_SHARED)  << 
 51                  unlikely(anon_vma_prepare(vma << 
 52                 vma = ERR_PTR(-ENOMEM);        << 
 53                                                << 
 54         return vma;                            << 
 55 }                                              << 
 56                                                << 
 57 #ifdef CONFIG_PER_VMA_LOCK                     << 
 58 /*                                             << 
 59  * uffd_lock_vma() - Lookup and lock vma corre << 
 60  * @mm: mm to search vma in.                   << 
 61  * @address: address that the vma should conta << 
 62  *                                             << 
 63  * Should be called without holding mmap_lock. << 
 64  *                                             << 
 65  * Return: A locked vma containing @address, - << 
 66  * -ENOMEM if anon_vma couldn't be allocated.  << 
 67  */                                            << 
 68 static struct vm_area_struct *uffd_lock_vma(st << 
 69                                        unsigne << 
 70 {                                              << 
 71         struct vm_area_struct *vma;            << 
 72                                                << 
 73         vma = lock_vma_under_rcu(mm, address); << 
 74         if (vma) {                             << 
 75                 /*                             << 
 76                  * We know we're going to need << 
 77                  * that early.                 << 
 78                  */                            << 
 79                 if (!(vma->vm_flags & VM_SHARE << 
 80                         vma_end_read(vma);     << 
 81                 else                           << 
 82                         return vma;            << 
 83         }                                      << 
 84                                                << 
 85         mmap_read_lock(mm);                    << 
 86         vma = find_vma_and_prepare_anon(mm, ad << 
 87         if (!IS_ERR(vma)) {                    << 
 88                 /*                             << 
 89                  * We cannot use vma_start_rea << 
 90                  * false locked (see comment i << 
 91                  * can avoid that by directly  << 
 92                  * mmap_lock, which guarantees << 
 93                  * vma for write (vma_start_wr << 
 94                  */                            << 
 95                 down_read(&vma->vm_lock->lock) << 
 96         }                                      << 
 97                                                << 
 98         mmap_read_unlock(mm);                  << 
 99         return vma;                            << 
100 }                                              << 
101                                                << 
102 static struct vm_area_struct *uffd_mfill_lock( << 
103                                                << 
104                                                << 
105 {                                              << 
106         struct vm_area_struct *dst_vma;        << 
107                                                << 
108         dst_vma = uffd_lock_vma(dst_mm, dst_st << 
109         if (IS_ERR(dst_vma) || validate_dst_vm << 
110                 return dst_vma;                << 
111                                                << 
112         vma_end_read(dst_vma);                 << 
113         return ERR_PTR(-ENOENT);               << 
114 }                                              << 
115                                                << 
116 static void uffd_mfill_unlock(struct vm_area_s << 
117 {                                              << 
118         vma_end_read(vma);                     << 
119 }                                              << 
120                                                << 
121 #else                                          << 
122                                                << 
123 static struct vm_area_struct *uffd_mfill_lock( << 
124                                                << 
125                                                << 
126 {                                              << 
127         struct vm_area_struct *dst_vma;        << 
128                                                << 
129         mmap_read_lock(dst_mm);                << 
130         dst_vma = find_vma_and_prepare_anon(ds << 
131         if (IS_ERR(dst_vma))                   << 
132                 goto out_unlock;               << 
133                                                << 
134         if (validate_dst_vma(dst_vma, dst_star << 
135                 return dst_vma;                << 
136                                                << 
137         dst_vma = ERR_PTR(-ENOENT);            << 
138 out_unlock:                                    << 
139         mmap_read_unlock(dst_mm);              << 
140         return dst_vma;                        << 
141 }                                              << 
142                                                << 
143 static void uffd_mfill_unlock(struct vm_area_s << 
144 {                                              << 
145         mmap_read_unlock(vma->vm_mm);          << 
146 }                                              << 
147 #endif                                         << 
148                                                << 
149 /* Check if dst_addr is outside of file's size << 
150 static bool mfill_file_over_size(struct vm_are << 
151                                  unsigned long << 
152 {                                              << 
153         struct inode *inode;                   << 
154         pgoff_t offset, max_off;               << 
155                                                << 
156         if (!dst_vma->vm_file)                 << 
157                 return false;                  << 
158                                                << 
159         inode = dst_vma->vm_file->f_inode;     << 
160         offset = linear_page_index(dst_vma, ds << 
161         max_off = DIV_ROUND_UP(i_size_read(ino << 
162         return offset >= max_off;              << 
163 }                                              << 
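
A quick worked example of the mfill_file_over_size() check that ends here:
with 4 KiB pages, a 5000-byte shmem file gives
max_off = DIV_ROUND_UP(5000, 4096) = 2, so a dst_addr whose page offset within
the file is 0 or 1 is still inside i_size, while offset 2 or beyond makes the
helper return true and the fill paths below bail out with -EFAULT.
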
164                                                << 
165 /*                                             << 
166  * Install PTEs, to map dst_addr (within dst_v << 
167  *                                             << 
168  * This function handles both MCOPY_ATOMIC_NOR << 
169  * and anon, and for both shared and private V << 
170  */                                            << 
171 int mfill_atomic_install_pte(pmd_t *dst_pmd,   << 
172                              struct vm_area_st << 
173                              unsigned long dst << 
174                              bool newly_alloca << 
175 {                                                  29 {
176         int ret;                               !!  30         struct mem_cgroup *memcg;
177         struct mm_struct *dst_mm = dst_vma->vm << 
178         pte_t _dst_pte, *dst_pte;                  31         pte_t _dst_pte, *dst_pte;
179         bool writable = dst_vma->vm_flags & VM << 
180         bool vm_shared = dst_vma->vm_flags & V << 
181         spinlock_t *ptl;                           32         spinlock_t *ptl;
182         struct folio *folio = page_folio(page) !!  33         void *page_kaddr;
183         bool page_in_cache = folio_mapping(fol << 
184                                                << 
185         _dst_pte = mk_pte(page, dst_vma->vm_pa << 
186         _dst_pte = pte_mkdirty(_dst_pte);      << 
187         if (page_in_cache && !vm_shared)       << 
188                 writable = false;              << 
189         if (writable)                          << 
190                 _dst_pte = pte_mkwrite(_dst_pt << 
191         if (flags & MFILL_ATOMIC_WP)           << 
192                 _dst_pte = pte_mkuffd_wp(_dst_ << 
193                                                << 
194         ret = -EAGAIN;                         << 
195         dst_pte = pte_offset_map_lock(dst_mm,  << 
196         if (!dst_pte)                          << 
197                 goto out;                      << 
198                                                << 
199         if (mfill_file_over_size(dst_vma, dst_ << 
200                 ret = -EFAULT;                 << 
201                 goto out_unlock;               << 
202         }                                      << 
203                                                << 
204         ret = -EEXIST;                         << 
205         /*                                     << 
206          * We allow to overwrite a pte marker: << 
207          * registered, we firstly wr-protect a << 
208          * page backing it, then access the pa << 
209          */                                    << 
210         if (!pte_none_mostly(ptep_get(dst_pte) << 
211                 goto out_unlock;               << 
212                                                << 
213         if (page_in_cache) {                   << 
214                 /* Usually, cache pages are al << 
215                 if (newly_allocated)           << 
216                         folio_add_lru(folio);  << 
217                 folio_add_file_rmap_pte(folio, << 
218         } else {                               << 
219                 folio_add_new_anon_rmap(folio, << 
220                 folio_add_lru_vma(folio, dst_v << 
221         }                                      << 
222                                                << 
223         /*                                     << 
224          * Must happen after rmap, as mm_count << 
225          * PageAnon()), which is set by __page << 
226          */                                    << 
227         inc_mm_counter(dst_mm, mm_counter(foli << 
228                                                << 
229         set_pte_at(dst_mm, dst_addr, dst_pte,  << 
230                                                << 
231         /* No need to invalidate - it was non- << 
232         update_mmu_cache(dst_vma, dst_addr, ds << 
233         ret = 0;                               << 
234 out_unlock:                                    << 
235         pte_unmap_unlock(dst_pte, ptl);        << 
236 out:                                           << 
237         return ret;                            << 
238 }                                              << 
239                                                << 
240 static int mfill_atomic_pte_copy(pmd_t *dst_pm << 
241                                  struct vm_are << 
242                                  unsigned long << 
243                                  unsigned long << 
244                                  uffd_flags_t  << 
245                                  struct folio  << 
246 {                                              << 
247         void *kaddr;                           << 
248         int ret;                                   34         int ret;
249         struct folio *folio;                   !!  35         struct page *page;
                                                   >>  36         pgoff_t offset, max_off;
                                                   >>  37         struct inode *inode;
250                                                    38 
251         if (!*foliop) {                        !!  39         if (!*pagep) {
252                 ret = -ENOMEM;                     40                 ret = -ENOMEM;
253                 folio = vma_alloc_folio(GFP_HI !!  41                 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
254                                         dst_ad !!  42                 if (!page)
255                 if (!folio)                    << 
256                         goto out;                  43                         goto out;
257                                                    44 
258                 kaddr = kmap_local_folio(folio !!  45                 page_kaddr = kmap_atomic(page);
259                 /*                             !!  46                 ret = copy_from_user(page_kaddr,
260                  * The read mmap_lock is held  !!  47                                      (const void __user *) src_addr,
261                  * mmap_lock being read recurs << 
262                  * possible if a writer has ta << 
263                  *                             << 
264                  * process A thread 1 takes re << 
265                  * process A thread 2 calls mm << 
266                  * process B thread 1 takes pa << 
267                  * process B thread 2 calls mm << 
268                  * process A thread 1 blocks t << 
269                  * process B thread 1 blocks t << 
270                  *                             << 
271                  * Disable page faults to prev << 
272                  * and retry the copy outside  << 
273                  */                            << 
274                 pagefault_disable();           << 
275                 ret = copy_from_user(kaddr, (c << 
276                                      PAGE_SIZE     48                                      PAGE_SIZE);
277                 pagefault_enable();            !!  49                 kunmap_atomic(page_kaddr);
278                 kunmap_local(kaddr);           << 
279                                                    50 
280                 /* fallback to copy_from_user  !!  51                 /* fallback to copy_from_user outside mmap_sem */
281                 if (unlikely(ret)) {               52                 if (unlikely(ret)) {
282                         ret = -ENOENT;             53                         ret = -ENOENT;
283                         *foliop = folio;       !!  54                         *pagep = page;
284                         /* don't free the page     55                         /* don't free the page */
285                         goto out;                  56                         goto out;
286                 }                                  57                 }
287                                                    58 
288                 flush_dcache_folio(folio);     !!  59                 flush_dcache_page(page);
289         } else {                                   60         } else {
290                 folio = *foliop;               !!  61                 page = *pagep;
291                 *foliop = NULL;                !!  62                 *pagep = NULL;
292         }                                          63         }
293                                                    64 
294         /*                                         65         /*
295          * The memory barrier inside __folio_m !!  66          * The memory barrier inside __SetPageUptodate makes sure that
296          * preceding stores to the page conten !!  67          * preceeding stores to the page contents become visible before
297          * the set_pte_at() write.                 68          * the set_pte_at() write.
298          */                                        69          */
299         __folio_mark_uptodate(folio);          !!  70         __SetPageUptodate(page);
300                                                    71 
301         ret = -ENOMEM;                             72         ret = -ENOMEM;
302         if (mem_cgroup_charge(folio, dst_vma-> !!  73         if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
303                 goto out_release;                  74                 goto out_release;
304                                                    75 
305         ret = mfill_atomic_install_pte(dst_pmd !!  76         _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
306                                        &folio- !!  77         if (dst_vma->vm_flags & VM_WRITE)
307         if (ret)                               !!  78                 _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
308                 goto out_release;              << 
309 out:                                           << 
310         return ret;                            << 
311 out_release:                                   << 
312         folio_put(folio);                      << 
313         goto out;                              << 
314 }                                              << 
315                                                << 
316 static int mfill_atomic_pte_zeroed_folio(pmd_t << 
317                                          struc << 
318                                          unsig << 
319 {                                              << 
320         struct folio *folio;                   << 
321         int ret = -ENOMEM;                     << 
322                                                << 
323         folio = vma_alloc_zeroed_movable_folio << 
324         if (!folio)                            << 
325                 return ret;                    << 
326                                                << 
327         if (mem_cgroup_charge(folio, dst_vma-> << 
328                 goto out_put;                  << 
329                                                << 
330         /*                                     << 
331          * The memory barrier inside __folio_m << 
332          * zeroing out the folio become visibl << 
333          * using set_pte_at(). See do_anonymou << 
334          */                                    << 
335         __folio_mark_uptodate(folio);          << 
336                                                << 
337         ret = mfill_atomic_install_pte(dst_pmd << 
338                                        &folio- << 
339         if (ret)                               << 
340                 goto out_put;                  << 
341                                                << 
342         return 0;                              << 
343 out_put:                                       << 
344         folio_put(folio);                      << 
345         return ret;                            << 
346 }                                              << 
347                                                << 
348 static int mfill_atomic_pte_zeropage(pmd_t *ds << 
349                                      struct vm << 
350                                      unsigned  << 
351 {                                              << 
352         pte_t _dst_pte, *dst_pte;              << 
353         spinlock_t *ptl;                       << 
354         int ret;                               << 
355                                                << 
356         if (mm_forbids_zeropage(dst_vma->vm_mm << 
357                 return mfill_atomic_pte_zeroed << 
358                                                    79 
359         _dst_pte = pte_mkspecial(pfn_pte(my_ze !!  80         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
360                                          dst_v !!  81         if (dst_vma->vm_file) {
361         ret = -EAGAIN;                         !!  82                 /* the shmem MAP_PRIVATE case requires checking the i_size */
362         dst_pte = pte_offset_map_lock(dst_vma- !!  83                 inode = dst_vma->vm_file->f_inode;
363         if (!dst_pte)                          !!  84                 offset = linear_page_index(dst_vma, dst_addr);
364                 goto out;                      !!  85                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
365         if (mfill_file_over_size(dst_vma, dst_ << 
366                 ret = -EFAULT;                     86                 ret = -EFAULT;
367                 goto out_unlock;               !!  87                 if (unlikely(offset >= max_off))
                                                   >>  88                         goto out_release_uncharge_unlock;
368         }                                          89         }
369         ret = -EEXIST;                             90         ret = -EEXIST;
370         if (!pte_none(ptep_get(dst_pte)))      !!  91         if (!pte_none(*dst_pte))
371                 goto out_unlock;               !!  92                 goto out_release_uncharge_unlock;
372         set_pte_at(dst_vma->vm_mm, dst_addr, d << 
373         /* No need to invalidate - it was non- << 
374         update_mmu_cache(dst_vma, dst_addr, ds << 
375         ret = 0;                               << 
376 out_unlock:                                    << 
377         pte_unmap_unlock(dst_pte, ptl);        << 
378 out:                                           << 
379         return ret;                            << 
380 }                                              << 
381                                                    93 
382 /* Handles UFFDIO_CONTINUE for all shmem VMAs  !!  94         inc_mm_counter(dst_mm, MM_ANONPAGES);
383 static int mfill_atomic_pte_continue(pmd_t *ds !!  95         page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
384                                      struct vm !!  96         mem_cgroup_commit_charge(page, memcg, false, false);
385                                      unsigned  !!  97         lru_cache_add_active_or_unevictable(page, dst_vma);
386                                      uffd_flag << 
387 {                                              << 
388         struct inode *inode = file_inode(dst_v << 
389         pgoff_t pgoff = linear_page_index(dst_ << 
390         struct folio *folio;                   << 
391         struct page *page;                     << 
392         int ret;                               << 
393                                                << 
394         ret = shmem_get_folio(inode, pgoff, 0, << 
395         /* Our caller expects us to return -EF << 
396         if (ret == -ENOENT)                    << 
397                 ret = -EFAULT;                 << 
398         if (ret)                               << 
399                 goto out;                      << 
400         if (!folio) {                          << 
401                 ret = -EFAULT;                 << 
402                 goto out;                      << 
403         }                                      << 
404                                                    98 
405         page = folio_file_page(folio, pgoff);  !!  99         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
406         if (PageHWPoison(page)) {              << 
407                 ret = -EIO;                    << 
408                 goto out_release;              << 
409         }                                      << 
410                                                   100 
411         ret = mfill_atomic_install_pte(dst_pmd !! 101         /* No need to invalidate - it was non-present before */
412                                        page, f !! 102         update_mmu_cache(dst_vma, dst_addr, dst_pte);
413         if (ret)                               << 
414                 goto out_release;              << 
415                                                   103 
416         folio_unlock(folio);                   !! 104         pte_unmap_unlock(dst_pte, ptl);
417         ret = 0;                                  105         ret = 0;
418 out:                                              106 out:
419         return ret;                               107         return ret;
                                                   >> 108 out_release_uncharge_unlock:
                                                   >> 109         pte_unmap_unlock(dst_pte, ptl);
                                                   >> 110         mem_cgroup_cancel_charge(page, memcg, false);
420 out_release:                                      111 out_release:
421         folio_unlock(folio);                   !! 112         put_page(page);
422         folio_put(folio);                      << 
423         goto out;                                 113         goto out;
424 }                                                 114 }
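
The mfill_atomic_pte_continue() path that ends here exists only on the v6.12
side: it backs UFFDIO_CONTINUE (minor faults), where the data already sits in
the shmem page cache and only page-table entries need to be installed. A
hedged sketch of the userspace side, reusing the includes from the sketch near
the top; uffd_continue() is an illustrative name, while the uffdio_continue
structure and UFFDIO_CONTINUE ioctl are real UAPI.

    /* Ask the kernel to map the page-cache page that already backs this
     * offset; for shmem this lands in mfill_atomic_pte_continue(). */
    static int uffd_continue(int uffd, unsigned long fault_addr, size_t page_size)
    {
            struct uffdio_continue cont = {
                    .range = {
                            .start = fault_addr & ~(page_size - 1),
                            .len   = page_size,
                    },
                    .mode = 0,              /* 0 = wake the faulting thread */
            };

            return ioctl(uffd, UFFDIO_CONTINUE, &cont);
    }

Registration for this mode uses UFFDIO_REGISTER_MODE_MINOR rather than
UFFDIO_REGISTER_MODE_MISSING.
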
425                                                   115 
426 /* Handles UFFDIO_POISON for all non-hugetlb V !! 116 static int mfill_zeropage_pte(struct mm_struct *dst_mm,
427 static int mfill_atomic_pte_poison(pmd_t *dst_ !! 117                               pmd_t *dst_pmd,
428                                    struct vm_a !! 118                               struct vm_area_struct *dst_vma,
429                                    unsigned lo !! 119                               unsigned long dst_addr)
430                                    uffd_flags_ << 
431 {                                                 120 {
432         int ret;                               << 
433         struct mm_struct *dst_mm = dst_vma->vm << 
434         pte_t _dst_pte, *dst_pte;                 121         pte_t _dst_pte, *dst_pte;
435         spinlock_t *ptl;                          122         spinlock_t *ptl;
                                                   >> 123         int ret;
                                                   >> 124         pgoff_t offset, max_off;
                                                   >> 125         struct inode *inode;
436                                                   126 
437         _dst_pte = make_pte_marker(PTE_MARKER_ !! 127         _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
438         ret = -EAGAIN;                         !! 128                                          dst_vma->vm_page_prot));
439         dst_pte = pte_offset_map_lock(dst_mm,     129         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
440         if (!dst_pte)                          !! 130         if (dst_vma->vm_file) {
441                 goto out;                      !! 131                 /* the shmem MAP_PRIVATE case requires checking the i_size */
442                                                !! 132                 inode = dst_vma->vm_file->f_inode;
443         if (mfill_file_over_size(dst_vma, dst_ !! 133                 offset = linear_page_index(dst_vma, dst_addr);
                                                   >> 134                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
444                 ret = -EFAULT;                    135                 ret = -EFAULT;
445                 goto out_unlock;               !! 136                 if (unlikely(offset >= max_off))
                                                   >> 137                         goto out_unlock;
446         }                                         138         }
447                                                << 
448         ret = -EEXIST;                            139         ret = -EEXIST;
449         /* Refuse to overwrite any PTE, even a !! 140         if (!pte_none(*dst_pte))
450         if (!pte_none(ptep_get(dst_pte)))      << 
451                 goto out_unlock;                  141                 goto out_unlock;
452                                                << 
453         set_pte_at(dst_mm, dst_addr, dst_pte,     142         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
454                                                << 
455         /* No need to invalidate - it was non-    143         /* No need to invalidate - it was non-present before */
456         update_mmu_cache(dst_vma, dst_addr, ds    144         update_mmu_cache(dst_vma, dst_addr, dst_pte);
457         ret = 0;                                  145         ret = 0;
458 out_unlock:                                       146 out_unlock:
459         pte_unmap_unlock(dst_pte, ptl);           147         pte_unmap_unlock(dst_pte, ptl);
460 out:                                           << 
461         return ret;                               148         return ret;
462 }                                                 149 }
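
mfill_atomic_pte_poison(), which ends here, is also new on the v6.12 side and
backs UFFDIO_POISON: instead of filling in data it installs a
PTE_MARKER_POISONED marker, so later accesses to the page report a memory
error (as with hardware poisoning) rather than faulting data in. Sketch of the
userspace call, again with an illustrative wrapper name around real UAPI:

    static int uffd_poison(int uffd, unsigned long addr, size_t page_size)
    {
            struct uffdio_poison poison = {
                    .range = { .start = addr & ~(page_size - 1), .len = page_size },
                    .mode  = 0,
            };

            return ioctl(uffd, UFFDIO_POISON, &poison);
    }
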
463                                                   150 
464 static pmd_t *mm_alloc_pmd(struct mm_struct *m    151 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
465 {                                                 152 {
466         pgd_t *pgd;                               153         pgd_t *pgd;
467         p4d_t *p4d;                               154         p4d_t *p4d;
468         pud_t *pud;                               155         pud_t *pud;
469                                                   156 
470         pgd = pgd_offset(mm, address);            157         pgd = pgd_offset(mm, address);
471         p4d = p4d_alloc(mm, pgd, address);        158         p4d = p4d_alloc(mm, pgd, address);
472         if (!p4d)                                 159         if (!p4d)
473                 return NULL;                      160                 return NULL;
474         pud = pud_alloc(mm, p4d, address);        161         pud = pud_alloc(mm, p4d, address);
475         if (!pud)                                 162         if (!pud)
476                 return NULL;                      163                 return NULL;
477         /*                                        164         /*
478          * Note that we didn't run this becaus    165          * Note that we didn't run this because the pmd was
479          * missing, the *pmd may be already es    166          * missing, the *pmd may be already established and in
480          * turn it may also be a trans_huge_pm    167          * turn it may also be a trans_huge_pmd.
481          */                                       168          */
482         return pmd_alloc(mm, pud, address);       169         return pmd_alloc(mm, pud, address);
483 }                                                 170 }
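
mm_alloc_pmd() above only walks (and if necessary allocates) the pgd/p4d/pud
levels; as its comment notes, the pmd it returns may already be established,
possibly as a transparent huge pmd. Its real callers (__mcopy_atomic() in
v4.19, mfill_atomic() in v6.12) appear later in the file, beyond this excerpt;
the fragment below is an illustrative sketch of the checks such a caller has
to make, with prepare_dst_pmd() being a made-up name.

    /* Illustrative only -- not part of this file. */
    static int prepare_dst_pmd(struct mm_struct *dst_mm, unsigned long dst_addr,
                               pmd_t **pmdp)
    {
            pmd_t *dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);

            if (unlikely(!dst_pmd))
                    return -ENOMEM;         /* page-table allocation failed */
            if (unlikely(pmd_trans_huge(*dst_pmd)))
                    return -EEXIST;         /* a huge pmd already maps this range */
            /* Make sure a PTE page exists before mapping and locking PTEs. */
            if (unlikely(pmd_none(*dst_pmd)) &&
                unlikely(__pte_alloc(dst_mm, dst_pmd)))
                    return -ENOMEM;

            *pmdp = dst_pmd;
            return 0;
    }
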
484                                                   171 
485 #ifdef CONFIG_HUGETLB_PAGE                        172 #ifdef CONFIG_HUGETLB_PAGE
486 /*                                                173 /*
487  * mfill_atomic processing for HUGETLB vmas.   !! 174  * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
488  * called with either vma-lock or mmap_lock he !! 175  * called with mmap_sem held, it will release mmap_sem before returning.
489  * before returning.                           << 
490  */                                               176  */
491 static __always_inline ssize_t mfill_atomic_hu !! 177 static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
492                                                << 
493                                                   178                                               struct vm_area_struct *dst_vma,
494                                                   179                                               unsigned long dst_start,
495                                                   180                                               unsigned long src_start,
496                                                   181                                               unsigned long len,
497                                                !! 182                                               bool *mmap_changing,
                                                   >> 183                                               bool zeropage)
498 {                                                 184 {
499         struct mm_struct *dst_mm = dst_vma->vm !! 185         int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
                                                   >> 186         int vm_shared = dst_vma->vm_flags & VM_SHARED;
500         ssize_t err;                              187         ssize_t err;
501         pte_t *dst_pte;                           188         pte_t *dst_pte;
502         unsigned long src_addr, dst_addr;         189         unsigned long src_addr, dst_addr;
503         long copied;                              190         long copied;
504         struct folio *folio;                   !! 191         struct page *page;
                                                   >> 192         struct hstate *h;
505         unsigned long vma_hpagesize;              193         unsigned long vma_hpagesize;
506         pgoff_t idx;                              194         pgoff_t idx;
507         u32 hash;                                 195         u32 hash;
508         struct address_space *mapping;            196         struct address_space *mapping;
509                                                   197 
510         /*                                        198         /*
511          * There is no default zero huge page     199          * There is no default zero huge page for all huge page sizes as
512          * supported by hugetlb.  A PMD_SIZE h    200          * supported by hugetlb.  A PMD_SIZE huge pages may exist as used
513          * by THP.  Since we can not reliably     201          * by THP.  Since we can not reliably insert a zero page, this
514          * feature is not supported.              202          * feature is not supported.
515          */                                       203          */
516         if (uffd_flags_mode_is(flags, MFILL_AT !! 204         if (zeropage) {
517                 up_read(&ctx->map_changing_loc !! 205                 up_read(&dst_mm->mmap_sem);
518                 uffd_mfill_unlock(dst_vma);    << 
519                 return -EINVAL;                   206                 return -EINVAL;
520         }                                         207         }
521                                                   208 
522         src_addr = src_start;                     209         src_addr = src_start;
523         dst_addr = dst_start;                     210         dst_addr = dst_start;
524         copied = 0;                               211         copied = 0;
525         folio = NULL;                          !! 212         page = NULL;
526         vma_hpagesize = vma_kernel_pagesize(ds    213         vma_hpagesize = vma_kernel_pagesize(dst_vma);
527                                                   214 
528         /*                                        215         /*
529          * Validate alignment based on huge pa    216          * Validate alignment based on huge page size
530          */                                       217          */
531         err = -EINVAL;                            218         err = -EINVAL;
532         if (dst_start & (vma_hpagesize - 1) ||    219         if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
533                 goto out_unlock;                  220                 goto out_unlock;
534                                                   221 
535 retry:                                            222 retry:
536         /*                                        223         /*
537          * On routine entry dst_vma is set.  I !! 224          * On routine entry dst_vma is set.  If we had to drop mmap_sem and
538          * retry, dst_vma will be set to NULL     225          * retry, dst_vma will be set to NULL and we must lookup again.
539          */                                       226          */
540         if (!dst_vma) {                           227         if (!dst_vma) {
541                 dst_vma = uffd_mfill_lock(dst_ << 
542                 if (IS_ERR(dst_vma)) {         << 
543                         err = PTR_ERR(dst_vma) << 
544                         goto out;              << 
545                 }                              << 
546                                                << 
547                 err = -ENOENT;                    228                 err = -ENOENT;
548                 if (!is_vm_hugetlb_page(dst_vm !! 229                 dst_vma = find_vma(dst_mm, dst_start);
549                         goto out_unlock_vma;   !! 230                 if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
                                                   >> 231                         goto out_unlock;
                                                   >> 232                 /*
                                                   >> 233                  * Check the vma is registered in uffd, this is
                                                   >> 234                  * required to enforce the VM_MAYWRITE check done at
                                                   >> 235                  * uffd registration time.
                                                   >> 236                  */
                                                   >> 237                 if (!dst_vma->vm_userfaultfd_ctx.ctx)
                                                   >> 238                         goto out_unlock;
                                                   >> 239 
                                                   >> 240                 if (dst_start < dst_vma->vm_start ||
                                                   >> 241                     dst_start + len > dst_vma->vm_end)
                                                   >> 242                         goto out_unlock;
550                                                   243 
551                 err = -EINVAL;                    244                 err = -EINVAL;
552                 if (vma_hpagesize != vma_kerne    245                 if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
553                         goto out_unlock_vma;   !! 246                         goto out_unlock;
554                                                   247 
555                 /*                             !! 248                 vm_shared = dst_vma->vm_flags & VM_SHARED;
556                  * If memory mappings are chan !! 249         }
557                  * operation (e.g. mremap) run !! 250 
558                  * request the user to retry l !! 251         if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
559                  */                            !! 252                     (len - copied) & (vma_hpagesize - 1)))
560                 down_read(&ctx->map_changing_l !! 253                 goto out_unlock;
561                 err = -EAGAIN;                 !! 254 
562                 if (atomic_read(&ctx->mmap_cha !! 255         /*
                                                   >> 256          * If not shared, ensure the dst_vma has a anon_vma.
                                                   >> 257          */
                                                   >> 258         err = -ENOMEM;
                                                   >> 259         if (!vm_shared) {
                                                   >> 260                 if (unlikely(anon_vma_prepare(dst_vma)))
563                         goto out_unlock;          261                         goto out_unlock;
564         }                                         262         }
565                                                   263 
                                                   >> 264         h = hstate_vma(dst_vma);
                                                   >> 265 
566         while (src_addr < src_start + len) {      266         while (src_addr < src_start + len) {
                                                   >> 267                 pte_t dst_pteval;
                                                   >> 268 
567                 BUG_ON(dst_addr >= dst_start +    269                 BUG_ON(dst_addr >= dst_start + len);
                                                   >> 270                 VM_BUG_ON(dst_addr & ~huge_page_mask(h));
568                                                   271 
569                 /*                                272                 /*
570                  * Serialize via vma_lock and  !! 273                  * Serialize via hugetlb_fault_mutex
571                  * vma_lock ensures the dst_pt << 
572                  * in the case of shared pmds. << 
573                  * races with other faulting t << 
574                  */                               274                  */
575                 idx = linear_page_index(dst_vm    275                 idx = linear_page_index(dst_vma, dst_addr);
576                 mapping = dst_vma->vm_file->f_    276                 mapping = dst_vma->vm_file->f_mapping;
577                 hash = hugetlb_fault_mutex_has !! 277                 hash = hugetlb_fault_mutex_hash(h, mapping, idx);
578                 mutex_lock(&hugetlb_fault_mute    278                 mutex_lock(&hugetlb_fault_mutex_table[hash]);
579                 hugetlb_vma_lock_read(dst_vma) << 
580                                                   279 
581                 err = -ENOMEM;                    280                 err = -ENOMEM;
582                 dst_pte = huge_pte_alloc(dst_m !! 281                 dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h));
583                 if (!dst_pte) {                   282                 if (!dst_pte) {
584                         hugetlb_vma_unlock_rea << 
585                         mutex_unlock(&hugetlb_    283                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
586                         goto out_unlock;          284                         goto out_unlock;
587                 }                                 285                 }
588                                                   286 
589                 if (!uffd_flags_mode_is(flags, !! 287                 err = -EEXIST;
590                     !huge_pte_none_mostly(huge !! 288                 dst_pteval = huge_ptep_get(dst_pte);
591                         err = -EEXIST;         !! 289                 if (!huge_pte_none(dst_pteval)) {
592                         hugetlb_vma_unlock_rea << 
593                         mutex_unlock(&hugetlb_    290                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
594                         goto out_unlock;          291                         goto out_unlock;
595                 }                                 292                 }
596                                                   293 
597                 err = hugetlb_mfill_atomic_pte !! 294                 err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
598                                                !! 295                                                 dst_addr, src_addr, &page);
599                                                   296 
600                 hugetlb_vma_unlock_read(dst_vm << 
601                 mutex_unlock(&hugetlb_fault_mu    297                 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                                                   >> 298                 vm_alloc_shared = vm_shared;
602                                                   299 
603                 cond_resched();                   300                 cond_resched();
604                                                   301 
605                 if (unlikely(err == -ENOENT))     302                 if (unlikely(err == -ENOENT)) {
606                         up_read(&ctx->map_chan !! 303                         up_read(&dst_mm->mmap_sem);
607                         uffd_mfill_unlock(dst_ !! 304                         BUG_ON(!page);
608                         BUG_ON(!folio);        << 
609                                                   305 
610                         err = copy_folio_from_ !! 306                         err = copy_huge_page_from_user(page,
611                                                !! 307                                                 (const void __user *)src_addr,
                                                   >> 308                                                 pages_per_huge_page(h), true);
612                         if (unlikely(err)) {      309                         if (unlikely(err)) {
613                                 err = -EFAULT;    310                                 err = -EFAULT;
614                                 goto out;         311                                 goto out;
615                         }                         312                         }
                                                   >> 313                         down_read(&dst_mm->mmap_sem);
                                                   >> 314                         /*
                                                   >> 315                          * If memory mappings are changing because of non-cooperative
                                                   >> 316                          * operation (e.g. mremap) running in parallel, bail out and
                                                   >> 317                          * request the user to retry later
                                                   >> 318                          */
                                                   >> 319                         if (mmap_changing && READ_ONCE(*mmap_changing)) {
                                                   >> 320                                 err = -EAGAIN;
                                                   >> 321                                 break;
                                                   >> 322                         }
616                                                   323 
617                         dst_vma = NULL;           324                         dst_vma = NULL;
618                         goto retry;               325                         goto retry;
619                 } else                            326                 } else
620                         BUG_ON(folio);         !! 327                         BUG_ON(page);
621                                                   328 
622                 if (!err) {                       329                 if (!err) {
623                         dst_addr += vma_hpages    330                         dst_addr += vma_hpagesize;
624                         src_addr += vma_hpages    331                         src_addr += vma_hpagesize;
625                         copied += vma_hpagesiz    332                         copied += vma_hpagesize;
626                                                   333 
627                         if (fatal_signal_pendi    334                         if (fatal_signal_pending(current))
628                                 err = -EINTR;     335                                 err = -EINTR;
629                 }                                 336                 }
630                 if (err)                          337                 if (err)
631                         break;                    338                         break;
632         }                                         339         }
633                                                   340 
634 out_unlock:                                       341 out_unlock:
635         up_read(&ctx->map_changing_lock);      !! 342         up_read(&dst_mm->mmap_sem);
636 out_unlock_vma:                                << 
637         uffd_mfill_unlock(dst_vma);            << 
638 out:                                              343 out:
639         if (folio)                             !! 344         if (page) {
640                 folio_put(folio);              !! 345                 /*
                                                   >> 346                  * We encountered an error and are about to free a newly
                                                   >> 347                  * allocated huge page.
                                                   >> 348                  *
                                                   >> 349                  * Reservation handling is very subtle, and is different for
                                                   >> 350                  * private and shared mappings.  See the routine
                                                   >> 351                  * restore_reserve_on_error for details.  Unfortunately, we
                                                   >> 352                  * can not call restore_reserve_on_error now as it would
                                                   >> 353                  * require holding mmap_sem.
                                                   >> 354                  *
                                                   >> 355                  * If a reservation for the page existed in the reservation
                                                   >> 356                  * map of a private mapping, the map was modified to indicate
                                                   >> 357                  * the reservation was consumed when the page was allocated.
                                                   >> 358                  * We clear the PagePrivate flag now so that the global
                                                   >> 359                  * reserve count will not be incremented in free_huge_page.
                                                   >> 360                  * The reservation map will still indicate the reservation
                                                   >> 361                  * was consumed and possibly prevent later page allocation.
                                                   >> 362                  * This is better than leaking a global reservation.  If no
                                                   >> 363                  * reservation existed, it is still safe to clear PagePrivate
                                                   >> 364                  * as no adjustments to reservation counts were made during
                                                   >> 365                  * allocation.
                                                   >> 366                  *
                                                   >> 367                  * The reservation map for shared mappings indicates which
                                                   >> 368                  * pages have reservations.  When a huge page is allocated
                                                   >> 369                  * for an address with a reservation, no change is made to
                                                   >> 370                  * the reserve map.  In this case PagePrivate will be set
                                                   >> 371                  * to indicate that the global reservation count should be
                                                   >> 372                  * incremented when the page is freed.  This is the desired
                                                   >> 373                  * behavior.  However, when a huge page is allocated for an
                                                   >> 374                  * address without a reservation a reservation entry is added
                                                   >> 375                  * to the reservation map, and PagePrivate will not be set.
                                                   >> 376                  * When the page is freed, the global reserve count will NOT
                                                   >> 377                  * be incremented and it will appear as though we have leaked
                                                   >> 378                  * reserved page.  In this case, set PagePrivate so that the
                                                   >> 379                  * global reserve count will be incremented to match the
                                                   >> 380                  * reservation map entry which was created.
                                                   >> 381                  *
                                                   >> 382                  * Note that vm_alloc_shared is based on the flags of the vma
                                                   >> 383                  * for which the page was originally allocated.  dst_vma could
                                                   >> 384                  * be different or NULL on error.
                                                   >> 385                  */
                                                   >> 386                 if (vm_alloc_shared)
                                                   >> 387                         SetPagePrivate(page);
                                                   >> 388                 else
                                                   >> 389                         ClearPagePrivate(page);
                                                   >> 390                 put_page(page);
                                                   >> 391         }
641         BUG_ON(copied < 0);                       392         BUG_ON(copied < 0);
642         BUG_ON(err > 0);                          393         BUG_ON(err > 0);
643         BUG_ON(!copied && !err);                  394         BUG_ON(!copied && !err);
644         return copied ? copied : err;             395         return copied ? copied : err;
645 }                                                 396 }
646 #else /* !CONFIG_HUGETLB_PAGE */                  397 #else /* !CONFIG_HUGETLB_PAGE */
647 /* fail at build time if gcc attempts to use t    398 /* fail at build time if gcc attempts to use this */
648 extern ssize_t mfill_atomic_hugetlb(struct use !! 399 extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
649                                     struct vm_ !! 400                                       struct vm_area_struct *dst_vma,
650                                     unsigned l !! 401                                       unsigned long dst_start,
651                                     unsigned l !! 402                                       unsigned long src_start,
652                                     unsigned l !! 403                                       unsigned long len,
653                                     uffd_flags !! 404                                       bool *mmap_changing,
                                                   >> 405                                       bool zeropage);
654 #endif /* CONFIG_HUGETLB_PAGE */                  406 #endif /* CONFIG_HUGETLB_PAGE */
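
For context, the hugetlb path above is what services UFFDIO_COPY on a hugetlbfs (or MAP_HUGETLB) range registered for missing faults; dst and len must be multiples of the huge page size. A minimal userspace sketch, assuming 'uffd' is an already-created and registered userfaultfd and that fill_huge_page() is a hypothetical helper name chosen for illustration:

#include <linux/userfaultfd.h>
#include <stddef.h>
#include <sys/ioctl.h>

/* Resolve one missing fault in a huge-page-backed registered range.
 * 'dst' must be aligned to, and 'hpage_size' must equal, the huge
 * page size of the mapping; 'src' holds the page contents. */
static int fill_huge_page(int uffd, void *dst, const void *src,
                          size_t hpage_size)
{
        struct uffdio_copy copy = {
                .dst  = (unsigned long)dst,
                .src  = (unsigned long)src,
                .len  = hpage_size,
                .mode = 0,
        };

        if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
                return -1;      /* e.g. EAGAIN: caller should retry */
        return 0;
}
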
655                                                   407 
656 static __always_inline ssize_t mfill_atomic_pt !! 408 static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                                                   >> 409                                                 pmd_t *dst_pmd,
657                                                   410                                                 struct vm_area_struct *dst_vma,
658                                                   411                                                 unsigned long dst_addr,
659                                                   412                                                 unsigned long src_addr,
660                                                !! 413                                                 struct page **page,
661                                                !! 414                                                 bool zeropage)
662 {                                                 415 {
663         ssize_t err;                              416         ssize_t err;
664                                                   417 
665         if (uffd_flags_mode_is(flags, MFILL_AT << 
666                 return mfill_atomic_pte_contin << 
667                                                << 
668         } else if (uffd_flags_mode_is(flags, M << 
669                 return mfill_atomic_pte_poison << 
670                                                << 
671         }                                      << 
672                                                << 
673         /*                                        418         /*
674          * The normal page fault path for a sh    419          * The normal page fault path for a shmem will invoke the
675          * fault, fill the hole in the file an    420          * fault, fill the hole in the file and COW it right away. The
676          * result generates plain anonymous me    421          * result generates plain anonymous memory. So when we are
 677          * asked to fill a hole in a MAP_PRIVA    422          * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
678          * generate anonymous memory directly     423          * generate anonymous memory directly without actually filling
679          * the hole. For the MAP_PRIVATE case     424          * the hole. For the MAP_PRIVATE case the robustness check
680          * only happens in the pagetable (to v    425          * only happens in the pagetable (to verify it's still none)
681          * and not in the radix tree.             426          * and not in the radix tree.
682          */                                       427          */
683         if (!(dst_vma->vm_flags & VM_SHARED))     428         if (!(dst_vma->vm_flags & VM_SHARED)) {
684                 if (uffd_flags_mode_is(flags,  !! 429                 if (!zeropage)
685                         err = mfill_atomic_pte !! 430                         err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
686                                                !! 431                                                dst_addr, src_addr, page);
687                                                << 
688                 else                              432                 else
689                         err = mfill_atomic_pte !! 433                         err = mfill_zeropage_pte(dst_mm, dst_pmd,
690                                                   434                                                  dst_vma, dst_addr);
691         } else {                                  435         } else {
692                 err = shmem_mfill_atomic_pte(d !! 436                 if (!zeropage)
693                                              d !! 437                         err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
694                                              f !! 438                                                      dst_vma, dst_addr,
                                                   >> 439                                                      src_addr, page);
                                                   >> 440                 else
                                                   >> 441                         err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd,
                                                   >> 442                                                        dst_vma, dst_addr);
695         }                                         443         }
696                                                   444 
697         return err;                               445         return err;
698 }                                                 446 }
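
The MAP_PRIVATE shmem case described in the comment above can be set up from userspace roughly as follows; this is only a sketch (error handling omitted, and memfd_create() plus shmem userfaultfd support are assumed to be available on the running kernel). A later UFFDIO_COPY into such a range produces anonymous memory in the private mapping and never writes the underlying file:

#define _GNU_SOURCE
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

/* Map a memfd privately and register it for missing faults. */
static void *map_private_shmem(int uffd, size_t len)
{
        struct uffdio_register reg;
        int memfd = memfd_create("uffd-demo", 0);
        void *p;

        ftruncate(memfd, len);
        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, memfd, 0);

        reg.range.start = (unsigned long)p;
        reg.range.len   = len;
        reg.mode        = UFFDIO_REGISTER_MODE_MISSING;
        ioctl(uffd, UFFDIO_REGISTER, &reg);     /* error handling omitted */
        return p;
}
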
699                                                   447 
700 static __always_inline ssize_t mfill_atomic(st !! 448 static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
701                                             un !! 449                                               unsigned long dst_start,
702                                             un !! 450                                               unsigned long src_start,
703                                             un !! 451                                               unsigned long len,
704                                             uf !! 452                                               bool zeropage,
                                                   >> 453                                               bool *mmap_changing)
705 {                                                 454 {
706         struct mm_struct *dst_mm = ctx->mm;    << 
707         struct vm_area_struct *dst_vma;           455         struct vm_area_struct *dst_vma;
708         ssize_t err;                              456         ssize_t err;
709         pmd_t *dst_pmd;                           457         pmd_t *dst_pmd;
710         unsigned long src_addr, dst_addr;         458         unsigned long src_addr, dst_addr;
711         long copied;                              459         long copied;
712         struct folio *folio;                   !! 460         struct page *page;
713                                                   461 
714         /*                                        462         /*
715          * Sanitize the command parameters:       463          * Sanitize the command parameters:
716          */                                       464          */
717         BUG_ON(dst_start & ~PAGE_MASK);           465         BUG_ON(dst_start & ~PAGE_MASK);
718         BUG_ON(len & ~PAGE_MASK);                 466         BUG_ON(len & ~PAGE_MASK);
719                                                   467 
720         /* Does the address range wrap, or is     468         /* Does the address range wrap, or is the span zero-sized? */
721         BUG_ON(src_start + len <= src_start);     469         BUG_ON(src_start + len <= src_start);
722         BUG_ON(dst_start + len <= dst_start);     470         BUG_ON(dst_start + len <= dst_start);
723                                                   471 
724         src_addr = src_start;                     472         src_addr = src_start;
725         dst_addr = dst_start;                     473         dst_addr = dst_start;
726         copied = 0;                               474         copied = 0;
727         folio = NULL;                          !! 475         page = NULL;
728 retry:                                            476 retry:
729         /*                                     !! 477         down_read(&dst_mm->mmap_sem);
730          * Make sure the vma is not shared, th << 
731          * both valid and fully within a singl << 
732          */                                    << 
733         dst_vma = uffd_mfill_lock(dst_mm, dst_ << 
734         if (IS_ERR(dst_vma)) {                 << 
735                 err = PTR_ERR(dst_vma);        << 
736                 goto out;                      << 
737         }                                      << 
738                                                   478 
739         /*                                        479         /*
740          * If memory mappings are changing bec    480          * If memory mappings are changing because of non-cooperative
741          * operation (e.g. mremap) running in     481          * operation (e.g. mremap) running in parallel, bail out and
742          * request the user to retry later        482          * request the user to retry later
743          */                                       483          */
744         down_read(&ctx->map_changing_lock);    << 
745         err = -EAGAIN;                            484         err = -EAGAIN;
746         if (atomic_read(&ctx->mmap_changing))  !! 485         if (mmap_changing && READ_ONCE(*mmap_changing))
                                                   >> 486                 goto out_unlock;
                                                   >> 487 
                                                   >> 488         /*
                                                   >> 489          * Make sure the vma is not shared, that the dst range is
                                                   >> 490          * both valid and fully within a single existing vma.
                                                   >> 491          */
                                                   >> 492         err = -ENOENT;
                                                   >> 493         dst_vma = find_vma(dst_mm, dst_start);
                                                   >> 494         if (!dst_vma)
                                                   >> 495                 goto out_unlock;
                                                   >> 496         /*
                                                   >> 497          * Check that the vma is registered in uffd; this is required to
                                                   >> 498          * enforce the VM_MAYWRITE check done at uffd registration
                                                   >> 499          * time.
                                                   >> 500          */
                                                   >> 501         if (!dst_vma->vm_userfaultfd_ctx.ctx)
                                                   >> 502                 goto out_unlock;
                                                   >> 503 
                                                   >> 504         if (dst_start < dst_vma->vm_start ||
                                                   >> 505             dst_start + len > dst_vma->vm_end)
747                 goto out_unlock;                  506                 goto out_unlock;
748                                                   507 
749         err = -EINVAL;                            508         err = -EINVAL;
750         /*                                        509         /*
751          * shmem_zero_setup is invoked in mmap    510          * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
752          * it will overwrite vm_ops, so vma_is    511          * it will overwrite vm_ops, so vma_is_anonymous must return false.
753          */                                       512          */
754         if (WARN_ON_ONCE(vma_is_anonymous(dst_    513         if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
755             dst_vma->vm_flags & VM_SHARED))       514             dst_vma->vm_flags & VM_SHARED))
756                 goto out_unlock;                  515                 goto out_unlock;
757                                                   516 
758         /*                                        517         /*
759          * validate 'mode' now that we know th << 
760          * a wrprotect copy if the userfaultfd << 
761          */                                    << 
762         if ((flags & MFILL_ATOMIC_WP) && !(dst << 
763                 goto out_unlock;               << 
764                                                << 
765         /*                                     << 
766          * If this is a HUGETLB vma, pass off     518          * If this is a HUGETLB vma, pass off to appropriate routine
767          */                                       519          */
768         if (is_vm_hugetlb_page(dst_vma))          520         if (is_vm_hugetlb_page(dst_vma))
769                 return  mfill_atomic_hugetlb(c !! 521                 return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
770                                              s !! 522                                                src_start, len, mmap_changing,
                                                   >> 523                                                zeropage);
771                                                   524 
772         if (!vma_is_anonymous(dst_vma) && !vma    525         if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
773                 goto out_unlock;                  526                 goto out_unlock;
774         if (!vma_is_shmem(dst_vma) &&          !! 527 
775             uffd_flags_mode_is(flags, MFILL_AT !! 528         /*
                                                   >> 529          * Ensure the dst_vma has a anon_vma or this page
                                                   >> 530          * would get a NULL anon_vma when moved in the
                                                   >> 531          * dst_vma.
                                                   >> 532          */
                                                   >> 533         err = -ENOMEM;
                                                   >> 534         if (!(dst_vma->vm_flags & VM_SHARED) &&
                                                   >> 535             unlikely(anon_vma_prepare(dst_vma)))
776                 goto out_unlock;                  536                 goto out_unlock;
777                                                   537 
778         while (src_addr < src_start + len) {      538         while (src_addr < src_start + len) {
779                 pmd_t dst_pmdval;                 539                 pmd_t dst_pmdval;
780                                                   540 
781                 BUG_ON(dst_addr >= dst_start +    541                 BUG_ON(dst_addr >= dst_start + len);
782                                                   542 
783                 dst_pmd = mm_alloc_pmd(dst_mm,    543                 dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
784                 if (unlikely(!dst_pmd)) {         544                 if (unlikely(!dst_pmd)) {
785                         err = -ENOMEM;            545                         err = -ENOMEM;
786                         break;                    546                         break;
787                 }                                 547                 }
788                                                   548 
789                 dst_pmdval = pmdp_get_lockless !! 549                 dst_pmdval = pmd_read_atomic(dst_pmd);
790                 if (unlikely(pmd_none(dst_pmdv << 
791                     unlikely(__pte_alloc(dst_m << 
792                         err = -ENOMEM;         << 
793                         break;                 << 
794                 }                              << 
795                 dst_pmdval = pmdp_get_lockless << 
796                 /*                                550                 /*
797                  * If the dst_pmd is THP don't !! 551                  * If the dst_pmd is mapped as THP don't
798                  * (This includes the case whe !! 552                  * override it and just be strict.
799                  * changed back to none after  << 
800                  */                               553                  */
801                 if (unlikely(!pmd_present(dst_ !! 554                 if (unlikely(pmd_trans_huge(dst_pmdval))) {
802                              pmd_devmap(dst_pm << 
803                         err = -EEXIST;            555                         err = -EEXIST;
804                         break;                    556                         break;
805                 }                                 557                 }
806                 if (unlikely(pmd_bad(dst_pmdva !! 558                 if (unlikely(pmd_none(dst_pmdval)) &&
                                                   >> 559                     unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
                                                   >> 560                         err = -ENOMEM;
                                                   >> 561                         break;
                                                   >> 562                 }
                                                   >> 563                  /* If a huge pmd materialized from under us, fail */
                                                   >> 564                 if (unlikely(pmd_trans_huge(*dst_pmd))) {
807                         err = -EFAULT;            565                         err = -EFAULT;
808                         break;                    566                         break;
809                 }                                 567                 }
810                 /*                             << 
811                  * For shmem mappings, khugepa << 
812                  * tables under us; pte_offset << 
813                  */                            << 
814                                                   568 
815                 err = mfill_atomic_pte(dst_pmd !! 569                 BUG_ON(pmd_none(*dst_pmd));
816                                        src_add !! 570                 BUG_ON(pmd_trans_huge(*dst_pmd));
                                                   >> 571 
                                                   >> 572                 err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                                   >> 573                                        src_addr, &page, zeropage);
817                 cond_resched();                   574                 cond_resched();
818                                                   575 
819                 if (unlikely(err == -ENOENT))     576                 if (unlikely(err == -ENOENT)) {
820                         void *kaddr;           !! 577                         void *page_kaddr;
821                                                   578 
822                         up_read(&ctx->map_chan !! 579                         up_read(&dst_mm->mmap_sem);
823                         uffd_mfill_unlock(dst_ !! 580                         BUG_ON(!page);
824                         BUG_ON(!folio);        << 
825                                                   581 
826                         kaddr = kmap_local_fol !! 582                         page_kaddr = kmap(page);
827                         err = copy_from_user(k !! 583                         err = copy_from_user(page_kaddr,
828                                              (    584                                              (const void __user *) src_addr,
829                                              P    585                                              PAGE_SIZE);
830                         kunmap_local(kaddr);   !! 586                         kunmap(page);
831                         if (unlikely(err)) {      587                         if (unlikely(err)) {
832                                 err = -EFAULT;    588                                 err = -EFAULT;
833                                 goto out;         589                                 goto out;
834                         }                         590                         }
835                         flush_dcache_folio(fol !! 591                         flush_dcache_page(page);
836                         goto retry;               592                         goto retry;
837                 } else                            593                 } else
838                         BUG_ON(folio);         !! 594                         BUG_ON(page);
839                                                   595 
840                 if (!err) {                       596                 if (!err) {
841                         dst_addr += PAGE_SIZE;    597                         dst_addr += PAGE_SIZE;
842                         src_addr += PAGE_SIZE;    598                         src_addr += PAGE_SIZE;
843                         copied += PAGE_SIZE;      599                         copied += PAGE_SIZE;
844                                                   600 
845                         if (fatal_signal_pendi    601                         if (fatal_signal_pending(current))
846                                 err = -EINTR;     602                                 err = -EINTR;
847                 }                                 603                 }
848                 if (err)                          604                 if (err)
849                         break;                    605                         break;
850         }                                         606         }
851                                                   607 
852 out_unlock:                                       608 out_unlock:
853         up_read(&ctx->map_changing_lock);      !! 609         up_read(&dst_mm->mmap_sem);
854         uffd_mfill_unlock(dst_vma);            << 
855 out:                                              610 out:
856         if (folio)                             !! 611         if (page)
857                 folio_put(folio);              !! 612                 put_page(page);
858         BUG_ON(copied < 0);                       613         BUG_ON(copied < 0);
859         BUG_ON(err > 0);                          614         BUG_ON(err > 0);
860         BUG_ON(!copied && !err);                  615         BUG_ON(!copied && !err);
861         return copied ? copied : err;             616         return copied ? copied : err;
862 }                                                 617 }
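
__mcopy_atomic()/mfill_atomic() returns the number of bytes filled, or a negative error such as -EAGAIN when the address space is changing underneath (mmap_changing) or when the source page has to be copied outside the lock. fs/userfaultfd.c forwards that progress to userspace through the 'copy' field of struct uffdio_copy, so callers typically retry. A plausible retry loop (a sketch, not the only valid pattern; a real caller may also want to bound the number of retries):

#include <errno.h>
#include <linux/userfaultfd.h>
#include <string.h>
#include <sys/ioctl.h>

/* Fill [dst, dst+len) from [src, src+len), resuming after partial copies. */
static int copy_with_retry(int uffd, unsigned long dst, unsigned long src,
                           unsigned long len)
{
        unsigned long done = 0;
        struct uffdio_copy copy;

        while (done < len) {
                memset(&copy, 0, sizeof(copy));
                copy.dst = dst + done;
                copy.src = src + done;
                copy.len = len - done;

                if (ioctl(uffd, UFFDIO_COPY, &copy) == 0)
                        break;                  /* whole remainder copied */
                if (errno != EAGAIN)
                        return -1;              /* real failure */
                if (copy.copy > 0)
                        done += copy.copy;      /* partial progress, resume */
        }
        return 0;
}
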
863                                                   618 
864 ssize_t mfill_atomic_copy(struct userfaultfd_c !! 619 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
865                           unsigned long src_st !! 620                      unsigned long src_start, unsigned long len,
866                           uffd_flags_t flags)  !! 621                      bool *mmap_changing)
867 {                                                 622 {
868         return mfill_atomic(ctx, dst_start, sr !! 623         return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
869                             uffd_flags_set_mod !! 624                               mmap_changing);
870 }                                                 625 }
871                                                   626 
872 ssize_t mfill_atomic_zeropage(struct userfault !! 627 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
873                               unsigned long st !! 628                        unsigned long len, bool *mmap_changing)
874                               unsigned long le << 
875 {                                                 629 {
876         return mfill_atomic(ctx, start, 0, len !! 630         return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
877                             uffd_flags_set_mod << 
878 }                                              << 
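
mfill_atomic_zeropage() (in the old tree, __mcopy_atomic() with zeropage=true) backs UFFDIO_ZEROPAGE. A toy fault-handler loop that resolves every missing fault with a zero page might look like this (sketch only; assumes a 4 KiB page size and a blocking uffd, and omits error handling):

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <unistd.h>

static void zeropage_handler(int uffd)
{
        struct uffd_msg msg;
        struct uffdio_zeropage zp;

        while (read(uffd, &msg, sizeof(msg)) == (ssize_t)sizeof(msg)) {
                if (msg.event != UFFD_EVENT_PAGEFAULT)
                        continue;
                zp.range.start = msg.arg.pagefault.address & ~0xfffUL;
                zp.range.len   = 4096;
                zp.mode        = 0;
                ioctl(uffd, UFFDIO_ZEROPAGE, &zp);      /* error handling omitted */
        }
}
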
879                                                << 
880 ssize_t mfill_atomic_continue(struct userfault << 
881                               unsigned long le << 
882 {                                              << 
883                                                << 
884         /*                                     << 
885          * A caller might reasonably assume th << 
886          * smp_wmb() to ensure that any writes << 
887          * the thread doing the UFFDIO_CONTINU << 
888          * subsequent loads from the page thro << 
889          */                                    << 
890         smp_wmb();                             << 
891                                                << 
892         return mfill_atomic(ctx, start, 0, len << 
893                             uffd_flags_set_mod << 
894 }                                              << 
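
mfill_atomic_continue() backs UFFDIO_CONTINUE, the minor-fault resolution path: the page already sits in the page cache (typically populated through a second mapping of the same shmem or hugetlbfs file) and only needs to be mapped. The smp_wmb() above pairs with the userspace pattern of writing the contents before issuing the ioctl, sketched here under the assumption that the faulting range was registered with UFFDIO_REGISTER_MODE_MINOR on a kernel that supports it:

#include <linux/userfaultfd.h>
#include <string.h>
#include <sys/ioctl.h>

/* 'alias_page' is the same file page mapped at a different address. */
static int resolve_minor_fault(int uffd, void *alias_page, const void *data,
                               unsigned long fault_addr, size_t page_size)
{
        struct uffdio_continue cont;

        /* Populate through the alias first so the contents are visible
         * before the faulting thread is allowed to proceed. */
        memcpy(alias_page, data, page_size);

        cont.range.start = fault_addr & ~(page_size - 1);
        cont.range.len   = page_size;
        cont.mode        = 0;
        return ioctl(uffd, UFFDIO_CONTINUE, &cont);
}
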
895                                                << 
896 ssize_t mfill_atomic_poison(struct userfaultfd << 
897                             unsigned long len, << 
898 {                                              << 
899         return mfill_atomic(ctx, start, 0, len << 
900                             uffd_flags_set_mod << 
901 }                                              << 
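
mfill_atomic_poison() backs UFFDIO_POISON, which installs poison markers so that later accesses raise SIGBUS rather than returning data, for example when a live-migration source reports a page as hardware-poisoned. A sketch, assuming a kernel and headers new enough to provide the ioctl:

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static int poison_range(int uffd, unsigned long start, unsigned long len)
{
        struct uffdio_poison p = {
                .range = { .start = start, .len = len },
                .mode  = 0,
        };

        return ioctl(uffd, UFFDIO_POISON, &p);
}
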
902                                                << 
903 long uffd_wp_range(struct vm_area_struct *dst_ << 
904                    unsigned long start, unsign << 
905 {                                              << 
906         unsigned int mm_cp_flags;              << 
907         struct mmu_gather tlb;                 << 
908         long ret;                              << 
909                                                << 
910         VM_WARN_ONCE(start < dst_vma->vm_start << 
911                         "The address range exc << 
912         if (enable_wp)                         << 
913                 mm_cp_flags = MM_CP_UFFD_WP;   << 
914         else                                   << 
915                 mm_cp_flags = MM_CP_UFFD_WP_RE << 
916                                                << 
917         /*                                     << 
918          * vma->vm_page_prot already reflects  << 
919          * VMA (see userfaultfd_set_vm_flags() << 
920          * to be write-protected as default wh << 
921          * Try upgrading write permissions man << 
922          */                                    << 
923         if (!enable_wp && vma_wants_manual_pte << 
924                 mm_cp_flags |= MM_CP_TRY_CHANG << 
925         tlb_gather_mmu(&tlb, dst_vma->vm_mm);  << 
926         ret = change_protection(&tlb, dst_vma, << 
927         tlb_finish_mmu(&tlb);                  << 
928                                                << 
929         return ret;                            << 
930 }                                              << 
931                                                << 
932 int mwriteprotect_range(struct userfaultfd_ctx << 
933                         unsigned long len, boo << 
934 {                                              << 
935         struct mm_struct *dst_mm = ctx->mm;    << 
936         unsigned long end = start + len;       << 
937         unsigned long _start, _end;            << 
938         struct vm_area_struct *dst_vma;        << 
939         unsigned long page_mask;               << 
940         long err;                              << 
941         VMA_ITERATOR(vmi, dst_mm, start);      << 
942                                                << 
943         /*                                     << 
944          * Sanitize the command parameters:    << 
945          */                                    << 
946         BUG_ON(start & ~PAGE_MASK);            << 
947         BUG_ON(len & ~PAGE_MASK);              << 
948                                                << 
949         /* Does the address range wrap, or is  << 
950         BUG_ON(start + len <= start);          << 
951                                                << 
952         mmap_read_lock(dst_mm);                << 
953                                                << 
954         /*                                     << 
955          * If memory mappings are changing bec << 
956          * operation (e.g. mremap) running in  << 
957          * request the user to retry later     << 
958          */                                    << 
959         down_read(&ctx->map_changing_lock);    << 
960         err = -EAGAIN;                         << 
961         if (atomic_read(&ctx->mmap_changing))  << 
962                 goto out_unlock;               << 
963                                                << 
964         err = -ENOENT;                         << 
965         for_each_vma_range(vmi, dst_vma, end)  << 
966                                                << 
967                 if (!userfaultfd_wp(dst_vma))  << 
968                         err = -ENOENT;         << 
969                         break;                 << 
970                 }                              << 
971                                                << 
972                 if (is_vm_hugetlb_page(dst_vma << 
973                         err = -EINVAL;         << 
974                         page_mask = vma_kernel << 
975                         if ((start & page_mask << 
976                                 break;         << 
977                 }                              << 
978                                                << 
979                 _start = max(dst_vma->vm_start << 
980                 _end = min(dst_vma->vm_end, en << 
981                                                << 
982                 err = uffd_wp_range(dst_vma, _ << 
983                                                << 
984                 /* Return 0 on success, <0 on  << 
985                 if (err < 0)                   << 
986                         break;                 << 
987                 err = 0;                       << 
988         }                                      << 
989 out_unlock:                                    << 
990         up_read(&ctx->map_changing_lock);      << 
991         mmap_read_unlock(dst_mm);              << 
992         return err;                            << 
993 }                                              << 
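
mwriteprotect_range() backs UFFDIO_WRITEPROTECT, used both to arm write-protection (enable_wp) and, after a UFFD_PAGEFAULT_FLAG_WP fault has been serviced, to drop it again. A userspace sketch, assuming the range was registered with UFFDIO_REGISTER_MODE_WP:

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

/* wp != 0 arms write-protection; wp == 0 resolves a wp fault. */
static int set_write_protect(int uffd, unsigned long start,
                             unsigned long len, int wp)
{
        struct uffdio_writeprotect wpc = {
                .range = { .start = start, .len = len },
                .mode  = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0,
        };

        return ioctl(uffd, UFFDIO_WRITEPROTECT, &wpc);
}
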
994                                                << 
995                                                << 
996 void double_pt_lock(spinlock_t *ptl1,          << 
997                     spinlock_t *ptl2)          << 
998         __acquires(ptl1)                       << 
999         __acquires(ptl2)                       << 
1000 {                                             << 
1001         if (ptl1 > ptl2)                      << 
1002                 swap(ptl1, ptl2);             << 
1003         /* lock in virtual address order to a << 
1004         spin_lock(ptl1);                      << 
1005         if (ptl1 != ptl2)                     << 
1006                 spin_lock_nested(ptl2, SINGLE << 
1007         else                                  << 
1008                 __acquire(ptl2);              << 
1009 }                                             << 
1010                                               << 
1011 void double_pt_unlock(spinlock_t *ptl1,       << 
1012                       spinlock_t *ptl2)       << 
1013         __releases(ptl1)                      << 
1014         __releases(ptl2)                      << 
1015 {                                             << 
1016         spin_unlock(ptl1);                    << 
1017         if (ptl1 != ptl2)                     << 
1018                 spin_unlock(ptl2);            << 
1019         else                                  << 
1020                 __release(ptl2);              << 
1021 }                                             << 
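
double_pt_lock()/double_pt_unlock() always take the two page-table locks in address order so that two movers working on overlapping pairs cannot deadlock (classic ABBA avoidance). The same idiom in plain userspace C, for comparison only:

#include <pthread.h>

/* Always lock the lower-addressed mutex first; handle the case where
 * both arguments are the same lock. */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
        if (a > b) {
                pthread_mutex_t *tmp = a;
                a = b;
                b = tmp;
        }
        pthread_mutex_lock(a);
        if (a != b)
                pthread_mutex_lock(b);
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
        pthread_mutex_unlock(a);
        if (a != b)
                pthread_mutex_unlock(b);
}
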
1022                                               << 
1023                                               << 
1024 static int move_present_pte(struct mm_struct  << 
1025                             struct vm_area_st << 
1026                             struct vm_area_st << 
1027                             unsigned long dst << 
1028                             pte_t *dst_pte, p << 
1029                             pte_t orig_dst_pt << 
1030                             spinlock_t *dst_p << 
1031                             struct folio *src << 
1032 {                                             << 
1033         int err = 0;                          << 
1034                                               << 
1035         double_pt_lock(dst_ptl, src_ptl);     << 
1036                                               << 
1037         if (!pte_same(ptep_get(src_pte), orig << 
1038             !pte_same(ptep_get(dst_pte), orig << 
1039                 err = -EAGAIN;                << 
1040                 goto out;                     << 
1041         }                                     << 
1042         if (folio_test_large(src_folio) ||    << 
1043             folio_maybe_dma_pinned(src_folio) << 
1044             !PageAnonExclusive(&src_folio->pa << 
1045                 err = -EBUSY;                 << 
1046                 goto out;                     << 
1047         }                                     << 
1048                                               << 
1049         orig_src_pte = ptep_clear_flush(src_v << 
1050         /* Folio got pinned from under us. Pu << 
1051         if (folio_maybe_dma_pinned(src_folio) << 
1052                 set_pte_at(mm, src_addr, src_ << 
1053                 err = -EBUSY;                 << 
1054                 goto out;                     << 
1055         }                                     << 
1056                                               << 
1057         folio_move_anon_rmap(src_folio, dst_v << 
1058         src_folio->index = linear_page_index( << 
1059                                               << 
1060         orig_dst_pte = mk_pte(&src_folio->pag << 
1061         /* Follow mremap() behavior and treat << 
1062         orig_dst_pte = pte_mkwrite(pte_mkdirt << 
1063                                               << 
1064         set_pte_at(mm, dst_addr, dst_pte, ori << 
1065 out:                                          << 
1066         double_pt_unlock(dst_ptl, src_ptl);   << 
1067         return err;                           << 
1068 }                                             << 
1069                                               << 
1070 static int move_swap_pte(struct mm_struct *mm << 
1071                          unsigned long dst_ad << 
1072                          pte_t *dst_pte, pte_ << 
1073                          pte_t orig_dst_pte,  << 
1074                          spinlock_t *dst_ptl, << 
1075 {                                             << 
1076         if (!pte_swp_exclusive(orig_src_pte)) << 
1077                 return -EBUSY;                << 
1078                                               << 
1079         double_pt_lock(dst_ptl, src_ptl);     << 
1080                                               << 
1081         if (!pte_same(ptep_get(src_pte), orig << 
1082             !pte_same(ptep_get(dst_pte), orig << 
1083                 double_pt_unlock(dst_ptl, src << 
1084                 return -EAGAIN;               << 
1085         }                                     << 
1086                                               << 
1087         orig_src_pte = ptep_get_and_clear(mm, << 
1088         set_pte_at(mm, dst_addr, dst_pte, ori << 
1089         double_pt_unlock(dst_ptl, src_ptl);   << 
1090                                               << 
1091         return 0;                             << 
1092 }                                             << 
1093                                               << 
1094 static int move_zeropage_pte(struct mm_struct << 
1095                              struct vm_area_s << 
1096                              struct vm_area_s << 
1097                              unsigned long ds << 
1098                              pte_t *dst_pte,  << 
1099                              pte_t orig_dst_p << 
1100                              spinlock_t *dst_ << 
1101 {                                             << 
1102         pte_t zero_pte;                       << 
1103                                               << 
1104         double_pt_lock(dst_ptl, src_ptl);     << 
1105         if (!pte_same(ptep_get(src_pte), orig << 
1106             !pte_same(ptep_get(dst_pte), orig << 
1107                 double_pt_unlock(dst_ptl, src << 
1108                 return -EAGAIN;               << 
1109         }                                     << 
1110                                               << 
1111         zero_pte = pte_mkspecial(pfn_pte(my_z << 
1112                                          dst_ << 
1113         ptep_clear_flush(src_vma, src_addr, s << 
1114         set_pte_at(mm, dst_addr, dst_pte, zer << 
1115         double_pt_unlock(dst_ptl, src_ptl);   << 
1116                                               << 
1117         return 0;                             << 
1118 }                                             << 
1119                                               << 
1120                                               << 
1121 /*                                            << 
1122  * The mmap_lock for reading is held by the c << 
1123  * from src_pmd to dst_pmd if possible, and r << 
1124  * in moving the page.                        << 
1125  */                                           << 
1126 static int move_pages_pte(struct mm_struct *m << 
1127                           struct vm_area_stru << 
1128                           struct vm_area_stru << 
1129                           unsigned long dst_a << 
1130                           __u64 mode)         << 
1131 {                                             << 
1132         swp_entry_t entry;                    << 
1133         pte_t orig_src_pte, orig_dst_pte;     << 
1134         pte_t src_folio_pte;                  << 
1135         spinlock_t *src_ptl, *dst_ptl;        << 
1136         pte_t *src_pte = NULL;                << 
1137         pte_t *dst_pte = NULL;                << 
1138                                               << 
1139         struct folio *src_folio = NULL;       << 
1140         struct anon_vma *src_anon_vma = NULL; << 
1141         struct mmu_notifier_range range;      << 
1142         int err = 0;                          << 
1143                                               << 
1144         flush_cache_range(src_vma, src_addr,  << 
1145         mmu_notifier_range_init(&range, MMU_N << 
1146                                 src_addr, src << 
1147         mmu_notifier_invalidate_range_start(& << 
1148 retry:                                        << 
1149         dst_pte = pte_offset_map_nolock(mm, d << 
1150                                               << 
1151         /* Retry if a huge pmd materialized f << 
1152         if (unlikely(!dst_pte)) {             << 
1153                 err = -EAGAIN;                << 
1154                 goto out;                     << 
1155         }                                     << 
1156                                               << 
1157         src_pte = pte_offset_map_nolock(mm, s << 
1158                                               << 
1159         /*                                    << 
1160          * We held the mmap_lock for reading  << 
1161          * can zap transparent huge pages und << 
1162          * transparent huge page fault can es << 
1163          * transparent huge pages under us.   << 
1164          */                                   << 
1165         if (unlikely(!src_pte)) {             << 
1166                 err = -EAGAIN;                << 
1167                 goto out;                     << 
1168         }                                     << 
1169                                               << 
1170         /* Sanity checks before the operation << 
1171         if (WARN_ON_ONCE(pmd_none(*dst_pmd))  << 
1172             WARN_ON_ONCE(pmd_trans_huge(*dst_ << 
1173                 err = -EINVAL;                << 
1174                 goto out;                     << 
1175         }                                     << 
1176                                               << 
1177         spin_lock(dst_ptl);                   << 
1178         orig_dst_pte = ptep_get(dst_pte);     << 
1179         spin_unlock(dst_ptl);                 << 
1180         if (!pte_none(orig_dst_pte)) {        << 
1181                 err = -EEXIST;                << 
1182                 goto out;                     << 
1183         }                                     << 
1184                                               << 
1185         spin_lock(src_ptl);                   << 
1186         orig_src_pte = ptep_get(src_pte);     << 
1187         spin_unlock(src_ptl);                 << 
1188         if (pte_none(orig_src_pte)) {         << 
1189                 if (!(mode & UFFDIO_MOVE_MODE << 
1190                         err = -ENOENT;        << 
1191                 else /* nothing to do to move << 
1192                         err = 0;              << 
1193                 goto out;                     << 
1194         }                                     << 
1195                                               << 
1196         /* If PTE changed after we locked the << 
1197         if (src_folio && unlikely(!pte_same(s << 
1198                 err = -EAGAIN;                << 
1199                 goto out;                     << 
1200         }                                     << 
1201                                               << 
1202         if (pte_present(orig_src_pte)) {      << 
1203                 if (is_zero_pfn(pte_pfn(orig_ << 
1204                         err = move_zeropage_p << 
1205                                               << 
1206                                               << 
1207                                               << 
1208                         goto out;             << 
1209                 }                             << 
1210                                               << 
1211                 /*                            << 
1212                  * Pin and lock both source f << 
1213                  * RCU read section, we can't << 
1214                  * unmap the ptes, obtain the << 
1215                  */                           << 
1216                 if (!src_folio) {             << 
1217                         struct folio *folio;  << 
1218                                               << 
1219                         /*                    << 
1220                          * Pin the page while << 
1221                          * page isn't freed u << 
1222                          */                   << 
1223                         spin_lock(src_ptl);   << 
1224                         if (!pte_same(orig_sr << 
1225                                 spin_unlock(s << 
1226                                 err = -EAGAIN << 
1227                                 goto out;     << 
1228                         }                     << 
1229                                               << 
1230                         folio = vm_normal_fol << 
1231                         if (!folio || !PageAn << 
1232                                 spin_unlock(s << 
1233                                 err = -EBUSY; << 
1234                                 goto out;     << 
1235                         }                     << 
1236                                               << 
1237                         folio_get(folio);     << 
1238                         src_folio = folio;    << 
1239                         src_folio_pte = orig_ << 
1240                         spin_unlock(src_ptl); << 
1241                                               << 
1242                         if (!folio_trylock(sr << 
1243                                 pte_unmap(&or << 
1244                                 pte_unmap(&or << 
1245                                 src_pte = dst << 
1246                                 /* now we can << 
1247                                 folio_lock(sr << 
1248                                 goto retry;   << 
1249                         }                     << 
1250                                               << 
1251                         if (WARN_ON_ONCE(!fol << 
1252                                 err = -EBUSY; << 
1253                                 goto out;     << 
1254                         }                     << 
1255                 }                             << 
1256                                               << 
1257                 /* at this point we have src_ << 
1258                 if (folio_test_large(src_foli << 
1259                         /* split_folio() can  << 
1260                         pte_unmap(&orig_src_p << 
1261                         pte_unmap(&orig_dst_p << 
1262                         src_pte = dst_pte = N << 
1263                         err = split_folio(src << 
1264                         if (err)              << 
1265                                 goto out;     << 
1266                         /* have to reacquire  << 
1267                         folio_unlock(src_foli << 
1268                         folio_put(src_folio); << 
1269                         src_folio = NULL;     << 
1270                         goto retry;           << 
1271                 }                             << 
1272                                               << 
1273                 if (!src_anon_vma) {          << 
1274                         /*                    << 
1275                          * folio_referenced w << 
1276                          * without the folio  << 
1277                          * the anon_vma lock, << 
1278                          */                   << 
1279                         src_anon_vma = folio_ << 
1280                         if (!src_anon_vma) {  << 
1281                                 /* page was u << 
1282                                 err = -EAGAIN << 
1283                                 goto out;     << 
1284                         }                     << 
1285                         if (!anon_vma_trylock << 
1286                                 pte_unmap(&or << 
1287                                 pte_unmap(&or << 
1288                                 src_pte = dst << 
1289                                 /* now we can << 
1290                                 anon_vma_lock << 
1291                                 goto retry;   << 
1292                         }                     << 
1293                 }                             << 
1294                                               << 
1295                 err = move_present_pte(mm,  d << 
1296                                        dst_ad << 
1297                                        orig_d << 
1298                                        dst_pt << 
1299         } else {                              << 
1300                 entry = pte_to_swp_entry(orig << 
1301                 if (non_swap_entry(entry)) {  << 
1302                         if (is_migration_entr << 
1303                                 pte_unmap(&or << 
1304                                 pte_unmap(&or << 
1305                                 src_pte = dst << 
1306                                 migration_ent << 
1307                                 err = -EAGAIN << 
1308                         } else                << 
1309                                 err = -EFAULT << 
1310                         goto out;             << 
1311                 }                             << 
1312                                               << 
1313                 err = move_swap_pte(mm, dst_a << 
1314                                     dst_pte,  << 
1315                                     orig_dst_ << 
1316                                     dst_ptl,  << 
1317         }                                     << 
1318                                               << 
1319 out:                                          << 
1320         if (src_anon_vma) {                   << 
1321                 anon_vma_unlock_write(src_ano << 
1322                 put_anon_vma(src_anon_vma);   << 
1323         }                                     << 
1324         if (src_folio) {                      << 
1325                 folio_unlock(src_folio);      << 
1326                 folio_put(src_folio);         << 
1327         }                                     << 
1328         if (dst_pte)                          << 
1329                 pte_unmap(dst_pte);           << 
1330         if (src_pte)                          << 
1331                 pte_unmap(src_pte);           << 
1332         mmu_notifier_invalidate_range_end(&ra << 
1333                                               << 
1334         return err;                           << 
1335 }                                             << 
1336                                               << 
1337 #ifdef CONFIG_TRANSPARENT_HUGEPAGE            << 
1338 static inline bool move_splits_huge_pmd(unsig << 
1339                                         unsig << 
1340                                         unsig << 
1341 {                                             << 
1342         return (src_addr & ~HPAGE_PMD_MASK) | << 
1343                 src_end - src_addr < HPAGE_PM << 
1344 }                                             << 
1345 #else                                         << 
1346 static inline bool move_splits_huge_pmd(unsig << 
1347                                         unsig << 
1348                                         unsig << 
1349 {                                             << 
1350         /* This is unreachable anyway, just t << 
1351         return false;                         << 
1352 }                                             << 
1353 #endif                                        << 
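
In concrete terms, with the usual 2 MiB huge PMDs the helper above reports a split whenever either address is not 2 MiB aligned or less than a full PMD remains to be moved. A userspace analogue of the check, assuming that 2 MiB size:

#define PMD_SZ (2UL << 20)      /* assumed HPAGE_PMD_SIZE */

static int would_split_pmd(unsigned long src, unsigned long dst,
                           unsigned long remaining)
{
        return (src & (PMD_SZ - 1)) || (dst & (PMD_SZ - 1)) ||
               remaining < PMD_SZ;
}
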
1354                                               << 
1355 static inline bool vma_move_compatible(struct << 
1356 {                                             << 
1357         return !(vma->vm_flags & (VM_PFNMAP | << 
1358                                   VM_MIXEDMAP << 
1359 }                                             << 
1360                                               << 
1361 static int validate_move_areas(struct userfau << 
1362                                struct vm_area << 
1363                                struct vm_area << 
1364 {                                             << 
1365         /* Only allow moving if both have the << 
1366         if ((src_vma->vm_flags & VM_ACCESS_FL << 
1367             pgprot_val(src_vma->vm_page_prot) << 
1368                 return -EINVAL;               << 
1369                                               << 
1370         /* Only allow moving if both are mloc << 
1371         if ((src_vma->vm_flags & VM_LOCKED) ! << 
1372                 return -EINVAL;               << 
1373                                               << 
1374         /*                                    << 
1375          * For now, we keep it simple and onl << 
1376          * Access flags are equal, therefore  << 
1377          */                                   << 
1378         if (!(src_vma->vm_flags & VM_WRITE))  << 
1379                 return -EINVAL;               << 
1380                                               << 
1381         /* Check if vma flags indicate conten << 
1382         if (!vma_move_compatible(src_vma) ||  << 
1383                 return -EINVAL;               << 
1384                                               << 
1385         /* Ensure dst_vma is registered in uf << 
1386         if (!dst_vma->vm_userfaultfd_ctx.ctx  << 
1387             dst_vma->vm_userfaultfd_ctx.ctx ! << 
1388                 return -EINVAL;               << 
1389                                               << 
1390         /* Only allow moving across anonymous << 
1391         if (!vma_is_anonymous(src_vma) || !vm << 
1392                 return -EINVAL;               << 
1393                                               << 
1394         return 0;                             << 
1395 }                                             << 
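
validate_move_areas() spells out what UFFDIO_MOVE accepts: both VMAs anonymous and writable, identical protections and mlock state, and a destination registered on this userfaultfd. From userspace the ioctl remaps already-populated pages instead of copying them; a sketch, assuming a kernel recent enough to provide UFFDIO_MOVE:

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

/* Move pages from 'src' into the registered, still-missing 'dst' range. */
static int move_range(int uffd, unsigned long dst, unsigned long src,
                      unsigned long len)
{
        struct uffdio_move mv = {
                .dst  = dst,
                .src  = src,
                .len  = len,
                .mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES,
        };

        return ioctl(uffd, UFFDIO_MOVE, &mv);
}
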
1396                                               << 
1397 static __always_inline                        << 
1398 int find_vmas_mm_locked(struct mm_struct *mm, << 
1399                         unsigned long dst_sta << 
1400                         unsigned long src_sta << 
1401                         struct vm_area_struct << 
1402                         struct vm_area_struct << 
1403 {                                             << 
1404         struct vm_area_struct *vma;           << 
1405                                               << 
1406         mmap_assert_locked(mm);               << 
1407         vma = find_vma_and_prepare_anon(mm, d << 
1408         if (IS_ERR(vma))                      << 
1409                 return PTR_ERR(vma);          << 
1410                                               << 
1411         *dst_vmap = vma;                      << 
1412         /* Skip finding src_vma if src_start  << 
1413         if (src_start >= vma->vm_start && src << 
1414                 goto out_success;             << 
1415                                               << 
1416         vma = vma_lookup(mm, src_start);      << 
1417         if (!vma)                             << 
1418                 return -ENOENT;               << 
1419 out_success:                                  << 
1420         *src_vmap = vma;                      << 
1421         return 0;                             << 
1422 }                                             << 
1423                                               << 
1424 #ifdef CONFIG_PER_VMA_LOCK                    << 
1425 static int uffd_move_lock(struct mm_struct *m << 
1426                           unsigned long dst_s << 
1427                           unsigned long src_s << 
1428                           struct vm_area_stru << 
1429                           struct vm_area_stru << 
1430 {                                             << 
1431         struct vm_area_struct *vma;           << 
1432         int err;                              << 
1433                                               << 
1434         vma = uffd_lock_vma(mm, dst_start);   << 
1435         if (IS_ERR(vma))                      << 
1436                 return PTR_ERR(vma);          << 
1437                                               << 
1438         *dst_vmap = vma;                      << 
1439         /*                                    << 
1440          * Skip finding src_vma if src_start  << 
1441          * that we don't lock the same vma tw << 
1442          */                                   << 
1443         if (src_start >= vma->vm_start && src << 
1444                 *src_vmap = vma;              << 
1445                 return 0;                     << 
1446         }                                     << 
1447                                               << 
1448         /*                                            << 
1449          * Using uffd_lock_vma() to get src_vma can lead to the following deadlock:  << 
1450          *                                            << 
1451          * Thread1                              Thread2  << 
1452          * -------                              -------  << 
1453          * vma_start_read(dst_vma)                    << 
1454          *                                      mmap_write_lock(mm)  << 
1455          *                                      vma_start_write(src_vma)  << 
1456          * vma_start_read(src_vma)                    << 
1457          * mmap_read_lock(mm)                         << 
1458          *                                      vma_start_write(dst_vma)  << 
1459          */                                           << 
1460         *src_vmap = lock_vma_under_rcu(mm, sr << 
1461         if (likely(*src_vmap))                << 
1462                 return 0;                     << 
1463                                               << 
1464         /* Undo any locking and retry in mmap << 
1465         vma_end_read(*dst_vmap);              << 
1466                                               << 
1467         mmap_read_lock(mm);                   << 
1468         err = find_vmas_mm_locked(mm, dst_sta << 
1469         if (!err) {                           << 
1470                 /*                            << 
1471                  * See comment in uffd_lock_v << 
1472                  * vma_start_read() here.     << 
1473                  */                           << 
1474                 down_read(&(*dst_vmap)->vm_lo << 
1475                 if (*dst_vmap != *src_vmap)   << 
1476                         down_read_nested(&(*s << 
1477                                          SING << 
1478         }                                     << 
1479         mmap_read_unlock(mm);                 << 
1480         return err;                           << 
1481 }                                             << 
1482                                               << 
1483 static void uffd_move_unlock(struct vm_area_s << 
1484                              struct vm_area_s << 
1485 {                                             << 
1486         vma_end_read(src_vma);                << 
1487         if (src_vma != dst_vma)               << 
1488                 vma_end_read(dst_vma);        << 
1489 }                                             << 
1490                                               << 
1491 #else                                         << 
1492                                               << 
1493 static int uffd_move_lock(struct mm_struct *m << 
1494                           unsigned long dst_s << 
1495                           unsigned long src_s << 
1496                           struct vm_area_stru << 
1497                           struct vm_area_stru << 
1498 {                                             << 
1499         int err;                              << 
1500                                               << 
1501         mmap_read_lock(mm);                   << 
1502         err = find_vmas_mm_locked(mm, dst_sta << 
1503         if (err)                              << 
1504                 mmap_read_unlock(mm);         << 
1505         return err;                           << 
1506 }                                             << 
1507                                               << 
1508 static void uffd_move_unlock(struct vm_area_s << 
1509                              struct vm_area_s << 
1510 {                                             << 
1511         mmap_assert_locked(src_vma->vm_mm);   << 
1512         mmap_read_unlock(dst_vma->vm_mm);     << 
1513 }                                             << 
1514 #endif                                        << 
1515                                               << 
1516 /**                                           << 
1517  * move_pages - move arbitrary anonymous page << 
1518  * @ctx: pointer to the userfaultfd context   << 
1519  * @dst_start: start of the destination virtu << 
1520  * @src_start: start of the source virtual me << 
1521  * @len: length of the virtual memory range   << 
1522  * @mode: flags from uffdio_move.mode         << 
1523  *                                            << 
1524  * It will either use the mmap_lock in read m << 
1525  *                                            << 
1526  * move_pages() remaps arbitrary anonymous pa << 
1527  * copy. It only works on non shared anonymou << 
1528  * be relocated without generating non linear << 
1529  * code.                                      << 
1530  *                                            << 
1531  * It provides a zero copy mechanism to handl << 
1532  * The source vma pages should have mapcount  << 
1533  * enforced by using madvise(MADV_DONTFORK) o << 
1534  *                                            << 
1535  * The thread receiving the page during the u << 
1536  * will receive the faulting page in the sour << 
1537  * storage or any other I/O device (MADV_DONT << 
1538          * avoids move_pages() failing with -EBUSY if << 
1539  * move_pages() is called), then it will call << 
1540  * page in the faulting address in the destin << 
1541  *                                            << 
1542  * This userfaultfd command works purely via  << 
1543  * most efficient way to move physical non sh << 
1544  * across different virtual addresses. Unlike << 
1545  * it does not create any new vmas. The mappi << 
1546  * address is atomic.                         << 
1547  *                                            << 
1548  * It only works if the vma protection bits a << 
1549  * source and destination vma.                << 
1550  *                                            << 
1551  * It can remap non shared anonymous pages wi << 
1552  *                                            << 
1553  * If the source virtual memory range has any << 
1554  * the destination virtual memory range is no << 
1555  * move_pages() will fail respectively with - << 
1556  * provides a very strict behavior to avoid a << 
1557  * corruption going unnoticed if there are us << 
1558  * Only one thread should resolve the userlan << 
1559  * time for any given faulting address. This  << 
1560  * try to both call move_pages() on the same  << 
1561  * same time, the second thread will get an e << 
1562  * command.                                   << 
1563  *                                            << 
1564          * The command retval will return "len" if su << 
1565  * however can be interrupted by fatal signal << 
1566  * interrupted it will return the number of b << 
1567  * remapped before the interruption if any, o << 
1568  * none. It will never return zero. Either it << 
1569  * an amount of bytes successfully moved. If  << 
1570  * "short" remap, the move_pages() command sh << 
1571          * userland with src+retval, dst+retval, len-r << 
1572  * about the error that interrupted it.       << 
1573  *                                            << 
1574  * The UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES flag  << 
1575          * prevent -ENOENT errors from materializing if t << 
1576  * source virtual range that is being remappe << 
1577  * accounted as successfully remapped in the  << 
1578  * command. This is mostly useful to remap hu << 
1579  * virtual regions without knowing if there a << 
1580  * in the regions or not, but preventing the  << 
1581  * the hugepmd during the remap.              << 
1582  *                                            << 
1583  * If there's any rmap walk that is taking th << 
1584  * first obtaining the folio lock (the only c << 
1585  * folio_referenced), they will have to verif << 
1586  * has changed after taking the anon_vma lock << 
1587  * should release the lock and retry obtainin << 
1588  * it means the anon_vma was changed by move_ << 
1589  * could be obtained. This is the only additi << 
1590  * the rmap code to provide this anonymous pa << 
1591  */                                           << 
1592 ssize_t move_pages(struct userfaultfd_ctx *ct << 
1593                    unsigned long src_start, u << 
1594 {                                             << 
1595         struct mm_struct *mm = ctx->mm;       << 
1596         struct vm_area_struct *src_vma, *dst_ << 
1597         unsigned long src_addr, dst_addr;     << 
1598         pmd_t *src_pmd, *dst_pmd;             << 
1599         long err = -EINVAL;                   << 
1600         ssize_t moved = 0;                    << 
1601                                               << 
1602         /* Sanitize the command parameters. * << 
1603         if (WARN_ON_ONCE(src_start & ~PAGE_MA << 
1604             WARN_ON_ONCE(dst_start & ~PAGE_MA << 
1605             WARN_ON_ONCE(len & ~PAGE_MASK))   << 
1606                 goto out;                     << 
1607                                               << 
1608         /* Does the address range wrap, or is << 
1609         if (WARN_ON_ONCE(src_start + len <= s << 
1610             WARN_ON_ONCE(dst_start + len <= d << 
1611                 goto out;                     << 
1612                                               << 
1613         err = uffd_move_lock(mm, dst_start, s << 
1614         if (err)                              << 
1615                 goto out;                     << 
1616                                               << 
1617         /* Re-check after taking map_changing << 
1618         err = -EAGAIN;                        << 
1619         down_read(&ctx->map_changing_lock);   << 
1620         if (likely(atomic_read(&ctx->mmap_cha << 
1621                 goto out_unlock;              << 
1622         /*                                    << 
1623          * Make sure the vma is not shared, t << 
1624          * ranges are both valid and fully wi << 
1625          * vma.                               << 
1626          */                                   << 
1627         err = -EINVAL;                        << 
1628         if (src_vma->vm_flags & VM_SHARED)    << 
1629                 goto out_unlock;              << 
1630         if (src_start + len > src_vma->vm_end << 
1631                 goto out_unlock;              << 
1632                                               << 
1633         if (dst_vma->vm_flags & VM_SHARED)    << 
1634                 goto out_unlock;              << 
1635         if (dst_start + len > dst_vma->vm_end << 
1636                 goto out_unlock;              << 
1637                                               << 
1638         err = validate_move_areas(ctx, src_vm << 
1639         if (err)                              << 
1640                 goto out_unlock;              << 
1641                                               << 
1642         for (src_addr = src_start, dst_addr = << 
1643              src_addr < src_start + len;) {   << 
1644                 spinlock_t *ptl;              << 
1645                 pmd_t dst_pmdval;             << 
1646                 unsigned long step_size;      << 
1647                                               << 
1648                 /*                            << 
1649                  * Below works because anonym << 
1650                  * transparent huge PUD. If f << 
1651                  * that case would need to be << 
1652                  */                           << 
1653                 src_pmd = mm_find_pmd(mm, src << 
1654                 if (unlikely(!src_pmd)) {     << 
1655                         if (!(mode & UFFDIO_M << 
1656                                 err = -ENOENT << 
1657                                 break;        << 
1658                         }                     << 
1659                         src_pmd = mm_alloc_pm << 
1660                         if (unlikely(!src_pmd << 
1661                                 err = -ENOMEM << 
1662                                 break;        << 
1663                         }                     << 
1664                 }                             << 
1665                 dst_pmd = mm_alloc_pmd(mm, ds << 
1666                 if (unlikely(!dst_pmd)) {     << 
1667                         err = -ENOMEM;        << 
1668                         break;                << 
1669                 }                             << 
1670                                               << 
1671                 dst_pmdval = pmdp_get_lockles << 
1672                 /*                            << 
1673                  * If the dst_pmd is mapped a << 
1674                  * be strict. If dst_pmd chan << 
1675                  * move_pages_huge_pmd() will << 
1676                  * while move_pages_pte() wil << 
1677                  */                           << 
1678                 if (unlikely(pmd_trans_huge(d << 
1679                         err = -EEXIST;        << 
1680                         break;                << 
1681                 }                             << 
1682                                               << 
1683                 ptl = pmd_trans_huge_lock(src << 
1684                 if (ptl) {                    << 
1685                         if (pmd_devmap(*src_p << 
1686                                 spin_unlock(p << 
1687                                 err = -ENOENT << 
1688                                 break;        << 
1689                         }                     << 
1690                                               << 
1691                         /* Check if we can mo << 
1692                         if (move_splits_huge_ << 
1693                             !pmd_none(dst_pmd << 
1694                                 struct folio  << 
1695                                               << 
1696                                 if (!folio || << 
1697                                               << 
1698                                         spin_ << 
1699                                         err = << 
1700                                         break << 
1701                                 }             << 
1702                                               << 
1703                                 spin_unlock(p << 
1704                                 split_huge_pm << 
1705                                 /* The folio  << 
1706                                 continue;     << 
1707                         }                     << 
1708                                               << 
1709                         err = move_pages_huge << 
1710                                               << 
1711                                               << 
1712                         step_size = HPAGE_PMD << 
1713                 } else {                      << 
1714                         if (pmd_none(*src_pmd << 
1715                                 if (!(mode &  << 
1716                                         err = << 
1717                                         break << 
1718                                 }             << 
1719                                 if (unlikely( << 
1720                                         err = << 
1721                                         break << 
1722                                 }             << 
1723                         }                     << 
1724                                               << 
1725                         if (unlikely(pte_allo << 
1726                                 err = -ENOMEM << 
1727                                 break;        << 
1728                         }                     << 
1729                                               << 
1730                         err = move_pages_pte( << 
1731                                               << 
1732                                               << 
1733                         step_size = PAGE_SIZE << 
1734                 }                             << 
1735                                               << 
1736                 cond_resched();               << 
1737                                               << 
1738                 if (fatal_signal_pending(curr << 
1739                         /* Do not override an << 
1740                         if (!err || err == -E << 
1741                                 err = -EINTR; << 
1742                         break;                << 
1743                 }                             << 
1744                                               << 
1745                 if (err) {                    << 
1746                         if (err == -EAGAIN)   << 
1747                                 continue;     << 
1748                         break;                << 
1749                 }                             << 
1750                                               << 
1751                 /* Proceed to the next page * << 
1752                 dst_addr += step_size;        << 
1753                 src_addr += step_size;        << 
1754                 moved += step_size;           << 
1755         }                                     << 
1756                                               << 
1757 out_unlock:                                   << 
1758         up_read(&ctx->map_changing_lock);     << 
1759         uffd_move_unlock(dst_vma, src_vma);   << 
1760 out:                                          << 
1761         VM_WARN_ON(moved < 0);                << 
1762         VM_WARN_ON(err > 0);                  << 
1763         VM_WARN_ON(!moved && !err);           << 
1764         return moved ? moved : err;           << 
1765 }                                             << 
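
To make the retry contract described in the comment above concrete (repeat with src+retval, dst+retval, len-retval after a "short" move), here is a minimal userspace sketch. It assumes uffd, src, dst and len were prepared as in the earlier setup sketch, and that uffdio_move.move reports partial progress the same way uffdio_copy.copy does; treat it as an illustration rather than a reference implementation.

/* Illustrative userspace sketch (not part of mm/userfaultfd.c). */
#include <errno.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

static long move_range(int uffd, unsigned long dst, unsigned long src,
		       unsigned long len)
{
	unsigned long done = 0;

	while (done < len) {
		struct uffdio_move mv = {
			.dst  = dst + done,
			.src  = src + done,
			.len  = len - done,
			/* Tolerate unmapped holes in the source range, as
			 * UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES is documented
			 * to do above. */
			.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES,
		};

		if (ioctl(uffd, UFFDIO_MOVE, &mv) == 0) {
			done += mv.len;		/* whole request moved */
			continue;
		}
		if (mv.move > 0) {
			/* "Short" move: retry the remainder, exactly the
			 * src+retval/dst+retval/len-retval pattern above. */
			done += mv.move;
			continue;
		}
		if (errno == EAGAIN)
			continue;	/* transient; retry the same chunk */
		fprintf(stderr, "UFFDIO_MOVE: %s\n", strerror(errno));
		return -1;
	}
	return (long)done;
}
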
1766                                               << 
1767 static void userfaultfd_set_vm_flags(struct v << 
1768                                      vm_flags << 
1769 {                                             << 
1770         const bool uffd_wp_changed = (vma->vm << 
1771                                               << 
1772         vm_flags_reset(vma, flags);           << 
1773         /*                                    << 
1774          * For shared mappings, we want to en << 
1775          * userfaultfd-wp is enabled (see vma << 
1776          * recalculate vma->vm_page_prot when << 
1777          */                                   << 
1778         if ((vma->vm_flags & VM_SHARED) && uf << 
1779                 vma_set_page_prot(vma);       << 
1780 }                                             << 
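
The vm_page_prot recalculation above only matters once userspace actually arms uffd-wp on a mapping. As a hedged illustration of that userspace side, the sketch below registers a range in write-protect mode and then protects it; the UAPI names are real, but the surrounding setup is assumed, including a uffd opened with UFFD_FEATURE_PAGEFAULT_FLAG_WP.

/* Illustrative userspace sketch (not part of mm/userfaultfd.c). */
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int uffd_wp_arm(int uffd, void *addr, unsigned long len)
{
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)addr, .len = len },
		.mode  = UFFDIO_REGISTER_MODE_WP,
	};
	struct uffdio_writeprotect wp = {
		.range = { .start = (unsigned long)addr, .len = len },
		/* Setting ..._MODE_WP arms write-protection; issuing the
		 * ioctl again with .mode = 0 later removes it. */
		.mode  = UFFDIO_WRITEPROTECT_MODE_WP,
	};

	if (ioctl(uffd, UFFDIO_REGISTER, &reg) ||
	    ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) {
		perror("uffd-wp setup");
		return -1;
	}
	return 0;
}
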
1781                                               << 
1782 static void userfaultfd_set_ctx(struct vm_are << 
1783                                 struct userfa << 
1784                                 unsigned long << 
1785 {                                             << 
1786         vma_start_write(vma);                 << 
1787         vma->vm_userfaultfd_ctx = (struct vm_ << 
1788         userfaultfd_set_vm_flags(vma,         << 
1789                                  (vma->vm_fla << 
1790 }                                             << 
1791                                               << 
1792 void userfaultfd_reset_ctx(struct vm_area_str << 
1793 {                                             << 
1794         userfaultfd_set_ctx(vma, NULL, 0);    << 
1795 }                                             << 
1796                                               << 
1797 struct vm_area_struct *userfaultfd_clear_vma( << 
1798                                               << 
1799                                               << 
1800                                               << 
1801                                               << 
1802 {                                             << 
1803         struct vm_area_struct *ret;           << 
1804                                               << 
1805         /* Reset ptes for the whole vma range << 
1806         if (userfaultfd_wp(vma))              << 
1807                 uffd_wp_range(vma, start, end << 
1808                                               << 
1809         ret = vma_modify_flags_uffd(vmi, prev << 
1810                                     vma->vm_f << 
1811                                     NULL_VM_U << 
1812                                               << 
1813         /*                                    << 
1814          * In the vma_merge() successful mpro << 
1815          * the next vma was merged into the c << 
1816          * the current one has not been updat << 
1817          */                                   << 
1818         if (!IS_ERR(ret))                     << 
1819                 userfaultfd_reset_ctx(ret);   << 
1820                                               << 
1821         return ret;                           << 
1822 }                                             << 
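
userfaultfd_clear_vma() above is the kernel-side cleanup behind unregistering a range (and the release paths further down). For orientation, its usual userspace trigger is the plain UFFDIO_UNREGISTER ioctl, sketched below; uffd, addr and len are assumed to come from an earlier registration of the same range.

/* Illustrative userspace sketch (not part of mm/userfaultfd.c). */
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int uffd_unregister(int uffd, void *addr, unsigned long len)
{
	struct uffdio_range range = {
		.start = (unsigned long)addr,
		.len   = len,
	};

	/* Once this returns, faults in [addr, addr + len) are resolved by
	 * the kernel again instead of being reported on the uffd. */
	if (ioctl(uffd, UFFDIO_UNREGISTER, &range)) {
		perror("UFFDIO_UNREGISTER");
		return -1;
	}
	return 0;
}
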
1823                                               << 
1824 /* Assumes mmap write lock taken, and mm_stru << 
1825 int userfaultfd_register_range(struct userfau << 
1826                                struct vm_area << 
1827                                unsigned long  << 
1828                                unsigned long  << 
1829                                bool wp_async) << 
1830 {                                             << 
1831         VMA_ITERATOR(vmi, ctx->mm, start);    << 
1832         struct vm_area_struct *prev = vma_pre << 
1833         unsigned long vma_end;                << 
1834         unsigned long new_flags;              << 
1835                                               << 
1836         if (vma->vm_start < start)            << 
1837                 prev = vma;                   << 
1838                                               << 
1839         for_each_vma_range(vmi, vma, end) {   << 
1840                 cond_resched();               << 
1841                                               << 
1842                 BUG_ON(!vma_can_userfault(vma << 
1843                 BUG_ON(vma->vm_userfaultfd_ct << 
1844                        vma->vm_userfaultfd_ct << 
1845                 WARN_ON(!(vma->vm_flags & VM_ << 
1846                                               << 
1847                 /*                            << 
1848                  * Nothing to do: this vma is << 
1849                  * userfaultfd and with the r << 
1850                  */                           << 
1851                 if (vma->vm_userfaultfd_ctx.c << 
1852                     (vma->vm_flags & vm_flags << 
1853                         goto skip;            << 
1854                                               << 
1855                 if (vma->vm_start > start)    << 
1856                         start = vma->vm_start << 
1857                 vma_end = min(end, vma->vm_en << 
1858                                               << 
1859                 new_flags = (vma->vm_flags &  << 
1860                 vma = vma_modify_flags_uffd(& << 
1861                                             n << 
1862                                             ( << 
1863                 if (IS_ERR(vma))              << 
1864                         return PTR_ERR(vma);  << 
1865                                               << 
1866                 /*                            << 
1867                  * In the vma_merge() success << 
1868                  * the next vma was merged in << 
1869                  * the current one has not be << 
1870                  */                           << 
1871                 userfaultfd_set_ctx(vma, ctx, << 
1872                                               << 
1873                 if (is_vm_hugetlb_page(vma) & << 
1874                         hugetlb_unshare_all_p << 
1875                                               << 
1876 skip:                                         << 
1877                 prev = vma;                   << 
1878                 start = vma->vm_end;          << 
1879         }                                     << 
1880                                               << 
1881         return 0;                             << 
1882 }                                             << 
1883                                               << 
1884 void userfaultfd_release_new(struct userfault << 
1885 {                                             << 
1886         struct mm_struct *mm = ctx->mm;       << 
1887         struct vm_area_struct *vma;           << 
1888         VMA_ITERATOR(vmi, mm, 0);             << 
1889                                               << 
1890         /* the various vma->vm_userfaultfd_ct << 
1891         mmap_write_lock(mm);                  << 
1892         for_each_vma(vmi, vma) {              << 
1893                 if (vma->vm_userfaultfd_ctx.c << 
1894                         userfaultfd_reset_ctx << 
1895         }                                     << 
1896         mmap_write_unlock(mm);                << 
1897 }                                             << 
1898                                               << 
1899 void userfaultfd_release_all(struct mm_struct << 
1900                              struct userfault << 
1901 {                                             << 
1902         struct vm_area_struct *vma, *prev;    << 
1903         VMA_ITERATOR(vmi, mm, 0);             << 
1904                                               << 
1905         if (!mmget_not_zero(mm))              << 
1906                 return;                       << 
1907                                               << 
1908         /*                                    << 
1909          * Flush page faults out of all CPUs. << 
1910          * must be retried without returning  << 
1911          * userfaultfd_ctx_get() succeeds but << 
1912          * changes while handle_userfault rel << 
1913          * it's critical that released is set << 
1914          * taking the mmap_lock for writing.  << 
1915          */                                   << 
1916         mmap_write_lock(mm);                  << 
1917         prev = NULL;                          << 
1918         for_each_vma(vmi, vma) {              << 
1919                 cond_resched();               << 
1920                 BUG_ON(!!vma->vm_userfaultfd_ << 
1921                        !!(vma->vm_flags & __V << 
1922                 if (vma->vm_userfaultfd_ctx.c << 
1923                         prev = vma;           << 
1924                         continue;             << 
1925                 }                             << 
1926                                               << 
1927                 vma = userfaultfd_clear_vma(& << 
1928                                             v << 
1929                 prev = vma;                   << 
1930         }                                     << 
1931         mmap_write_unlock(mm);                << 
1932         mmput(mm);                            << 
1933 }                                                631 }
1934                                                  632 
