
TOMOYO Linux Cross Reference
Linux/mm/rmap.c


Diff markup

Differences between /mm/rmap.c (Version linux-6.12-rc7) and /mm/rmap.c (Version linux-3.10.108)


  1 /*                                                  1 /*
  2  * mm/rmap.c - physical to virtual reverse map      2  * mm/rmap.c - physical to virtual reverse mappings
  3  *                                                  3  *
  4  * Copyright 2001, Rik van Riel <riel@conectiv      4  * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
  5  * Released under the General Public License (      5  * Released under the General Public License (GPL).
  6  *                                                  6  *
  7  * Simple, low overhead reverse mapping scheme      7  * Simple, low overhead reverse mapping scheme.
  8  * Please try to keep this thing as modular as      8  * Please try to keep this thing as modular as possible.
  9  *                                                  9  *
 10  * Provides methods for unmapping each kind of     10  * Provides methods for unmapping each kind of mapped page:
 11  * the anon methods track anonymous pages, and     11  * the anon methods track anonymous pages, and
 12  * the file methods track pages belonging to a     12  * the file methods track pages belonging to an inode.
 13  *                                                 13  *
 14  * Original design by Rik van Riel <riel@conec     14  * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 15  * File methods by Dave McCracken <dmccr@us.ib     15  * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 16  * Anonymous methods by Andrea Arcangeli <andr     16  * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
 17  * Contributions by Hugh Dickins 2003, 2004        17  * Contributions by Hugh Dickins 2003, 2004
 18  */                                                18  */
 19                                                    19 
 20 /*                                                 20 /*
 21  * Lock ordering in mm:                            21  * Lock ordering in mm:
 22  *                                                 22  *
 23  * inode->i_rwsem       (while writing or trun !!  23  * inode->i_mutex       (while writing or truncating, not reading or faulting)
 24  *   mm->mmap_lock                             !!  24  *   mm->mmap_sem
 25  *     mapping->invalidate_lock (in filemap_fa !!  25  *     page->flags PG_locked (lock_page)
 26  *       folio_lock                            !!  26  *       mapping->i_mmap_mutex
 27  *         hugetlbfs_i_mmap_rwsem_key (in huge !!  27  *         anon_vma->rwsem
 28  *           vma_start_write                   !!  28  *           mm->page_table_lock or pte_lock
 29  *             mapping->i_mmap_rwsem           !!  29  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
 30  *               anon_vma->rwsem               !!  30  *             swap_lock (in swap_duplicate, swap_info_get)
 31  *                 mm->page_table_lock or pte_ !!  31  *               mmlist_lock (in mmput, drain_mmlist and others)
 32  *                   swap_lock (in swap_duplic !!  32  *               mapping->private_lock (in __set_page_dirty_buffers)
 33  *                     mmlist_lock (in mmput,  !!  33  *               inode->i_lock (in set_page_dirty's __mark_inode_dirty)
 34  *                     mapping->private_lock ( !!  34  *               bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
 35  *                       folio_lock_memcg move !!  35  *                 sb_lock (within inode_lock in fs/fs-writeback.c)
 36  *                         i_pages lock (widel !!  36  *                 mapping->tree_lock (widely used, in set_page_dirty,
 37  *                           lruvec->lru_lock  !!  37  *                           in arch-dependent flush_dcache_mmap_lock,
 38  *                     inode->i_lock (in set_p !!  38  *                           within bdi.wb->list_lock in __sync_single_inode)
 39  *                     bdi.wb->list_lock (in s << 
 40  *                       sb_lock (within inode << 
 41  *                       i_pages lock (widely  << 
 42  *                                 in arch-dep << 
 43  *                                 within bdi. << 
 44  *                                                 39  *
 45  * anon_vma->rwsem,mapping->i_mmap_rwsem   (me !!  40  * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
 46  *   ->tasklist_lock                               41  *   ->tasklist_lock
 47  *     pte map lock                                42  *     pte map lock
 48  *                                             << 
 49  * hugetlbfs PageHuge() take locks in this ord << 
 50  *   hugetlb_fault_mutex (hugetlbfs specific p << 
 51  *     vma_lock (hugetlb specific lock for pmd << 
 52  *       mapping->i_mmap_rwsem (also used for  << 
 53  *         folio_lock                          << 
 54  */                                                43  */
 55                                                    44 
 56 #include <linux/mm.h>                              45 #include <linux/mm.h>
 57 #include <linux/sched/mm.h>                    << 
 58 #include <linux/sched/task.h>                  << 
 59 #include <linux/pagemap.h>                         46 #include <linux/pagemap.h>
 60 #include <linux/swap.h>                            47 #include <linux/swap.h>
 61 #include <linux/swapops.h>                         48 #include <linux/swapops.h>
 62 #include <linux/slab.h>                            49 #include <linux/slab.h>
 63 #include <linux/init.h>                            50 #include <linux/init.h>
 64 #include <linux/ksm.h>                             51 #include <linux/ksm.h>
 65 #include <linux/rmap.h>                            52 #include <linux/rmap.h>
 66 #include <linux/rcupdate.h>                        53 #include <linux/rcupdate.h>
 67 #include <linux/export.h>                          54 #include <linux/export.h>
 68 #include <linux/memcontrol.h>                      55 #include <linux/memcontrol.h>
 69 #include <linux/mmu_notifier.h>                    56 #include <linux/mmu_notifier.h>
 70 #include <linux/migrate.h>                         57 #include <linux/migrate.h>
 71 #include <linux/hugetlb.h>                         58 #include <linux/hugetlb.h>
 72 #include <linux/huge_mm.h>                     << 
 73 #include <linux/backing-dev.h>                     59 #include <linux/backing-dev.h>
 74 #include <linux/page_idle.h>                   << 
 75 #include <linux/memremap.h>                    << 
 76 #include <linux/userfaultfd_k.h>               << 
 77 #include <linux/mm_inline.h>                   << 
 78 #include <linux/oom.h>                         << 
 79                                                    60 
 80 #include <asm/tlbflush.h>                          61 #include <asm/tlbflush.h>
 81                                                    62 
 82 #define CREATE_TRACE_POINTS                    << 
 83 #include <trace/events/tlb.h>                  << 
 84 #include <trace/events/migrate.h>              << 
 85                                                << 
 86 #include "internal.h"                              63 #include "internal.h"
 87                                                    64 
 88 static struct kmem_cache *anon_vma_cachep;         65 static struct kmem_cache *anon_vma_cachep;
 89 static struct kmem_cache *anon_vma_chain_cache     66 static struct kmem_cache *anon_vma_chain_cachep;
 90                                                    67 
 91 static inline struct anon_vma *anon_vma_alloc(     68 static inline struct anon_vma *anon_vma_alloc(void)
 92 {                                                  69 {
 93         struct anon_vma *anon_vma;                 70         struct anon_vma *anon_vma;
 94                                                    71 
 95         anon_vma = kmem_cache_alloc(anon_vma_c     72         anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
 96         if (anon_vma) {                            73         if (anon_vma) {
 97                 atomic_set(&anon_vma->refcount     74                 atomic_set(&anon_vma->refcount, 1);
 98                 anon_vma->num_children = 0;    << 
 99                 anon_vma->num_active_vmas = 0; << 
100                 anon_vma->parent = anon_vma;   << 
101                 /*                                 75                 /*
102                  * Initialise the anon_vma roo     76                  * Initialise the anon_vma root to point to itself. If called
103                  * from fork, the root will be     77                  * from fork, the root will be reset to the parents anon_vma.
104                  */                                78                  */
105                 anon_vma->root = anon_vma;         79                 anon_vma->root = anon_vma;
106         }                                          80         }
107                                                    81 
108         return anon_vma;                           82         return anon_vma;
109 }                                                  83 }
110                                                    84 
111 static inline void anon_vma_free(struct anon_v     85 static inline void anon_vma_free(struct anon_vma *anon_vma)
112 {                                                  86 {
113         VM_BUG_ON(atomic_read(&anon_vma->refco     87         VM_BUG_ON(atomic_read(&anon_vma->refcount));
114                                                    88 
115         /*                                         89         /*
116          * Synchronize against folio_lock_anon !!  90          * Synchronize against page_lock_anon_vma_read() such that
117          * we can safely hold the lock without     91          * we can safely hold the lock without the anon_vma getting
118          * freed.                                  92          * freed.
119          *                                         93          *
120          * Relies on the full mb implied by th     94          * Relies on the full mb implied by the atomic_dec_and_test() from
121          * put_anon_vma() against the acquire      95          * put_anon_vma() against the acquire barrier implied by
122          * down_read_trylock() from folio_lock !!  96          * down_read_trylock() from page_lock_anon_vma_read(). This orders:
123          *                                         97          *
124          * folio_lock_anon_vma_read()   VS     !!  98          * page_lock_anon_vma_read()    VS      put_anon_vma()
125          *   down_read_trylock()                   99          *   down_read_trylock()                  atomic_dec_and_test()
126          *   LOCK                                 100          *   LOCK                                 MB
127          *   atomic_read()                        101          *   atomic_read()                        rwsem_is_locked()
128          *                                        102          *
129          * LOCK should suffice since the actua    103          * LOCK should suffice since the actual taking of the lock must
130          * happen _before_ what follows.          104          * happen _before_ what follows.
131          */                                       105          */
132         might_sleep();                            106         might_sleep();
133         if (rwsem_is_locked(&anon_vma->root->r    107         if (rwsem_is_locked(&anon_vma->root->rwsem)) {
134                 anon_vma_lock_write(anon_vma);    108                 anon_vma_lock_write(anon_vma);
135                 anon_vma_unlock_write(anon_vma    109                 anon_vma_unlock_write(anon_vma);
136         }                                         110         }
137                                                   111 
138         kmem_cache_free(anon_vma_cachep, anon_    112         kmem_cache_free(anon_vma_cachep, anon_vma);
139 }                                                 113 }
140                                                   114 
141 static inline struct anon_vma_chain *anon_vma_    115 static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
142 {                                                 116 {
143         return kmem_cache_alloc(anon_vma_chain    117         return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
144 }                                                 118 }
145                                                   119 
146 static void anon_vma_chain_free(struct anon_vm    120 static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
147 {                                                 121 {
148         kmem_cache_free(anon_vma_chain_cachep,    122         kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
149 }                                                 123 }
150                                                   124 
151 static void anon_vma_chain_link(struct vm_area    125 static void anon_vma_chain_link(struct vm_area_struct *vma,
152                                 struct anon_vm    126                                 struct anon_vma_chain *avc,
153                                 struct anon_vm    127                                 struct anon_vma *anon_vma)
154 {                                                 128 {
155         avc->vma = vma;                           129         avc->vma = vma;
156         avc->anon_vma = anon_vma;                 130         avc->anon_vma = anon_vma;
157         list_add(&avc->same_vma, &vma->anon_vm    131         list_add(&avc->same_vma, &vma->anon_vma_chain);
158         anon_vma_interval_tree_insert(avc, &an    132         anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
159 }                                                 133 }
160                                                   134 
161 /**                                               135 /**
162  * __anon_vma_prepare - attach an anon_vma to  !! 136  * anon_vma_prepare - attach an anon_vma to a memory region
163  * @vma: the memory region in question            137  * @vma: the memory region in question
164  *                                                138  *
165  * This makes sure the memory mapping describe    139  * This makes sure the memory mapping described by 'vma' has
166  * an 'anon_vma' attached to it, so that we ca    140  * an 'anon_vma' attached to it, so that we can associate the
167  * anonymous pages mapped into it with that an    141  * anonymous pages mapped into it with that anon_vma.
168  *                                                142  *
169  * The common case will be that we already hav !! 143  * The common case will be that we already have one, but if
170  * is handled inline by anon_vma_prepare(). Bu << 
171  * not we either need to find an adjacent mapp    144  * not we either need to find an adjacent mapping that we
172  * can re-use the anon_vma from (very common w    145  * can re-use the anon_vma from (very common when the only
173  * reason for splitting a vma has been mprotec    146  * reason for splitting a vma has been mprotect()), or we
174  * allocate a new one.                            147  * allocate a new one.
175  *                                                148  *
176  * Anon-vma allocations are very subtle, becau    149  * Anon-vma allocations are very subtle, because we may have
177  * optimistically looked up an anon_vma in fol !! 150  * optimistically looked up an anon_vma in page_lock_anon_vma_read()
178  * and that may actually touch the rwsem even  !! 151  * and that may actually touch the spinlock even in the newly
179  * allocated vma (it depends on RCU to make su    152  * allocated vma (it depends on RCU to make sure that the
180  * anon_vma isn't actually destroyed).            153  * anon_vma isn't actually destroyed).
181  *                                                154  *
182  * As a result, we need to do proper anon_vma     155  * As a result, we need to do proper anon_vma locking even
183  * for the new allocation. At the same time, w    156  * for the new allocation. At the same time, we do not want
184  * to do any locking for the common case of al    157  * to do any locking for the common case of already having
185  * an anon_vma.                                   158  * an anon_vma.
                                                   >> 159  *
                                                   >> 160  * This must be called with the mmap_sem held for reading.
186  */                                               161  */
187 int __anon_vma_prepare(struct vm_area_struct * !! 162 int anon_vma_prepare(struct vm_area_struct *vma)
188 {                                                 163 {
189         struct mm_struct *mm = vma->vm_mm;     !! 164         struct anon_vma *anon_vma = vma->anon_vma;
190         struct anon_vma *anon_vma, *allocated; << 
191         struct anon_vma_chain *avc;               165         struct anon_vma_chain *avc;
192                                                   166 
193         mmap_assert_locked(mm);                << 
194         might_sleep();                            167         might_sleep();
                                                   >> 168         if (unlikely(!anon_vma)) {
                                                   >> 169                 struct mm_struct *mm = vma->vm_mm;
                                                   >> 170                 struct anon_vma *allocated;
                                                   >> 171 
                                                   >> 172                 avc = anon_vma_chain_alloc(GFP_KERNEL);
                                                   >> 173                 if (!avc)
                                                   >> 174                         goto out_enomem;
195                                                   175 
196         avc = anon_vma_chain_alloc(GFP_KERNEL) !! 176                 anon_vma = find_mergeable_anon_vma(vma);
197         if (!avc)                              << 
198                 goto out_enomem;               << 
199                                                << 
200         anon_vma = find_mergeable_anon_vma(vma << 
201         allocated = NULL;                      << 
202         if (!anon_vma) {                       << 
203                 anon_vma = anon_vma_alloc();   << 
204                 if (unlikely(!anon_vma))       << 
205                         goto out_enomem_free_a << 
206                 anon_vma->num_children++; /* s << 
207                 allocated = anon_vma;          << 
208         }                                      << 
209                                                << 
210         anon_vma_lock_write(anon_vma);         << 
211         /* page_table_lock to protect against  << 
212         spin_lock(&mm->page_table_lock);       << 
213         if (likely(!vma->anon_vma)) {          << 
214                 vma->anon_vma = anon_vma;      << 
215                 anon_vma_chain_link(vma, avc,  << 
216                 anon_vma->num_active_vmas++;   << 
217                 allocated = NULL;                 177                 allocated = NULL;
218                 avc = NULL;                    !! 178                 if (!anon_vma) {
219         }                                      !! 179                         anon_vma = anon_vma_alloc();
220         spin_unlock(&mm->page_table_lock);     !! 180                         if (unlikely(!anon_vma))
221         anon_vma_unlock_write(anon_vma);       !! 181                                 goto out_enomem_free_avc;
                                                   >> 182                         allocated = anon_vma;
                                                   >> 183                 }
222                                                   184 
223         if (unlikely(allocated))               !! 185                 anon_vma_lock_write(anon_vma);
224                 put_anon_vma(allocated);       !! 186                 /* page_table_lock to protect against threads */
225         if (unlikely(avc))                     !! 187                 spin_lock(&mm->page_table_lock);
226                 anon_vma_chain_free(avc);      !! 188                 if (likely(!vma->anon_vma)) {
                                                   >> 189                         vma->anon_vma = anon_vma;
                                                   >> 190                         anon_vma_chain_link(vma, avc, anon_vma);
                                                   >> 191                         allocated = NULL;
                                                   >> 192                         avc = NULL;
                                                   >> 193                 }
                                                   >> 194                 spin_unlock(&mm->page_table_lock);
                                                   >> 195                 anon_vma_unlock_write(anon_vma);
227                                                   196 
                                                   >> 197                 if (unlikely(allocated))
                                                   >> 198                         put_anon_vma(allocated);
                                                   >> 199                 if (unlikely(avc))
                                                   >> 200                         anon_vma_chain_free(avc);
                                                   >> 201         }
228         return 0;                                 202         return 0;
229                                                   203 
230  out_enomem_free_avc:                             204  out_enomem_free_avc:
231         anon_vma_chain_free(avc);                 205         anon_vma_chain_free(avc);
232  out_enomem:                                      206  out_enomem:
233         return -ENOMEM;                           207         return -ENOMEM;
234 }                                                 208 }
235                                                   209 
236 /*                                                210 /*
237  * This is a useful helper function for lockin    211  * This is a useful helper function for locking the anon_vma root as
238  * we traverse the vma->anon_vma_chain, loopin    212  * we traverse the vma->anon_vma_chain, looping over anon_vma's that
239  * have the same vma.                             213  * have the same vma.
240  *                                                214  *
241  * Such anon_vma's should have the same root,     215  * Such anon_vma's should have the same root, so you'd expect to see
242  * just a single mutex_lock for the whole trav    216  * just a single mutex_lock for the whole traversal.
243  */                                               217  */
244 static inline struct anon_vma *lock_anon_vma_r    218 static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
245 {                                                 219 {
246         struct anon_vma *new_root = anon_vma->    220         struct anon_vma *new_root = anon_vma->root;
247         if (new_root != root) {                   221         if (new_root != root) {
248                 if (WARN_ON_ONCE(root))           222                 if (WARN_ON_ONCE(root))
249                         up_write(&root->rwsem)    223                         up_write(&root->rwsem);
250                 root = new_root;                  224                 root = new_root;
251                 down_write(&root->rwsem);         225                 down_write(&root->rwsem);
252         }                                         226         }
253         return root;                              227         return root;
254 }                                                 228 }
255                                                   229 
256 static inline void unlock_anon_vma_root(struct    230 static inline void unlock_anon_vma_root(struct anon_vma *root)
257 {                                                 231 {
258         if (root)                                 232         if (root)
259                 up_write(&root->rwsem);           233                 up_write(&root->rwsem);
260 }                                                 234 }
261                                                   235 
262 /*                                                236 /*
263  * Attach the anon_vmas from src to dst.          237  * Attach the anon_vmas from src to dst.
264  * Returns 0 on success, -ENOMEM on failure.      238  * Returns 0 on success, -ENOMEM on failure.
265  *                                             << 
266  * anon_vma_clone() is called by vma_expand(), << 
267  * copy_vma() and anon_vma_fork(). The first f << 
268  * while the last one, anon_vma_fork(), may tr << 
269  * prevent endless growth of anon_vma. Since d << 
270  * call, we can identify this case by checking << 
271  * src->anon_vma).                             << 
272  *                                             << 
273  * If (!dst->anon_vma && src->anon_vma) is tru << 
274  * and reuse existing anon_vma which has no vm << 
275  * This prevents degradation of anon_vma hiera << 
276  * case of constantly forking task. On the oth << 
277  * than one child isn't reused even if there w << 
278  * walker has a good chance of avoiding scanni << 
279  * searches where page is mapped.              << 
280  */                                               239  */
281 int anon_vma_clone(struct vm_area_struct *dst,    240 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
282 {                                                 241 {
283         struct anon_vma_chain *avc, *pavc;        242         struct anon_vma_chain *avc, *pavc;
284         struct anon_vma *root = NULL;             243         struct anon_vma *root = NULL;
285                                                   244 
286         list_for_each_entry_reverse(pavc, &src    245         list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
287                 struct anon_vma *anon_vma;        246                 struct anon_vma *anon_vma;
288                                                   247 
289                 avc = anon_vma_chain_alloc(GFP    248                 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
290                 if (unlikely(!avc)) {             249                 if (unlikely(!avc)) {
291                         unlock_anon_vma_root(r    250                         unlock_anon_vma_root(root);
292                         root = NULL;              251                         root = NULL;
293                         avc = anon_vma_chain_a    252                         avc = anon_vma_chain_alloc(GFP_KERNEL);
294                         if (!avc)                 253                         if (!avc)
295                                 goto enomem_fa    254                                 goto enomem_failure;
296                 }                                 255                 }
297                 anon_vma = pavc->anon_vma;        256                 anon_vma = pavc->anon_vma;
298                 root = lock_anon_vma_root(root    257                 root = lock_anon_vma_root(root, anon_vma);
299                 anon_vma_chain_link(dst, avc,     258                 anon_vma_chain_link(dst, avc, anon_vma);
300                                                << 
301                 /*                             << 
302                  * Reuse existing anon_vma if  << 
303                  * anon_vma child.             << 
304                  *                             << 
305                  * Root anon_vma is never reus << 
306                  * it has self-parent referenc << 
307                  */                            << 
308                 if (!dst->anon_vma && src->ano << 
309                     anon_vma->num_children < 2 << 
310                     anon_vma->num_active_vmas  << 
311                         dst->anon_vma = anon_v << 
312         }                                         259         }
313         if (dst->anon_vma)                     << 
314                 dst->anon_vma->num_active_vmas << 
315         unlock_anon_vma_root(root);               260         unlock_anon_vma_root(root);
316         return 0;                                 261         return 0;
317                                                   262 
318  enomem_failure:                                  263  enomem_failure:
319         /*                                     << 
320          * dst->anon_vma is dropped here other << 
321          * be incorrectly decremented in unlin << 
322          * We can safely do this because calle << 
323          * about dst->anon_vma if anon_vma_clo << 
324          */                                    << 
325         dst->anon_vma = NULL;                  << 
326         unlink_anon_vmas(dst);                    264         unlink_anon_vmas(dst);
327         return -ENOMEM;                           265         return -ENOMEM;
328 }                                                 266 }
329                                                   267 
330 /*                                                268 /*
331  * Attach vma to its own anon_vma, as well as     269  * Attach vma to its own anon_vma, as well as to the anon_vmas that
332  * the corresponding VMA in the parent process    270  * the corresponding VMA in the parent process is attached to.
333  * Returns 0 on success, non-zero on failure.     271  * Returns 0 on success, non-zero on failure.
334  */                                               272  */
335 int anon_vma_fork(struct vm_area_struct *vma,     273 int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
336 {                                                 274 {
337         struct anon_vma_chain *avc;               275         struct anon_vma_chain *avc;
338         struct anon_vma *anon_vma;                276         struct anon_vma *anon_vma;
339         int error;                             << 
340                                                   277 
341         /* Don't bother if the parent process     278         /* Don't bother if the parent process has no anon_vma here. */
342         if (!pvma->anon_vma)                      279         if (!pvma->anon_vma)
343                 return 0;                         280                 return 0;
344                                                   281 
345         /* Drop inherited anon_vma, we'll reus << 
346         vma->anon_vma = NULL;                  << 
347                                                << 
348         /*                                        282         /*
349          * First, attach the new VMA to the pa    283          * First, attach the new VMA to the parent VMA's anon_vmas,
350          * so rmap can find non-COWed pages in    284          * so rmap can find non-COWed pages in child processes.
351          */                                       285          */
352         error = anon_vma_clone(vma, pvma);     !! 286         if (anon_vma_clone(vma, pvma))
353         if (error)                             !! 287                 return -ENOMEM;
354                 return error;                  << 
355                                                << 
356         /* An existing anon_vma has been reuse << 
357         if (vma->anon_vma)                     << 
358                 return 0;                      << 
359                                                   288 
360         /* Then add our own anon_vma. */          289         /* Then add our own anon_vma. */
361         anon_vma = anon_vma_alloc();              290         anon_vma = anon_vma_alloc();
362         if (!anon_vma)                            291         if (!anon_vma)
363                 goto out_error;                   292                 goto out_error;
364         anon_vma->num_active_vmas++;           << 
365         avc = anon_vma_chain_alloc(GFP_KERNEL)    293         avc = anon_vma_chain_alloc(GFP_KERNEL);
366         if (!avc)                                 294         if (!avc)
367                 goto out_error_free_anon_vma;     295                 goto out_error_free_anon_vma;
368                                                   296 
369         /*                                        297         /*
370          * The root anon_vma's rwsem is the lo !! 298          * The root anon_vma's spinlock is the lock actually used when we
371          * lock any of the anon_vmas in this a    299          * lock any of the anon_vmas in this anon_vma tree.
372          */                                       300          */
373         anon_vma->root = pvma->anon_vma->root;    301         anon_vma->root = pvma->anon_vma->root;
374         anon_vma->parent = pvma->anon_vma;     << 
375         /*                                        302         /*
376          * With refcounts, an anon_vma can sta    303          * With refcounts, an anon_vma can stay around longer than the
377          * process it belongs to. The root ano    304          * process it belongs to. The root anon_vma needs to be pinned until
378          * this anon_vma is freed, because the    305          * this anon_vma is freed, because the lock lives in the root.
379          */                                       306          */
380         get_anon_vma(anon_vma->root);             307         get_anon_vma(anon_vma->root);
381         /* Mark this anon_vma as the one where    308         /* Mark this anon_vma as the one where our new (COWed) pages go. */
382         vma->anon_vma = anon_vma;                 309         vma->anon_vma = anon_vma;
383         anon_vma_lock_write(anon_vma);            310         anon_vma_lock_write(anon_vma);
384         anon_vma_chain_link(vma, avc, anon_vma    311         anon_vma_chain_link(vma, avc, anon_vma);
385         anon_vma->parent->num_children++;      << 
386         anon_vma_unlock_write(anon_vma);          312         anon_vma_unlock_write(anon_vma);
387                                                   313 
388         return 0;                                 314         return 0;
389                                                   315 
390  out_error_free_anon_vma:                         316  out_error_free_anon_vma:
391         put_anon_vma(anon_vma);                   317         put_anon_vma(anon_vma);
392  out_error:                                       318  out_error:
393         unlink_anon_vmas(vma);                    319         unlink_anon_vmas(vma);
394         return -ENOMEM;                           320         return -ENOMEM;
395 }                                                 321 }
396                                                   322 
397 void unlink_anon_vmas(struct vm_area_struct *v    323 void unlink_anon_vmas(struct vm_area_struct *vma)
398 {                                                 324 {
399         struct anon_vma_chain *avc, *next;        325         struct anon_vma_chain *avc, *next;
400         struct anon_vma *root = NULL;             326         struct anon_vma *root = NULL;
401                                                   327 
402         /*                                        328         /*
403          * Unlink each anon_vma chained to the    329          * Unlink each anon_vma chained to the VMA.  This list is ordered
404          * from newest to oldest, ensuring the    330          * from newest to oldest, ensuring the root anon_vma gets freed last.
405          */                                       331          */
406         list_for_each_entry_safe(avc, next, &v    332         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
407                 struct anon_vma *anon_vma = av    333                 struct anon_vma *anon_vma = avc->anon_vma;
408                                                   334 
409                 root = lock_anon_vma_root(root    335                 root = lock_anon_vma_root(root, anon_vma);
410                 anon_vma_interval_tree_remove(    336                 anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
411                                                   337 
412                 /*                                338                 /*
413                  * Leave empty anon_vmas on th    339                  * Leave empty anon_vmas on the list - we'll need
414                  * to free them outside the lo    340                  * to free them outside the lock.
415                  */                               341                  */
416                 if (RB_EMPTY_ROOT(&anon_vma->r !! 342                 if (RB_EMPTY_ROOT(&anon_vma->rb_root))
417                         anon_vma->parent->num_ << 
418                         continue;                 343                         continue;
419                 }                              << 
420                                                   344 
421                 list_del(&avc->same_vma);         345                 list_del(&avc->same_vma);
422                 anon_vma_chain_free(avc);         346                 anon_vma_chain_free(avc);
423         }                                         347         }
424         if (vma->anon_vma) {                   << 
425                 vma->anon_vma->num_active_vmas << 
426                                                << 
427                 /*                             << 
428                  * vma would still be needed a << 
429                  * when handle fault.          << 
430                  */                            << 
431                 vma->anon_vma = NULL;          << 
432         }                                      << 
433         unlock_anon_vma_root(root);               348         unlock_anon_vma_root(root);
434                                                   349 
435         /*                                        350         /*
436          * Iterate the list once more, it now     351          * Iterate the list once more, it now only contains empty and unlinked
437          * anon_vmas, destroy them. Could not     352          * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
438          * needing to write-acquire the anon_v    353          * needing to write-acquire the anon_vma->root->rwsem.
439          */                                       354          */
440         list_for_each_entry_safe(avc, next, &v    355         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
441                 struct anon_vma *anon_vma = av    356                 struct anon_vma *anon_vma = avc->anon_vma;
442                                                   357 
443                 VM_WARN_ON(anon_vma->num_child << 
444                 VM_WARN_ON(anon_vma->num_activ << 
445                 put_anon_vma(anon_vma);           358                 put_anon_vma(anon_vma);
446                                                   359 
447                 list_del(&avc->same_vma);         360                 list_del(&avc->same_vma);
448                 anon_vma_chain_free(avc);         361                 anon_vma_chain_free(avc);
449         }                                         362         }
450 }                                                 363 }
451                                                   364 
452 static void anon_vma_ctor(void *data)             365 static void anon_vma_ctor(void *data)
453 {                                                 366 {
454         struct anon_vma *anon_vma = data;         367         struct anon_vma *anon_vma = data;
455                                                   368 
456         init_rwsem(&anon_vma->rwsem);             369         init_rwsem(&anon_vma->rwsem);
457         atomic_set(&anon_vma->refcount, 0);       370         atomic_set(&anon_vma->refcount, 0);
458         anon_vma->rb_root = RB_ROOT_CACHED;    !! 371         anon_vma->rb_root = RB_ROOT;
459 }                                                 372 }
460                                                   373 
461 void __init anon_vma_init(void)                   374 void __init anon_vma_init(void)
462 {                                                 375 {
463         anon_vma_cachep = kmem_cache_create("a    376         anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
464                         0, SLAB_TYPESAFE_BY_RC !! 377                         0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
465                         anon_vma_ctor);        !! 378         anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
466         anon_vma_chain_cachep = KMEM_CACHE(ano << 
467                         SLAB_PANIC|SLAB_ACCOUN << 
468 }                                                 379 }
469                                                   380 
470 /*                                                381 /*
471  * Getting a lock on a stable anon_vma from a     382  * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
472  *                                                383  *
473  * Since there is no serialization what so eve !! 384  * Since there is no serialization what so ever against page_remove_rmap()
474  * the best this function can do is return a r !! 385  * the best this function can do is return a locked anon_vma that might
475  * that might have been relevant to this page. !! 386  * have been relevant to this page.
476  *                                                387  *
477  * The page might have been remapped to a diff    388  * The page might have been remapped to a different anon_vma or the anon_vma
478  * returned may already be freed (and even reu    389  * returned may already be freed (and even reused).
479  *                                                390  *
480  * In case it was remapped to a different anon    391  * In case it was remapped to a different anon_vma, the new anon_vma will be a
481  * child of the old anon_vma, and the anon_vma    392  * child of the old anon_vma, and the anon_vma lifetime rules will therefore
482  * ensure that any anon_vma obtained from the     393  * ensure that any anon_vma obtained from the page will still be valid for as
483  * long as we observe page_mapped() [ hence al    394  * long as we observe page_mapped() [ hence all those page_mapped() tests ].
484  *                                                395  *
485  * All users of this function must be very car    396  * All users of this function must be very careful when walking the anon_vma
486  * chain and verify that the page in question     397  * chain and verify that the page in question is indeed mapped in it
487  * [ something equivalent to page_mapped_in_vm    398  * [ something equivalent to page_mapped_in_vma() ].
488  *                                                399  *
489  * Since anon_vma's slab is SLAB_TYPESAFE_BY_R !! 400  * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
490  * folio_remove_rmap_*() that the anon_vma poi !! 401  * that the anon_vma pointer from page->mapping is valid if there is a
491  * if there is a mapcount, we can dereference  !! 402  * mapcount, we can dereference the anon_vma after observing those.
492  * those.                                      << 
493  *                                             << 
494  * NOTE: the caller should normally hold folio << 
495  * not, the caller needs to double check the a << 
496  * taking the anon_vma lock for either read or << 
497  * concurrently without folio lock protection) << 
498  * which has already covered that, and comment << 
499  */                                               403  */
500 struct anon_vma *folio_get_anon_vma(struct fol !! 404 struct anon_vma *page_get_anon_vma(struct page *page)
501 {                                                 405 {
502         struct anon_vma *anon_vma = NULL;         406         struct anon_vma *anon_vma = NULL;
503         unsigned long anon_mapping;               407         unsigned long anon_mapping;
504                                                   408 
505         rcu_read_lock();                          409         rcu_read_lock();
506         anon_mapping = (unsigned long)READ_ONC !! 410         anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
507         if ((anon_mapping & PAGE_MAPPING_FLAGS    411         if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
508                 goto out;                         412                 goto out;
509         if (!folio_mapped(folio))              !! 413         if (!page_mapped(page))
510                 goto out;                         414                 goto out;
511                                                   415 
512         anon_vma = (struct anon_vma *) (anon_m    416         anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
513         if (!atomic_inc_not_zero(&anon_vma->re    417         if (!atomic_inc_not_zero(&anon_vma->refcount)) {
514                 anon_vma = NULL;                  418                 anon_vma = NULL;
515                 goto out;                         419                 goto out;
516         }                                         420         }
517                                                   421 
518         /*                                        422         /*
519          * If this folio is still mapped, then !! 423          * If this page is still mapped, then its anon_vma cannot have been
520          * freed.  But if it has been unmapped    424          * freed.  But if it has been unmapped, we have no security against the
521          * anon_vma structure being freed and     425          * anon_vma structure being freed and reused (for another anon_vma:
522          * SLAB_TYPESAFE_BY_RCU guarantees tha !! 426          * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
523          * above cannot corrupt).                 427          * above cannot corrupt).
524          */                                       428          */
525         if (!folio_mapped(folio)) {            !! 429         if (!page_mapped(page)) {
526                 rcu_read_unlock();                430                 rcu_read_unlock();
527                 put_anon_vma(anon_vma);           431                 put_anon_vma(anon_vma);
528                 return NULL;                      432                 return NULL;
529         }                                         433         }
530 out:                                              434 out:
531         rcu_read_unlock();                        435         rcu_read_unlock();
532                                                   436 
533         return anon_vma;                          437         return anon_vma;
534 }                                                 438 }
535                                                   439 
536 /*                                                440 /*
537  * Similar to folio_get_anon_vma() except it l !! 441  * Similar to page_get_anon_vma() except it locks the anon_vma.
538  *                                                442  *
539  * Its a little more complex as it tries to ke    443  * Its a little more complex as it tries to keep the fast path to a single
540  * atomic op -- the trylock. If we fail the tr    444  * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
541  * reference like with folio_get_anon_vma() an !! 445  * reference like with page_get_anon_vma() and then block on the mutex.
542  * on !rwc->try_lock case.                     << 
543  */                                               446  */
544 struct anon_vma *folio_lock_anon_vma_read(stru !! 447 struct anon_vma *page_lock_anon_vma_read(struct page *page)
545                                           stru << 
546 {                                                 448 {
547         struct anon_vma *anon_vma = NULL;         449         struct anon_vma *anon_vma = NULL;
548         struct anon_vma *root_anon_vma;           450         struct anon_vma *root_anon_vma;
549         unsigned long anon_mapping;               451         unsigned long anon_mapping;
550                                                   452 
551 retry:                                         << 
552         rcu_read_lock();                          453         rcu_read_lock();
553         anon_mapping = (unsigned long)READ_ONC !! 454         anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
554         if ((anon_mapping & PAGE_MAPPING_FLAGS    455         if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
555                 goto out;                         456                 goto out;
556         if (!folio_mapped(folio))              !! 457         if (!page_mapped(page))
557                 goto out;                         458                 goto out;
558                                                   459 
559         anon_vma = (struct anon_vma *) (anon_m    460         anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
560         root_anon_vma = READ_ONCE(anon_vma->ro !! 461         root_anon_vma = ACCESS_ONCE(anon_vma->root);
561         if (down_read_trylock(&root_anon_vma->    462         if (down_read_trylock(&root_anon_vma->rwsem)) {
562                 /*                                463                 /*
563                  * folio_move_anon_rmap() migh !! 464                  * If the page is still mapped, then this anon_vma is still
564                  * might not hold the folio lo << 
565                  */                            << 
566                 if (unlikely((unsigned long)RE << 
567                              anon_mapping)) {  << 
568                         up_read(&root_anon_vma << 
569                         rcu_read_unlock();     << 
570                         goto retry;            << 
571                 }                              << 
572                                                << 
573                 /*                             << 
574                  * If the folio is still mappe << 
575                  * its anon_vma, and holding t    465                  * its anon_vma, and holding the mutex ensures that it will
576                  * not go away, see anon_vma_f    466                  * not go away, see anon_vma_free().
577                  */                               467                  */
578                 if (!folio_mapped(folio)) {    !! 468                 if (!page_mapped(page)) {
579                         up_read(&root_anon_vma    469                         up_read(&root_anon_vma->rwsem);
580                         anon_vma = NULL;          470                         anon_vma = NULL;
581                 }                                 471                 }
582                 goto out;                         472                 goto out;
583         }                                         473         }
584                                                   474 
585         if (rwc && rwc->try_lock) {            << 
586                 anon_vma = NULL;               << 
587                 rwc->contended = true;         << 
588                 goto out;                      << 
589         }                                      << 
590                                                << 
591         /* trylock failed, we got to sleep */     475         /* trylock failed, we got to sleep */
592         if (!atomic_inc_not_zero(&anon_vma->re    476         if (!atomic_inc_not_zero(&anon_vma->refcount)) {
593                 anon_vma = NULL;                  477                 anon_vma = NULL;
594                 goto out;                         478                 goto out;
595         }                                         479         }
596                                                   480 
597         if (!folio_mapped(folio)) {            !! 481         if (!page_mapped(page)) {
598                 rcu_read_unlock();                482                 rcu_read_unlock();
599                 put_anon_vma(anon_vma);           483                 put_anon_vma(anon_vma);
600                 return NULL;                      484                 return NULL;
601         }                                         485         }
602                                                   486 
603         /* we pinned the anon_vma, its safe to    487         /* we pinned the anon_vma, its safe to sleep */
604         rcu_read_unlock();                        488         rcu_read_unlock();
605         anon_vma_lock_read(anon_vma);             489         anon_vma_lock_read(anon_vma);
606                                                   490 
607         /*                                     << 
608          * folio_move_anon_rmap() might have c << 
609          * not hold the folio lock here.       << 
610          */                                    << 
611         if (unlikely((unsigned long)READ_ONCE( << 
612                      anon_mapping)) {          << 
613                 anon_vma_unlock_read(anon_vma) << 
614                 put_anon_vma(anon_vma);        << 
615                 anon_vma = NULL;               << 
616                 goto retry;                    << 
617         }                                      << 
618                                                << 
619         if (atomic_dec_and_test(&anon_vma->ref    491         if (atomic_dec_and_test(&anon_vma->refcount)) {
620                 /*                                492                 /*
621                  * Oops, we held the last refc    493                  * Oops, we held the last refcount, release the lock
622                  * and bail -- can't simply us    494                  * and bail -- can't simply use put_anon_vma() because
623                  * we'll deadlock on the anon_    495                  * we'll deadlock on the anon_vma_lock_write() recursion.
624                  */                               496                  */
625                 anon_vma_unlock_read(anon_vma)    497                 anon_vma_unlock_read(anon_vma);
626                 __put_anon_vma(anon_vma);         498                 __put_anon_vma(anon_vma);
627                 anon_vma = NULL;                  499                 anon_vma = NULL;
628         }                                         500         }
629                                                   501 
630         return anon_vma;                          502         return anon_vma;
631                                                   503 
632 out:                                              504 out:
633         rcu_read_unlock();                        505         rcu_read_unlock();
634         return anon_vma;                          506         return anon_vma;
635 }                                                 507 }
636                                                   508 
637 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUS !! 509 void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
638 /*                                             << 
639  * Flush TLB entries for recently unmapped pag << 
640  * important if a PTE was dirty when it was un << 
641  * before any IO is initiated on the page to p << 
642  * it must be flushed before freeing to preven << 
643  */                                            << 
644 void try_to_unmap_flush(void)                  << 
645 {                                              << 
646         struct tlbflush_unmap_batch *tlb_ubc = << 
647                                                << 
648         if (!tlb_ubc->flush_required)          << 
649                 return;                        << 
650                                                << 
651         arch_tlbbatch_flush(&tlb_ubc->arch);   << 
652         tlb_ubc->flush_required = false;       << 
653         tlb_ubc->writable = false;             << 
654 }                                              << 
655                                                << 
656 /* Flush iff there are potentially writable TL << 
657 void try_to_unmap_flush_dirty(void)            << 
658 {                                                 510 {
659         struct tlbflush_unmap_batch *tlb_ubc = !! 511         anon_vma_unlock_read(anon_vma);
660                                                << 
661         if (tlb_ubc->writable)                 << 
662                 try_to_unmap_flush();          << 
663 }                                                 512 }
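/*
 * Hedged caller-side sketch, loosely modeled on shrink_folio_list() in
 * mm/vmscan.c (much simplified, not code from rmap.c): unmap a whole
 * batch with TTU_BATCH_FLUSH so each PTE clear only queues a flush,
 * drain potentially-writable entries before any IO starts, and drain
 * everything before the pages can be freed.  start_writeback() is a
 * made-up placeholder; the flush helpers themselves are private to mm/.
 */
static void demo_reclaim_batch(struct list_head *folio_list)
{
        struct folio *folio, *next;

        list_for_each_entry_safe(folio, next, folio_list, lru)
                try_to_unmap(folio, TTU_BATCH_FLUSH);   /* defers the TLB IPIs */

        /*
         * A stale writable TLB entry could still dirty a folio after its
         * PTE was cleared, so flush before the data is written out.
         */
        try_to_unmap_flush_dirty();

        list_for_each_entry_safe(folio, next, folio_list, lru) {
                if (folio_test_dirty(folio))
                        start_writeback(folio);         /* hypothetical */
        }

        /* No CPU may keep a stale translation once the pages are freed. */
        try_to_unmap_flush();
}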
664                                                   513 
665 /*                                                514 /*
666  * Bits 0-14 of mm->tlb_flush_batched record p !! 515  * At what user virtual address is page expected in @vma?
667  * Bits 16-30 of mm->tlb_flush_batched bit rec << 
668  */                                               516  */
669 #define TLB_FLUSH_BATCH_FLUSHED_SHIFT   16     !! 517 static inline unsigned long
670 #define TLB_FLUSH_BATCH_PENDING_MASK           !! 518 __vma_address(struct page *page, struct vm_area_struct *vma)
671         ((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT  !! 519 {
672 #define TLB_FLUSH_BATCH_PENDING_LARGE          !! 520         pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
673         (TLB_FLUSH_BATCH_PENDING_MASK / 2)     << 
674                                                << 
675 static void set_tlb_ubc_flush_pending(struct m << 
676                                       unsigned << 
677 {                                              << 
678         struct tlbflush_unmap_batch *tlb_ubc = << 
679         int batch;                             << 
680         bool writable = pte_dirty(pteval);     << 
681                                                << 
682         if (!pte_accessible(mm, pteval))       << 
683                 return;                        << 
684                                                << 
685         arch_tlbbatch_add_pending(&tlb_ubc->ar << 
686         tlb_ubc->flush_required = true;        << 
687                                                   521 
688         /*                                     !! 522         if (unlikely(is_vm_hugetlb_page(vma)))
689          * Ensure compiler does not re-order t !! 523                 pgoff = page->index << huge_page_order(page_hstate(page));
690          * before the PTE is cleared.          << 
691          */                                    << 
692         barrier();                             << 
693         batch = atomic_read(&mm->tlb_flush_bat << 
694 retry:                                         << 
695         if ((batch & TLB_FLUSH_BATCH_PENDING_M << 
696                 /*                             << 
697                  * Prevent `pending' from catc << 
698                  * overflow.  Reset `pending'  << 
699                  * `pending' becomes large.    << 
700                  */                            << 
701                 if (!atomic_try_cmpxchg(&mm->t << 
702                         goto retry;            << 
703         } else {                               << 
704                 atomic_inc(&mm->tlb_flush_batc << 
705         }                                      << 
706                                                   524 
707         /*                                     !! 525         return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
708          * If the PTE was dirty then it's best << 
709          * caller must use try_to_unmap_flush_ << 
710          * before the page is queued for IO.   << 
711          */                                    << 
712         if (writable)                          << 
713                 tlb_ubc->writable = true;      << 
714 }                                                 526 }
715                                                   527 
716 /*                                             !! 528 inline unsigned long
717  * Returns true if the TLB flush should be def !! 529 vma_address(struct page *page, struct vm_area_struct *vma)
718  * unmap operations to reduce IPIs.            << 
719  */                                            << 
720 static bool should_defer_flush(struct mm_struc << 
721 {                                                 530 {
722         if (!(flags & TTU_BATCH_FLUSH))        !! 531         unsigned long address = __vma_address(page, vma);
723                 return false;                  << 
724                                                << 
725         return arch_tlbbatch_should_defer(mm); << 
726 }                                              << 
727                                                   532 
728 /*                                             !! 533         /* page should be within @vma mapping range */
729  * Reclaim unmaps pages under the PTL but do n !! 534         VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
730  * releasing the PTL if TLB flushes are batche << 
731  * operation such as mprotect or munmap to rac << 
732  * the page and flushing the page. If this rac << 
733  * access to data via a stale TLB entry. Track << 
734  * batching in flight would be expensive durin << 
735  * whether TLB batching occurred in the past a << 
736  * if required. This will cost one additional  << 
737  * by the first operation at risk such as mpro << 
738  *                                             << 
739  * This must be called under the PTL so that a << 
740  * that is potentially a "reclaim vs mprotect/ << 
741  * via the PTL.                                << 
742  */                                            << 
743 void flush_tlb_batched_pending(struct mm_struc << 
744 {                                              << 
745         int batch = atomic_read(&mm->tlb_flush << 
746         int pending = batch & TLB_FLUSH_BATCH_ << 
747         int flushed = batch >> TLB_FLUSH_BATCH << 
748                                                << 
749         if (pending != flushed) {              << 
750                 arch_flush_tlb_batched_pending << 
751                 /*                             << 
752                  * If the new TLB flushing is  << 
753                  * mm->tlb_flush_batched as is << 
754                  */                            << 
755                 atomic_cmpxchg(&mm->tlb_flush_ << 
756                                pending | (pend << 
757         }                                      << 
758 }                                              << 
759 #else                                          << 
760 static void set_tlb_ubc_flush_pending(struct m << 
761                                       unsigned << 
762 {                                              << 
763 }                                              << 
764                                                   535 
765 static bool should_defer_flush(struct mm_struc !! 536         return address;
766 {                                              << 
767         return false;                          << 
768 }                                                 537 }
769 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_F << 
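/*
 * Hedged, userspace-runnable model of the mm->tlb_flush_batched encoding
 * used above (a standalone demo, not kernel code): the low 15 bits count
 * unmap batches that still need a TLB flush, bits 16-30 count batches
 * already flushed, and a flush is required whenever the two differ.
 */
#include <stdio.h>

#define FLUSHED_SHIFT   16
#define PENDING_MASK    ((1u << (FLUSHED_SHIFT - 1)) - 1)       /* bits 0-14 */

static unsigned int tlb_flush_batched;  /* stand-in for mm->tlb_flush_batched */

static void record_pending_unmap(void)          /* cf. set_tlb_ubc_flush_pending() */
{
        unsigned int pending = tlb_flush_batched & PENDING_MASK;

        if (pending >= PENDING_MASK / 2)        /* keep 'pending' from overflowing */
                tlb_flush_batched = 1;
        else
                tlb_flush_batched++;
}

static void flush_if_pending(void)              /* cf. flush_tlb_batched_pending() */
{
        unsigned int pending = tlb_flush_batched & PENDING_MASK;
        unsigned int flushed = tlb_flush_batched >> FLUSHED_SHIFT;

        if (pending != flushed) {
                printf("flush TLB: pending=%u flushed=%u\n", pending, flushed);
                tlb_flush_batched = pending | (pending << FLUSHED_SHIFT);
        }
}

int main(void)
{
        record_pending_unmap();
        record_pending_unmap();
        flush_if_pending();     /* flushes: 2 pending vs 0 flushed */
        flush_if_pending();     /* no-op: the counts now match */
        return 0;
}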
770                                                   538 
771 /*                                                539 /*
772  * At what user virtual address is page expect    540  * At what user virtual address is page expected in vma?
773  * Caller should check the page is actually pa    541  * Caller should check the page is actually part of the vma.
774  */                                               542  */
775 unsigned long page_address_in_vma(struct page     543 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
776 {                                                 544 {
777         struct folio *folio = page_folio(page) !! 545         unsigned long address;
778         pgoff_t pgoff;                         !! 546         if (PageAnon(page)) {
779                                                !! 547                 struct anon_vma *page__anon_vma = page_anon_vma(page);
780         if (folio_test_anon(folio)) {          << 
781                 struct anon_vma *page__anon_vm << 
782                 /*                                548                 /*
783                  * Note: swapoff's unuse_vma()    549                  * Note: swapoff's unuse_vma() is more efficient with this
784                  * check, and needs it to matc    550                  * check, and needs it to match anon_vma when KSM is active.
785                  */                               551                  */
786                 if (!vma->anon_vma || !page__a    552                 if (!vma->anon_vma || !page__anon_vma ||
787                     vma->anon_vma->root != pag    553                     vma->anon_vma->root != page__anon_vma->root)
788                         return -EFAULT;           554                         return -EFAULT;
789         } else if (!vma->vm_file) {            !! 555         } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
                                                   >> 556                 if (!vma->vm_file ||
                                                   >> 557                     vma->vm_file->f_mapping != page->mapping)
                                                   >> 558                         return -EFAULT;
                                                   >> 559         } else
790                 return -EFAULT;                   560                 return -EFAULT;
791         } else if (vma->vm_file->f_mapping !=  !! 561         address = __vma_address(page, vma);
                                                   >> 562         if (unlikely(address < vma->vm_start || address >= vma->vm_end))
792                 return -EFAULT;                   563                 return -EFAULT;
793         }                                      !! 564         return address;
794                                                << 
795         /* The !page__anon_vma above handles K << 
796         pgoff = folio->index + folio_page_idx( << 
797         return vma_address(vma, pgoff, 1);     << 
798 }                                                 565 }
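/*
 * Hedged, userspace-runnable illustration of the arithmetic behind
 * vma_address()/__vma_address() (made-up values, 4KiB pages assumed, not
 * kernel code): a page at file offset @pgoff is expected at
 *     vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT)
 * and the result is only meaningful inside [vm_start, vm_end).
 */
#include <stdio.h>

#define PAGE_SHIFT      12

struct demo_vma {
        unsigned long vm_start, vm_end; /* user VA range of the mapping */
        unsigned long vm_pgoff;         /* file offset of vm_start, in pages */
};

static long demo_vma_address(const struct demo_vma *vma, unsigned long pgoff)
{
        unsigned long addr = vma->vm_start +
                             ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);

        if (addr < vma->vm_start || addr >= vma->vm_end)
                return -1;      /* outside this vma (-EFAULT in the kernel) */
        return (long)addr;
}

int main(void)
{
        /* 64KiB mapping of file pages 16..31 at 0x600000. */
        struct demo_vma vma = { 0x600000UL, 0x610000UL, 16 };

        printf("pgoff 20 -> %#lx\n",
               (unsigned long)demo_vma_address(&vma, 20));       /* 0x604000 */
        printf("pgoff 40 -> %ld\n", demo_vma_address(&vma, 40)); /* -1 */
        return 0;
}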
799                                                   566 
800 /*                                             << 
801  * Returns the actual pmd_t* where we expect ' << 
802  * NULL if it doesn't exist.  No guarantees /  << 
803  * represents.                                 << 
804  */                                            << 
805 pmd_t *mm_find_pmd(struct mm_struct *mm, unsig    567 pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
806 {                                                 568 {
807         pgd_t *pgd;                               569         pgd_t *pgd;
808         p4d_t *p4d;                            << 
809         pud_t *pud;                               570         pud_t *pud;
810         pmd_t *pmd = NULL;                        571         pmd_t *pmd = NULL;
811                                                   572 
812         pgd = pgd_offset(mm, address);            573         pgd = pgd_offset(mm, address);
813         if (!pgd_present(*pgd))                   574         if (!pgd_present(*pgd))
814                 goto out;                         575                 goto out;
815                                                   576 
816         p4d = p4d_offset(pgd, address);        !! 577         pud = pud_offset(pgd, address);
817         if (!p4d_present(*p4d))                << 
818                 goto out;                      << 
819                                                << 
820         pud = pud_offset(p4d, address);        << 
821         if (!pud_present(*pud))                   578         if (!pud_present(*pud))
822                 goto out;                         579                 goto out;
823                                                   580 
824         pmd = pmd_offset(pud, address);           581         pmd = pmd_offset(pud, address);
                                                   >> 582         if (!pmd_present(*pmd))
                                                   >> 583                 pmd = NULL;
825 out:                                              584 out:
826         return pmd;                               585         return pmd;
827 }                                                 586 }
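/*
 * Hedged usage sketch (not from rmap.c): mm_find_pmd() only returns a
 * pmd_t pointer with no guarantee about what the pmd maps, so a caller
 * that wants the pte still lets pte_offset_map_lock() decide whether a
 * page table is really there (in recent kernels it returns NULL for a
 * none/huge/unstable pmd).
 */
static bool demo_pte_present(struct mm_struct *mm, unsigned long address)
{
        spinlock_t *ptl;
        pmd_t *pmd;
        pte_t *pte;
        bool ret;

        pmd = mm_find_pmd(mm, address);
        if (!pmd)
                return false;

        pte = pte_offset_map_lock(mm, pmd, address, &ptl);
        if (!pte)
                return false;
        ret = pte_present(ptep_get(pte));
        pte_unmap_unlock(pte, ptl);
        return ret;
}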
828                                                   587 
829 struct folio_referenced_arg {                  !! 588 /*
830         int mapcount;                          !! 589  * Check that @page is mapped at @address into @mm.
831         int referenced;                        !! 590  *
832         unsigned long vm_flags;                !! 591  * If @sync is false, page_check_address may perform a racy check to avoid
833         struct mem_cgroup *memcg;              !! 592  * the page table lock when the pte is not present (helpful when reclaiming
834 };                                             !! 593  * highly shared pages).
                                                   >> 594  *
                                                   >> 595  * On success returns with pte mapped and locked.
                                                   >> 596  */
                                                   >> 597 pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
                                                   >> 598                           unsigned long address, spinlock_t **ptlp, int sync)
                                                   >> 599 {
                                                   >> 600         pmd_t *pmd;
                                                   >> 601         pte_t *pte;
                                                   >> 602         spinlock_t *ptl;
                                                   >> 603 
                                                   >> 604         if (unlikely(PageHuge(page))) {
                                                   >> 605                 /* when pud is not present, pte will be NULL */
                                                   >> 606                 pte = huge_pte_offset(mm, address);
                                                   >> 607                 if (!pte)
                                                   >> 608                         return NULL;
                                                   >> 609 
                                                   >> 610                 ptl = &mm->page_table_lock;
                                                   >> 611                 goto check;
                                                   >> 612         }
                                                   >> 613 
                                                   >> 614         pmd = mm_find_pmd(mm, address);
                                                   >> 615         if (!pmd)
                                                   >> 616                 return NULL;
                                                   >> 617 
                                                   >> 618         if (pmd_trans_huge(*pmd))
                                                   >> 619                 return NULL;
                                                   >> 620 
                                                   >> 621         pte = pte_offset_map(pmd, address);
                                                   >> 622         /* Make a quick check before getting the lock */
                                                   >> 623         if (!sync && !pte_present(*pte)) {
                                                   >> 624                 pte_unmap(pte);
                                                   >> 625                 return NULL;
                                                   >> 626         }
                                                   >> 627 
                                                   >> 628         ptl = pte_lockptr(mm, pmd);
                                                   >> 629 check:
                                                   >> 630         spin_lock(ptl);
                                                   >> 631         if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
                                                   >> 632                 *ptlp = ptl;
                                                   >> 633                 return pte;
                                                   >> 634         }
                                                   >> 635         pte_unmap_unlock(pte, ptl);
                                                   >> 636         return NULL;
                                                   >> 637 }
                                                   >> 638 
                                                   >> 639 /**
                                                   >> 640  * page_mapped_in_vma - check whether a page is really mapped in a VMA
                                                   >> 641  * @page: the page to test
                                                   >> 642  * @vma: the VMA to test
                                                   >> 643  *
                                                   >> 644  * Returns 1 if the page is mapped into the page tables of the VMA, 0
                                                   >> 645  * if the page is not mapped into the page tables of this VMA.  Only
                                                   >> 646  * valid for normal file or anonymous VMAs.
                                                   >> 647  */
                                                   >> 648 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
                                                   >> 649 {
                                                   >> 650         unsigned long address;
                                                   >> 651         pte_t *pte;
                                                   >> 652         spinlock_t *ptl;
                                                   >> 653 
                                                   >> 654         address = __vma_address(page, vma);
                                                   >> 655         if (unlikely(address < vma->vm_start || address >= vma->vm_end))
                                                   >> 656                 return 0;
                                                   >> 657         pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
                                                   >> 658         if (!pte)                       /* the page is not in this mm */
                                                   >> 659                 return 0;
                                                   >> 660         pte_unmap_unlock(pte, ptl);
                                                   >> 661 
                                                   >> 662         return 1;
                                                   >> 663 }
835                                                   664 
836 /*                                                665 /*
837  * arg: folio_referenced_arg will be passed    !! 666  * Subfunctions of page_referenced: page_referenced_one called
                                                   >> 667  * repeatedly from either page_referenced_anon or page_referenced_file.
838  */                                               668  */
839 static bool folio_referenced_one(struct folio  !! 669 int page_referenced_one(struct page *page, struct vm_area_struct *vma,
840                 struct vm_area_struct *vma, un !! 670                         unsigned long address, unsigned int *mapcount,
                                                   >> 671                         unsigned long *vm_flags)
841 {                                                 672 {
842         struct folio_referenced_arg *pra = arg !! 673         struct mm_struct *mm = vma->vm_mm;
843         DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma << 
844         int referenced = 0;                       674         int referenced = 0;
845         unsigned long start = address, ptes =  << 
846                                                   675 
847         while (page_vma_mapped_walk(&pvmw)) {  !! 676         if (unlikely(PageTransHuge(page))) {
848                 address = pvmw.address;        !! 677                 pmd_t *pmd;
                                                   >> 678 
                                                   >> 679                 spin_lock(&mm->page_table_lock);
                                                   >> 680                 /*
                                                   >> 681                  * rmap might return false positives; we must filter
                                                   >> 682                  * these out using page_check_address_pmd().
                                                   >> 683                  */
                                                   >> 684                 pmd = page_check_address_pmd(page, mm, address,
                                                   >> 685                                              PAGE_CHECK_ADDRESS_PMD_FLAG);
                                                   >> 686                 if (!pmd) {
                                                   >> 687                         spin_unlock(&mm->page_table_lock);
                                                   >> 688                         goto out;
                                                   >> 689                 }
849                                                   690 
850                 if (vma->vm_flags & VM_LOCKED)    691                 if (vma->vm_flags & VM_LOCKED) {
851                         if (!folio_test_large( !! 692                         spin_unlock(&mm->page_table_lock);
852                                 /* Restore the !! 693                         *mapcount = 0;  /* break early from loop */
853                                 mlock_vma_foli !! 694                         *vm_flags |= VM_LOCKED;
854                                 page_vma_mappe !! 695                         goto out;
855                                 pra->vm_flags  << 
856                                 return false;  << 
857                         }                      << 
858                         /*                     << 
859                          * For large folio ful << 
860                          * be handled after th << 
861                          *                     << 
862                          * For large folio cro << 
863                          * expected to be pick << 
864                          * should skip referen << 
865                          * the range of VM_LOC << 
866                          * should just count t << 
867                          * the range of VM_LOC << 
868                          */                    << 
869                         ptes++;                << 
870                         pra->mapcount--;       << 
871                         continue;              << 
872                 }                                 696                 }
873                                                   697 
                                                   >> 698                 /* go ahead even if the pmd is pmd_trans_splitting() */
                                                   >> 699                 if (pmdp_clear_flush_young_notify(vma, address, pmd))
                                                   >> 700                         referenced++;
                                                   >> 701                 spin_unlock(&mm->page_table_lock);
                                                   >> 702         } else {
                                                   >> 703                 pte_t *pte;
                                                   >> 704                 spinlock_t *ptl;
                                                   >> 705 
874                 /*                                706                 /*
875                  * Skip the non-shared swapbac !! 707                  * rmap might return false positives; we must filter
876                  * the exiting or OOM-reaped p !! 708                  * these out using page_check_address().
877                  * swap-out followed by an imm << 
878                  */                               709                  */
879                 if ((!atomic_read(&vma->vm_mm- !! 710                 pte = page_check_address(page, mm, address, &ptl, 0);
880                     check_stable_address_space !! 711                 if (!pte)
881                     folio_test_anon(folio) &&  !! 712                         goto out;
882                     !folio_likely_mapped_share !! 713 
883                         pra->referenced = -1;  !! 714                 if (vma->vm_flags & VM_LOCKED) {
884                         page_vma_mapped_walk_d !! 715                         pte_unmap_unlock(pte, ptl);
885                         return false;          !! 716                         *mapcount = 0;  /* break early from loop */
                                                   >> 717                         *vm_flags |= VM_LOCKED;
                                                   >> 718                         goto out;
886                 }                                 719                 }
887                                                   720 
888                 if (lru_gen_enabled() && pvmw. !! 721                 if (ptep_clear_flush_young_notify(vma, address, pte)) {
889                         if (lru_gen_look_aroun !! 722                         /*
890                                 referenced++;  !! 723                          * Don't treat a reference through a sequentially read
891                 } else if (pvmw.pte) {         !! 724                          * mapping as such.  If the page has been used in
892                         if (ptep_clear_flush_y !! 725                          * another mapping, we will catch it; if this other
893                                                !! 726                          * mapping is already gone, the unmap path will have
894                                 referenced++;  !! 727                          * set PG_referenced or activated the page.
895                 } else if (IS_ENABLED(CONFIG_T !! 728                          */
896                         if (pmdp_clear_flush_y !! 729                         if (likely(!VM_SequentialReadHint(vma)))
897                                                << 
898                                 referenced++;     730                                 referenced++;
899                 } else {                       << 
900                         /* unexpected pmd-mapp << 
901                         WARN_ON_ONCE(1);       << 
902                 }                                 731                 }
903                                                !! 732                 pte_unmap_unlock(pte, ptl);
904                 pra->mapcount--;               << 
905         }                                         733         }
906                                                   734 
907         if ((vma->vm_flags & VM_LOCKED) &&     !! 735         (*mapcount)--;
908                         folio_test_large(folio << 
909                         folio_within_vma(folio << 
910                 unsigned long s_align, e_align << 
911                                                << 
912                 s_align = ALIGN_DOWN(start, PM << 
913                 e_align = ALIGN_DOWN(start + f << 
914                                                << 
915                 /* folio doesn't cross page ta << 
916                 if ((s_align == e_align) && (p << 
917                         /* Restore the mlock w << 
918                         mlock_vma_folio(folio, << 
919                         pra->vm_flags |= VM_LO << 
920                         return false; /* To br << 
921                 }                              << 
922         }                                      << 
923                                                   736 
924         if (referenced)                           737         if (referenced)
925                 folio_clear_idle(folio);       !! 738                 *vm_flags |= vma->vm_flags;
926         if (folio_test_clear_young(folio))     !! 739 out:
927                 referenced++;                  !! 740         return referenced;
                                                   >> 741 }
928                                                   742 
929         if (referenced) {                      !! 743 static int page_referenced_anon(struct page *page,
930                 pra->referenced++;             !! 744                                 struct mem_cgroup *memcg,
931                 pra->vm_flags |= vma->vm_flags !! 745                                 unsigned long *vm_flags)
932         }                                      !! 746 {
                                                   >> 747         unsigned int mapcount;
                                                   >> 748         struct anon_vma *anon_vma;
                                                   >> 749         pgoff_t pgoff;
                                                   >> 750         struct anon_vma_chain *avc;
                                                   >> 751         int referenced = 0;
                                                   >> 752 
                                                   >> 753         anon_vma = page_lock_anon_vma_read(page);
                                                   >> 754         if (!anon_vma)
                                                   >> 755                 return referenced;
933                                                   756 
934         if (!pra->mapcount)                    !! 757         mapcount = page_mapcount(page);
935                 return false; /* To break the  !! 758         pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
                                                   >> 759         anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                                                   >> 760                 struct vm_area_struct *vma = avc->vma;
                                                   >> 761                 unsigned long address = vma_address(page, vma);
                                                   >> 762                 /*
                                                   >> 763                  * If we are reclaiming on behalf of a cgroup, skip
                                                   >> 764                  * counting on behalf of references from different
                                                   >> 765                  * cgroups
                                                   >> 766                  */
                                                   >> 767                 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
                                                   >> 768                         continue;
                                                   >> 769                 referenced += page_referenced_one(page, vma, address,
                                                   >> 770                                                   &mapcount, vm_flags);
                                                   >> 771                 if (!mapcount)
                                                   >> 772                         break;
                                                   >> 773         }
936                                                   774 
937         return true;                           !! 775         page_unlock_anon_vma_read(anon_vma);
                                                   >> 776         return referenced;
938 }                                                 777 }
939                                                   778 
940 static bool invalid_folio_referenced_vma(struc !! 779 /**
941 {                                              !! 780  * page_referenced_file - referenced check for object-based rmap
942         struct folio_referenced_arg *pra = arg !! 781  * @page: the page we're checking references on.
943         struct mem_cgroup *memcg = pra->memcg; !! 782  * @memcg: target memory control group
                                                   >> 783  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
                                                   >> 784  *
                                                   >> 785  * For an object-based mapped page, find all the places it is mapped and
                                                   >> 786  * check/clear the referenced flag.  This is done by following the page->mapping
                                                   >> 787  * pointer, then walking the chain of vmas it holds.  It returns the number
                                                   >> 788  * of references it found.
                                                   >> 789  *
                                                   >> 790  * This function is only called from page_referenced for object-based pages.
                                                   >> 791  */
                                                   >> 792 static int page_referenced_file(struct page *page,
                                                   >> 793                                 struct mem_cgroup *memcg,
                                                   >> 794                                 unsigned long *vm_flags)
                                                   >> 795 {
                                                   >> 796         unsigned int mapcount;
                                                   >> 797         struct address_space *mapping = page->mapping;
                                                   >> 798         pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
                                                   >> 799         struct vm_area_struct *vma;
                                                   >> 800         int referenced = 0;
944                                                   801 
945         /*                                        802         /*
946          * Ignore references from this mapping !! 803          * The caller's checks on page->mapping and !PageAnon have made
947          * folio has been used in another mapp !! 804          * sure that this is a file page: the check for page->mapping
948          * other mapping is already gone, the  !! 805          * excludes the case just before it gets set on an anon page.
949          * referenced flag or activated the fo << 
950          */                                       806          */
951         if (!vma_has_recency(vma))             !! 807         BUG_ON(PageAnon(page));
952                 return true;                   << 
953                                                   808 
954         /*                                        809         /*
955          * If we are reclaiming on behalf of a !! 810          * The page lock not only makes sure that page->mapping cannot
956          * of references from different cgroup !! 811          * suddenly be NULLified by truncation, it makes sure that the
                                                   >> 812          * structure at mapping cannot be freed and reused yet,
                                                   >> 813          * so we can safely take mapping->i_mmap_mutex.
957          */                                       814          */
958         if (memcg && !mm_match_cgroup(vma->vm_ !! 815         BUG_ON(!PageLocked(page));
959                 return true;                   << 
960                                                   816 
961         return false;                          !! 817         mutex_lock(&mapping->i_mmap_mutex);
                                                   >> 818 
                                                   >> 819         /*
                                                   >> 820          * i_mmap_mutex does not stabilize mapcount at all, but mapcount
                                                   >> 821          * is more likely to be accurate if we note it after spinning.
                                                   >> 822          */
                                                   >> 823         mapcount = page_mapcount(page);
                                                   >> 824 
                                                   >> 825         vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                                                   >> 826                 unsigned long address = vma_address(page, vma);
                                                   >> 827                 /*
                                                   >> 828                  * If we are reclaiming on behalf of a cgroup, skip
                                                   >> 829                  * counting on behalf of references from different
                                                   >> 830                  * cgroups
                                                   >> 831                  */
                                                   >> 832                 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
                                                   >> 833                         continue;
                                                   >> 834                 referenced += page_referenced_one(page, vma, address,
                                                   >> 835                                                   &mapcount, vm_flags);
                                                   >> 836                 if (!mapcount)
                                                   >> 837                         break;
                                                   >> 838         }
                                                   >> 839 
                                                   >> 840         mutex_unlock(&mapping->i_mmap_mutex);
                                                   >> 841         return referenced;
962 }                                                 842 }
963                                                   843 
964 /**                                               844 /**
965  * folio_referenced() - Test if the folio was  !! 845  * page_referenced - test if the page was referenced
966  * @folio: The folio to test.                  !! 846  * @page: the page to test
967  * @is_locked: Caller holds lock on the folio. !! 847  * @is_locked: caller holds lock on the page
968  * @memcg: target memory cgroup                   848  * @memcg: target memory cgroup
969  * @vm_flags: A combination of all the vma->vm !! 849  * @vm_flags: collect encountered vma->vm_flags that actually referenced the page
970  *                                             << 
971  * Quick test_and_clear_referenced for all map << 
972  *                                                850  *
973  * Return: The number of mappings which refere !! 851  * Quick test_and_clear_referenced for all mappings to a page,
974  * the function bailed out due to rmap lock co !! 852  * returns the number of ptes which referenced the page.
975  */                                               853  */
976 int folio_referenced(struct folio *folio, int  !! 854 int page_referenced(struct page *page,
977                      struct mem_cgroup *memcg, !! 855                     int is_locked,
978 {                                              !! 856                     struct mem_cgroup *memcg,
979         bool we_locked = false;                !! 857                     unsigned long *vm_flags)
980         struct folio_referenced_arg pra = {    !! 858 {
981                 .mapcount = folio_mapcount(fol !! 859         int referenced = 0;
982                 .memcg = memcg,                !! 860         int we_locked = 0;
983         };                                     << 
984         struct rmap_walk_control rwc = {       << 
985                 .rmap_one = folio_referenced_o << 
986                 .arg = (void *)&pra,           << 
987                 .anon_lock = folio_lock_anon_v << 
988                 .try_lock = true,              << 
989                 .invalid_vma = invalid_folio_r << 
990         };                                     << 
991                                                   861 
992         *vm_flags = 0;                            862         *vm_flags = 0;
993         if (!pra.mapcount)                     !! 863         if (page_mapped(page) && page_rmapping(page)) {
994                 return 0;                      !! 864                 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
995                                                !! 865                         we_locked = trylock_page(page);
996         if (!folio_raw_mapping(folio))         !! 866                         if (!we_locked) {
997                 return 0;                      !! 867                                 referenced++;
998                                                !! 868                                 goto out;
999         if (!is_locked && (!folio_test_anon(fo !! 869                         }
1000                 we_locked = folio_trylock(fol << 
1001                 if (!we_locked)               << 
1002                         return 1;             << 
1003         }                                     << 
1004                                               << 
1005         rmap_walk(folio, &rwc);               << 
1006         *vm_flags = pra.vm_flags;             << 
1007                                               << 
1008         if (we_locked)                        << 
1009                 folio_unlock(folio);          << 
1010                                               << 
1011         return rwc.contended ? -1 : pra.refer << 
1012 }                                             << 
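/*
 * Hedged sketch of a reclaim-style caller, loosely modeled on
 * folio_check_references() in mm/vmscan.c (simplified, not the real
 * policy): the caller is assumed to hold the folio lock, and a negative
 * return means the rmap walk bailed on lock contention rather than that
 * the folio is unreferenced.
 */
enum demo_refs { DEMO_RECLAIM, DEMO_KEEP, DEMO_ACTIVATE };

static enum demo_refs demo_check_references(struct folio *folio,
                                            struct mem_cgroup *memcg)
{
        unsigned long vm_flags;
        int referenced;

        referenced = folio_referenced(folio, 1, memcg, &vm_flags);

        if (vm_flags & VM_LOCKED)
                return DEMO_ACTIVATE;   /* mlocked: keep it resident */
        if (referenced < 0)
                return DEMO_KEEP;       /* rmap lock contended: retry later */
        if (referenced)
                return DEMO_ACTIVATE;   /* recently used */
        return DEMO_RECLAIM;
}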
1013                                               << 
1014 static int page_vma_mkclean_one(struct page_v << 
1015 {                                             << 
1016         int cleaned = 0;                      << 
1017         struct vm_area_struct *vma = pvmw->vm << 
1018         struct mmu_notifier_range range;      << 
1019         unsigned long address = pvmw->address << 
1020                                               << 
1021         /*                                    << 
1022          * We have to assume the worst case i << 
1023          * the folio can not be freed from th << 
1024          */                                   << 
1025         mmu_notifier_range_init(&range, MMU_N << 
1026                                 vma->vm_mm, a << 
1027         mmu_notifier_invalidate_range_start(& << 
1028                                               << 
1029         while (page_vma_mapped_walk(pvmw)) {  << 
1030                 int ret = 0;                  << 
1031                                               << 
1032                 address = pvmw->address;      << 
1033                 if (pvmw->pte) {              << 
1034                         pte_t *pte = pvmw->pt << 
1035                         pte_t entry = ptep_ge << 
1036                                               << 
1037                         if (!pte_dirty(entry) << 
1038                                 continue;     << 
1039                                               << 
1040                         flush_cache_page(vma, << 
1041                         entry = ptep_clear_fl << 
1042                         entry = pte_wrprotect << 
1043                         entry = pte_mkclean(e << 
1044                         set_pte_at(vma->vm_mm << 
1045                         ret = 1;              << 
1046                 } else {                      << 
1047 #ifdef CONFIG_TRANSPARENT_HUGEPAGE            << 
1048                         pmd_t *pmd = pvmw->pm << 
1049                         pmd_t entry;          << 
1050                                               << 
1051                         if (!pmd_dirty(*pmd)  << 
1052                                 continue;     << 
1053                                               << 
1054                         flush_cache_range(vma << 
1055                                           add << 
1056                         entry = pmdp_invalida << 
1057                         entry = pmd_wrprotect << 
1058                         entry = pmd_mkclean(e << 
1059                         set_pmd_at(vma->vm_mm << 
1060                         ret = 1;              << 
1061 #else                                         << 
1062                         /* unexpected pmd-map << 
1063                         WARN_ON_ONCE(1);      << 
1064 #endif                                        << 
1065                 }                                870                 }
                                                   >> 871                 if (unlikely(PageKsm(page)))
                                                   >> 872                         referenced += page_referenced_ksm(page, memcg,
                                                   >> 873                                                                 vm_flags);
                                                   >> 874                 else if (PageAnon(page))
                                                   >> 875                         referenced += page_referenced_anon(page, memcg,
                                                   >> 876                                                                 vm_flags);
                                                   >> 877                 else if (page->mapping)
                                                   >> 878                         referenced += page_referenced_file(page, memcg,
                                                   >> 879                                                                 vm_flags);
                                                   >> 880                 if (we_locked)
                                                   >> 881                         unlock_page(page);
1066                                                  882 
1067                 if (ret)                      !! 883                 if (page_test_and_clear_young(page_to_pfn(page)))
1068                         cleaned++;            !! 884                         referenced++;
1069         }                                        885         }
1070                                               !! 886 out:
1071         mmu_notifier_invalidate_range_end(&ra !! 887         return referenced;
1072                                               << 
1073         return cleaned;                       << 
1074 }                                             << 
1075                                               << 
1076 static bool page_mkclean_one(struct folio *fo << 
1077                              unsigned long ad << 
1078 {                                             << 
1079         DEFINE_FOLIO_VMA_WALK(pvmw, folio, vm << 
1080         int *cleaned = arg;                   << 
1081                                               << 
1082         *cleaned += page_vma_mkclean_one(&pvm << 
1083                                               << 
1084         return true;                          << 
1085 }                                             << 
1086                                               << 
1087 static bool invalid_mkclean_vma(struct vm_are << 
1088 {                                             << 
1089         if (vma->vm_flags & VM_SHARED)        << 
1090                 return false;                 << 
1091                                               << 
1092         return true;                          << 
1093 }                                                888 }
1094                                                  889 
1095 int folio_mkclean(struct folio *folio)        !! 890 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                                                   >> 891                             unsigned long address)
1096 {                                                892 {
1097         int cleaned = 0;                      !! 893         struct mm_struct *mm = vma->vm_mm;
1098         struct address_space *mapping;        !! 894         pte_t *pte;
1099         struct rmap_walk_control rwc = {      !! 895         spinlock_t *ptl;
1100                 .arg = (void *)&cleaned,      !! 896         int ret = 0;
1101                 .rmap_one = page_mkclean_one, << 
1102                 .invalid_vma = invalid_mkclea << 
1103         };                                    << 
1104                                                  897 
1105         BUG_ON(!folio_test_locked(folio));    !! 898         pte = page_check_address(page, mm, address, &ptl, 1);
                                                   >> 899         if (!pte)
                                                   >> 900                 goto out;
1106                                                  901 
1107         if (!folio_mapped(folio))             !! 902         if (pte_dirty(*pte) || pte_write(*pte)) {
1108                 return 0;                     !! 903                 pte_t entry;
1109                                                  904 
1110         mapping = folio_mapping(folio);       !! 905                 flush_cache_page(vma, address, pte_pfn(*pte));
1111         if (!mapping)                         !! 906                 entry = ptep_clear_flush(vma, address, pte);
1112                 return 0;                     !! 907                 entry = pte_wrprotect(entry);
                                                   >> 908                 entry = pte_mkclean(entry);
                                                   >> 909                 set_pte_at(mm, address, pte, entry);
                                                   >> 910                 ret = 1;
                                                   >> 911         }
1113                                                  912 
1114         rmap_walk(folio, &rwc);               !! 913         pte_unmap_unlock(pte, ptl);
1115                                                  914 
1116         return cleaned;                       !! 915         if (ret)
                                                   >> 916                 mmu_notifier_invalidate_page(mm, address);
                                                   >> 917 out:
                                                   >> 918         return ret;
1117 }                                                919 }
1118 EXPORT_SYMBOL_GPL(folio_mkclean);             << 
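/*
 * Hedged sketch of the classic folio_mkclean() caller pattern (compare
 * folio_clear_dirty_for_io() in mm/page-writeback.c; this is a simplified
 * stand-in, not that code): before writing a folio out, write-protect and
 * clean its PTEs so that a racing store faults and re-dirties the folio
 * instead of being lost.  The caller is assumed to hold the folio lock,
 * as folio_mkclean() requires.
 */
static void demo_prepare_for_writeback(struct folio *folio)
{
        if (folio_mkclean(folio)) {
                /*
                 * At least one PTE was dirty or writable: fold that into
                 * the folio/mapping dirty state before deciding what to
                 * write back.
                 */
                folio_mark_dirty(folio);
        }

        /* ... clear the folio's dirty flag and start the IO ... */
}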
1119                                                  920 
1120 /**                                           !! 921 static int page_mkclean_file(struct address_space *mapping, struct page *page)
1121  * pfn_mkclean_range - Cleans the PTEs (inclu !! 922 {
1122  *                     [@pfn, @pfn + @nr_page !! 923         pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1123  *                     within the @vma of sha !! 924         struct vm_area_struct *vma;
1124  *                     should also be readonl !! 925         int ret = 0;
1125  * @pfn: start pfn.                           << 
1126  * @nr_pages: number of physically contiguous << 
1127  * @pgoff: page offset that the @pfn mapped w << 
1128  * @vma: vma that @pfn mapped within.         << 
1129  *                                            << 
1130  * Returns the number of cleaned PTEs (includ << 
1131  */                                           << 
1132 int pfn_mkclean_range(unsigned long pfn, unsi << 
1133                       struct vm_area_struct * << 
1134 {                                             << 
1135         struct page_vma_mapped_walk pvmw = {  << 
1136                 .pfn            = pfn,        << 
1137                 .nr_pages       = nr_pages,   << 
1138                 .pgoff          = pgoff,      << 
1139                 .vma            = vma,        << 
1140                 .flags          = PVMW_SYNC,  << 
1141         };                                    << 
1142                                               << 
1143         if (invalid_mkclean_vma(vma, NULL))   << 
1144                 return 0;                     << 
1145                                                  926 
1146         pvmw.address = vma_address(vma, pgoff !! 927         BUG_ON(PageAnon(page));
1147         VM_BUG_ON_VMA(pvmw.address == -EFAULT << 
1148                                                  928 
1149         return page_vma_mkclean_one(&pvmw);   !! 929         mutex_lock(&mapping->i_mmap_mutex);
                                                   >> 930         vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                                                   >> 931                 if (vma->vm_flags & VM_SHARED) {
                                                   >> 932                         unsigned long address = vma_address(page, vma);
                                                   >> 933                         ret += page_mkclean_one(page, vma, address);
                                                   >> 934                 }
                                                   >> 935         }
                                                   >> 936         mutex_unlock(&mapping->i_mmap_mutex);
                                                   >> 937         return ret;
1150 }                                                938 }
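/*
 * Hedged sketch of a pfn_mkclean_range() caller, loosely modeled on the
 * DAX writeback path in fs/dax.c (simplified, not that code): walk every
 * shared VMA that maps the file range and clean/write-protect the user
 * mappings of a physically contiguous block of pfns before flushing the
 * block to media.
 */
static void demo_mkclean_pfn_range(struct address_space *mapping,
                                   pgoff_t index, unsigned long pfn,
                                   unsigned long nr_pages)
{
        struct vm_area_struct *vma;

        i_mmap_lock_read(mapping);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, index,
                                  index + nr_pages - 1)
                pfn_mkclean_range(pfn, nr_pages, index, vma);
        i_mmap_unlock_read(mapping);
}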
1151                                                  939 
1152 static __always_inline unsigned int __folio_a !! 940 int page_mkclean(struct page *page)
1153                 struct page *page, int nr_pag << 
1154                 int *nr_pmdmapped)            << 
1155 {                                                941 {
1156         atomic_t *mapped = &folio->_nr_pages_ !! 942         int ret = 0;
1157         const int orig_nr_pages = nr_pages;   << 
1158         int first = 0, nr = 0;                << 
1159                                                  943 
1160         __folio_rmap_sanity_checks(folio, pag !! 944         BUG_ON(!PageLocked(page));
1161                                               << 
1162         switch (level) {                      << 
1163         case RMAP_LEVEL_PTE:                  << 
1164                 if (!folio_test_large(folio)) << 
1165                         nr = atomic_inc_and_t << 
1166                         break;                << 
1167                 }                             << 
1168                                                  945 
1169                 do {                          !! 946         if (page_mapped(page)) {
1170                         first += atomic_inc_a !! 947                 struct address_space *mapping = page_mapping(page);
1171                 } while (page++, --nr_pages > !! 948                 if (mapping)
1172                                               !! 949                         ret = page_mkclean_file(mapping, page);
1173                 if (first &&                  << 
1174                     atomic_add_return_relaxed << 
1175                         nr = first;           << 
1176                                               << 
1177                 atomic_add(orig_nr_pages, &fo << 
1178                 break;                        << 
1179         case RMAP_LEVEL_PMD:                  << 
1180                 first = atomic_inc_and_test(& << 
1181                 if (first) {                  << 
1182                         nr = atomic_add_retur << 
1183                         if (likely(nr < ENTIR << 
1184                                 *nr_pmdmapped << 
1185                                 nr = *nr_pmdm << 
1186                                 /* Raced ahea << 
1187                                 if (unlikely( << 
1188                                         nr =  << 
1189                         } else {              << 
1190                                 /* Raced ahea << 
1191                                 nr = 0;       << 
1192                         }                     << 
1193                 }                             << 
1194                 atomic_inc(&folio->_large_map << 
1195                 break;                        << 
1196         }                                        950         }
1197         return nr;                            !! 951 
                                                   >> 952         return ret;
1198 }                                                953 }
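/*
 * Editor's sketch (not kernel code): __folio_add_rmap() leans on the
 * convention that a mapcount field starts at -1, so the mapper whose
 * increment takes the counter to 0 knows it created the first mapping.
 * A minimal C11 model of that atomic_inc_and_test() convention:
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* true iff the increment made the counter reach zero (i.e. first mapping) */
static bool model_inc_and_test(atomic_int *mapcount)
{
	return atomic_fetch_add(mapcount, 1) == -1;
}

int main(void)
{
	atomic_int mapcount = -1;	/* "unmapped" state */

	printf("first map:  %d\n", model_inc_and_test(&mapcount));	/* 1 */
	printf("second map: %d\n", model_inc_and_test(&mapcount));	/* 0 */
	return 0;
}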
                                                   >> 954 EXPORT_SYMBOL_GPL(page_mkclean);
1199                                                  955 
1200 /**                                              956 /**
1201  * folio_move_anon_rmap - move a folio to our !! 957  * page_move_anon_rmap - move a page to our anon_vma
1202  * @folio:      The folio to move to our anon !! 958  * @page:       the page to move to our anon_vma
1203  * @vma:        The vma the folio belongs to  !! 959  * @vma:        the vma the page belongs to
                                                   >> 960  * @address:    the user virtual address mapped
1204  *                                               961  *
1205  * When a folio belongs exclusively to one pr !! 962  * When a page belongs exclusively to one process after a COW event,
1206  * that folio can be moved into the anon_vma  !! 963  * that page can be moved into the anon_vma that belongs to just that
1207  * process, so the rmap code will not search  !! 964  * process, so the rmap code will not search the parent or sibling
                                                   >> 965  * processes.
1208  */                                              966  */
1209 void folio_move_anon_rmap(struct folio *folio !! 967 void page_move_anon_rmap(struct page *page,
                                                   >> 968         struct vm_area_struct *vma, unsigned long address)
1210 {                                                969 {
1211         void *anon_vma = vma->anon_vma;       !! 970         struct anon_vma *anon_vma = vma->anon_vma;
1212                                                  971 
1213         VM_BUG_ON_FOLIO(!folio_test_locked(fo !! 972         VM_BUG_ON(!PageLocked(page));
1214         VM_BUG_ON_VMA(!anon_vma, vma);        !! 973         VM_BUG_ON(!anon_vma);
                                                   >> 974         VM_BUG_ON(page->index != linear_page_index(vma, address));
1215                                                  975 
1216         anon_vma += PAGE_MAPPING_ANON;        !! 976         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1217         /*                                    !! 977         page->mapping = (struct address_space *) anon_vma;
1218          * Ensure that anon_vma and the PAGE_ << 
1219          * simultaneously, so a concurrent re << 
1220          * folio_test_anon()) will not see on << 
1221          */                                   << 
1222         WRITE_ONCE(folio->mapping, anon_vma); << 
1223 }                                                978 }
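/*
 * Editor's sketch (not kernel code): folio_move_anon_rmap() stores the
 * anon_vma pointer and the PAGE_MAPPING_ANON type bit with one WRITE_ONCE so
 * a lockless reader (e.g. folio_test_anon()) can never observe the new
 * pointer without its type bit, or the bit without the pointer.  A C11 model
 * of that tagged pointer, assuming anon_vma structures are at least 4-byte
 * aligned so bit 0 is free:
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_MAPPING_ANON 0x1UL

struct model_anon_vma { int dummy; };

/* one atomic (relaxed) store publishes pointer and tag together */
static void model_move_anon_rmap(_Atomic uintptr_t *mapping,
				 struct model_anon_vma *anon_vma)
{
	atomic_store_explicit(mapping,
			      (uintptr_t)anon_vma | MODEL_MAPPING_ANON,
			      memory_order_relaxed);
}

/* lockless reader: load once, then inspect the type bit */
static bool model_mapping_is_anon(_Atomic uintptr_t *mapping)
{
	uintptr_t val = atomic_load_explicit(mapping, memory_order_relaxed);

	return val & MODEL_MAPPING_ANON;
}

int main(void)
{
	static struct model_anon_vma av;
	_Atomic uintptr_t mapping = 0;

	model_move_anon_rmap(&mapping, &av);
	printf("anon? %d\n", model_mapping_is_anon(&mapping));	/* 1 */
	return 0;
}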
1224                                                  979 
1225 /**                                              980 /**
1226  * __folio_set_anon - set up a new anonymous  !! 981  * __page_set_anon_rmap - set up new anonymous rmap
1227  * @folio:      The folio to set up the new a !! 982  * @page:       Page to add to rmap     
1228  * @vma:        VM area to add the folio to.  !! 983  * @vma:        VM area to add page to.
1229  * @address:    User virtual address of the m !! 984  * @address:    User virtual address of the mapping     
1230  * @exclusive:  Whether the folio is exclusiv !! 985  * @exclusive:  the page is exclusively owned by the current process
1231  */                                              986  */
1232 static void __folio_set_anon(struct folio *fo !! 987 static void __page_set_anon_rmap(struct page *page,
1233                              unsigned long ad !! 988         struct vm_area_struct *vma, unsigned long address, int exclusive)
1234 {                                                989 {
1235         struct anon_vma *anon_vma = vma->anon    990         struct anon_vma *anon_vma = vma->anon_vma;
1236                                                  991 
1237         BUG_ON(!anon_vma);                       992         BUG_ON(!anon_vma);
1238                                                  993 
                                                   >> 994         if (PageAnon(page))
                                                   >> 995                 return;
                                                   >> 996 
1239         /*                                       997         /*
1240          * If the folio isn't exclusive to th !! 998          * If the page isn't exclusively mapped into this vma,
1241          * possible anon_vma for the folio ma !! 999          * we must use the _oldest_ possible anon_vma for the
                                                   >> 1000          * page mapping!
1242          */                                      1001          */
1243         if (!exclusive)                          1002         if (!exclusive)
1244                 anon_vma = anon_vma->root;       1003                 anon_vma = anon_vma->root;
1245                                                  1004 
1246         /*                                    << 
1247          * page_idle does a lockless/optimist << 
1248          * Make sure the compiler doesn't spl << 
1249          * the PAGE_MAPPING_ANON type identif << 
1250          * could mistake the mapping for a st << 
1251          */                                   << 
1252         anon_vma = (void *) anon_vma + PAGE_M    1005         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1253         WRITE_ONCE(folio->mapping, (struct ad !! 1006         page->mapping = (struct address_space *) anon_vma;
1254         folio->index = linear_page_index(vma, !! 1007         page->index = linear_page_index(vma, address);
1255 }                                                1008 }
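/*
 * Editor's sketch (not kernel code): __folio_set_anon() records
 * linear_page_index(vma, address) in folio->index.  That is the inverse of
 * the vma_address() arithmetic sketched earlier: user virtual address back to
 * page offset within the VMA's backing object.  Again assuming 4 KiB pages:
 */
#include <stdio.h>

#define MODEL_PAGE_SHIFT 12UL	/* assumption: 4 KiB pages */

struct model_vma {
	unsigned long vm_start;
	unsigned long vm_pgoff;
};

/* user virtual address -> linear page index, mirroring linear_page_index() */
static unsigned long model_linear_page_index(const struct model_vma *vma,
					     unsigned long address)
{
	return ((address - vma->vm_start) >> MODEL_PAGE_SHIFT) + vma->vm_pgoff;
}

int main(void)
{
	struct model_vma vma = { .vm_start = 0x7f0000000000UL, .vm_pgoff = 16 };
	unsigned long addr = 0x7f0000000000UL + (4UL << MODEL_PAGE_SHIFT);

	printf("index = %lu\n", model_linear_page_index(&vma, addr)); /* 20 */
	return 0;
}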
1256                                                  1009 
1257 /**                                              1010 /**
1258  * __page_check_anon_rmap - sanity check anon    1011  * __page_check_anon_rmap - sanity check anonymous rmap addition
1259  * @folio:      The folio containing @page.   !! 1012  * @page:       the page to add the mapping to
1260  * @page:       the page to check the mapping << 
1261  * @vma:        the vm area in which the mapp    1013  * @vma:        the vm area in which the mapping is added
1262  * @address:    the user virtual address mapp    1014  * @address:    the user virtual address mapped
1263  */                                              1015  */
1264 static void __page_check_anon_rmap(struct fol !! 1016 static void __page_check_anon_rmap(struct page *page,
1265         struct vm_area_struct *vma, unsigned     1017         struct vm_area_struct *vma, unsigned long address)
1266 {                                                1018 {
                                                   >> 1019 #ifdef CONFIG_DEBUG_VM
1267         /*                                       1020         /*
1268          * The page's anon-rmap details (mapp    1021          * The page's anon-rmap details (mapping and index) are guaranteed to
1269          * be set up correctly at this point.    1022          * be set up correctly at this point.
1270          *                                       1023          *
1271          * We have exclusion against folio_ad !! 1024          * We have exclusion against page_add_anon_rmap because the caller
1272          * always holds the page locked.      !! 1025          * always holds the page locked, except if called from page_dup_rmap,
                                                   >> 1026          * in which case the page is already known to be setup.
1273          *                                       1027          *
1274          * We have exclusion against folio_ad !! 1028          * We have exclusion against page_add_new_anon_rmap because those pages
1275          * are initially only visible via the    1029          * are initially only visible via the pagetables, and the pte is locked
1276          * over the call to folio_add_new_ano !! 1030          * over the call to page_add_new_anon_rmap.
1277          */                                   << 
1278         VM_BUG_ON_FOLIO(folio_anon_vma(folio) << 
1279                         folio);               << 
1280         VM_BUG_ON_PAGE(page_to_pgoff(page) != << 
1281                        page);                 << 
1282 }                                             << 
1283                                               << 
1284 static void __folio_mod_stat(struct folio *fo << 
1285 {                                             << 
1286         int idx;                              << 
1287                                               << 
1288         if (nr) {                             << 
1289                 idx = folio_test_anon(folio)  << 
1290                 __lruvec_stat_mod_folio(folio << 
1291         }                                     << 
1292         if (nr_pmdmapped) {                   << 
1293                 if (folio_test_anon(folio)) { << 
1294                         idx = NR_ANON_THPS;   << 
1295                         __lruvec_stat_mod_fol << 
1296                 } else {                      << 
1297                         /* NR_*_PMDMAPPED are << 
1298                         idx = folio_test_swap << 
1299                                 NR_SHMEM_PMDM << 
1300                         __mod_node_page_state << 
1301                                               << 
1302                 }                             << 
1303         }                                     << 
1304 }                                             << 
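/*
 * Editor's sketch (not kernel code): __folio_mod_stat() (truncated above)
 * routes the per-page delta to an anon-mapped or file-mapped counter, and the
 * PMD-mapped delta to a THP counter (anon THPs vs shmem/file PMD mappings).
 * A simplified model of that routing; the enum names below are illustrative
 * stand-ins, not the kernel's node/lruvec stat items:
 */
#include <stdbool.h>
#include <stdio.h>

enum model_stat {
	MODEL_ANON_MAPPED, MODEL_FILE_MAPPED,
	MODEL_ANON_THPS, MODEL_SHMEM_PMDMAPPED, MODEL_FILE_PMDMAPPED,
	MODEL_NR_STATS
};

static long model_stats[MODEL_NR_STATS];

static void model_mod_stat(bool anon, bool swapbacked, int nr, int nr_pmdmapped)
{
	if (nr)
		model_stats[anon ? MODEL_ANON_MAPPED : MODEL_FILE_MAPPED] += nr;
	if (nr_pmdmapped) {
		if (anon)
			model_stats[MODEL_ANON_THPS] += nr_pmdmapped;
		else
			model_stats[swapbacked ? MODEL_SHMEM_PMDMAPPED :
						 MODEL_FILE_PMDMAPPED] += nr_pmdmapped;
	}
}

int main(void)
{
	model_mod_stat(true, true, 512, 512);	/* PMD-mapping an anon THP */
	printf("anon mapped pages: %ld, anon THP pages: %ld\n",
	       model_stats[MODEL_ANON_MAPPED], model_stats[MODEL_ANON_THPS]);
	return 0;
}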
1305                                               << 
1306 static __always_inline void __folio_add_anon_ << 
1307                 struct page *page, int nr_pag << 
1308                 unsigned long address, rmap_t << 
1309 {                                             << 
1310         int i, nr, nr_pmdmapped = 0;          << 
1311                                               << 
1312         VM_WARN_ON_FOLIO(!folio_test_anon(fol << 
1313                                               << 
1314         nr = __folio_add_rmap(folio, page, nr << 
1315                                               << 
1316         if (likely(!folio_test_ksm(folio)))   << 
1317                 __page_check_anon_rmap(folio, << 
1318                                               << 
1319         __folio_mod_stat(folio, nr, nr_pmdmap << 
1320                                               << 
1321         if (flags & RMAP_EXCLUSIVE) {         << 
1322                 switch (level) {              << 
1323                 case RMAP_LEVEL_PTE:          << 
1324                         for (i = 0; i < nr_pa << 
1325                                 SetPageAnonEx << 
1326                         break;                << 
1327                 case RMAP_LEVEL_PMD:          << 
1328                         SetPageAnonExclusive( << 
1329                         break;                << 
1330                 }                             << 
1331         }                                     << 
1332         for (i = 0; i < nr_pages; i++) {      << 
1333                 struct page *cur_page = page  << 
1334                                               << 
1335                 /* While PTE-mapping a THP we << 
1336                 VM_WARN_ON_FOLIO((atomic_read << 
1337                                   (folio_test << 
1338                                    folio_enti << 
1339                                  PageAnonExcl << 
1340         }                                     << 
1341                                               << 
1342         /*                                    << 
1343          * For large folio, only mlock it if  << 
1344          * not easy to check whether the larg << 
1345          * here. Only mlock normal 4K folio a << 
1346          * large folio.                       << 
1347          */                                      1031          */
1348         if (!folio_test_large(folio))         !! 1032         BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
1349                 mlock_vma_folio(folio, vma);  !! 1033         BUG_ON(page->index != linear_page_index(vma, address));
                                                   >> 1034 #endif
1350 }                                                1035 }
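/*
 * Editor's sketch (not kernel code): as the RMAP_EXCLUSIVE switch above
 * shows, an exclusive mapping marks every page of the range PageAnonExclusive
 * at the PTE level, but only the first page at the PMD level.  A toy model
 * with a per-page flag array (names are illustrative, not kernel API):
 */
#include <stdbool.h>
#include <stdio.h>

enum model_level { MODEL_LEVEL_PTE, MODEL_LEVEL_PMD };

static void model_set_anon_exclusive(bool *exclusive_flags, int first_page,
				     int nr_pages, enum model_level level)
{
	int i;

	switch (level) {
	case MODEL_LEVEL_PTE:
		for (i = 0; i < nr_pages; i++)
			exclusive_flags[first_page + i] = true;
		break;
	case MODEL_LEVEL_PMD:
		exclusive_flags[first_page] = true;
		break;
	}
}

int main(void)
{
	bool flags[8] = { false };

	model_set_anon_exclusive(flags, 0, 8, MODEL_LEVEL_PMD);
	printf("page0=%d page1=%d\n", flags[0], flags[1]);	/* 1 0 */
	return 0;
}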
1351                                                  1036 
1352 /**                                              1037 /**
1353  * folio_add_anon_rmap_ptes - add PTE mapping !! 1038  * page_add_anon_rmap - add pte mapping to an anonymous page
1354  * @folio:      The folio to add the mappings !! 1039  * @page:       the page to add the mapping to
1355  * @page:       The first page to add         !! 1040  * @vma:        the vm area in which the mapping is added
1356  * @nr_pages:   The number of pages which wil !! 1041  * @address:    the user virtual address mapped
1357  * @vma:        The vm area in which the mapp << 
1358  * @address:    The user virtual address of t << 
1359  * @flags:      The rmap flags                << 
1360  *                                            << 
1361  * The page range of folio is defined by [fir << 
1362  *                                               1042  *
1363  * The caller needs to hold the page table lo !! 1043  * The caller needs to hold the pte lock, and the page must be locked in
1364  * the anon_vma case: to serialize mapping,in    1044  * the anon_vma case: to serialize mapping,index checking after setting,
1365  * and to ensure that an anon folio is not be !! 1045  * and to ensure that PageAnon is not being upgraded racily to PageKsm
1366  * (but KSM folios are never downgraded).     !! 1046  * (but PageKsm is never downgraded to PageAnon).
1367  */                                              1047  */
1368 void folio_add_anon_rmap_ptes(struct folio *f !! 1048 void page_add_anon_rmap(struct page *page,
1369                 int nr_pages, struct vm_area_ !! 1049         struct vm_area_struct *vma, unsigned long address)
1370                 rmap_t flags)                 << 
1371 {                                                1050 {
1372         __folio_add_anon_rmap(folio, page, nr !! 1051         do_page_add_anon_rmap(page, vma, address, 0);
1373                               RMAP_LEVEL_PTE) << 
1374 }                                                1052 }
1375                                                  1053 
1376 /**                                           !! 1054 /*
1377  * folio_add_anon_rmap_pmd - add a PMD mappin !! 1055  * Special version of the above for do_swap_page, which often runs
1378  * @folio:      The folio to add the mapping  !! 1056  * into pages that are exclusively owned by the current process.
1379  * @page:       The first page to add         !! 1057  * Everybody else should continue to use page_add_anon_rmap above.
1380  * @vma:        The vm area in which the mapp !! 1058  */
1381  * @address:    The user virtual address of t !! 1059 void do_page_add_anon_rmap(struct page *page,
1382  * @flags:      The rmap flags                !! 1060         struct vm_area_struct *vma, unsigned long address, int exclusive)
1383  *                                            !! 1061 {
1384  * The page range of folio is defined by [fir !! 1062         int first = atomic_inc_and_test(&page->_mapcount);
1385  *                                            !! 1063         if (first) {
1386  * The caller needs to hold the page table lo !! 1064                 if (!PageTransHuge(page))
1387  * the anon_vma case: to serialize mapping,in !! 1065                         __inc_zone_page_state(page, NR_ANON_PAGES);
1388  */                                           !! 1066                 else
1389 void folio_add_anon_rmap_pmd(struct folio *fo !! 1067                         __inc_zone_page_state(page,
1390                 struct vm_area_struct *vma, u !! 1068                                               NR_ANON_TRANSPARENT_HUGEPAGES);
1391 {                                             !! 1069         }
1392 #ifdef CONFIG_TRANSPARENT_HUGEPAGE            !! 1070         if (unlikely(PageKsm(page)))
1393         __folio_add_anon_rmap(folio, page, HP !! 1071                 return;
1394                               RMAP_LEVEL_PMD) !! 1072 
1395 #else                                         !! 1073         VM_BUG_ON(!PageLocked(page));
1396         WARN_ON_ONCE(true);                   !! 1074         /* address might be in next vma when migration races vma_adjust */
1397 #endif                                        !! 1075         if (first)
                                                   >> 1076                 __page_set_anon_rmap(page, vma, address, exclusive);
                                                   >> 1077         else
                                                   >> 1078                 __page_check_anon_rmap(page, vma, address);
1398 }                                                1079 }
1399                                                  1080 
1400 /**                                              1081 /**
1401  * folio_add_new_anon_rmap - Add mapping to a !! 1082  * page_add_new_anon_rmap - add pte mapping to a new anonymous page
1402  * @folio:      The folio to add the mapping  !! 1083  * @page:       the page to add the mapping to
1403  * @vma:        the vm area in which the mapp    1084  * @vma:        the vm area in which the mapping is added
1404  * @address:    the user virtual address mapp    1085  * @address:    the user virtual address mapped
1405  * @flags:      The rmap flags                << 
1406  *                                               1086  *
1407  * Like folio_add_anon_rmap_*() but must only !! 1087  * Same as page_add_anon_rmap but must only be called on *new* pages.
1408  * This means the inc-and-test can be bypasse    1088  * This means the inc-and-test can be bypassed.
1409  * The folio doesn't necessarily need to be l !! 1089  * Page does not have to be locked.
1410  * unless two threads map it concurrently. Ho !! 1090  */
1411  * locked if it's shared.                     !! 1091 void page_add_new_anon_rmap(struct page *page,
1412  *                                            !! 1092         struct vm_area_struct *vma, unsigned long address)
1413  * If the folio is pmd-mappable, it is accoun << 
1414  */                                           << 
1415 void folio_add_new_anon_rmap(struct folio *fo << 
1416                 unsigned long address, rmap_t << 
1417 {                                             << 
1418         const int nr = folio_nr_pages(folio); << 
1419         const bool exclusive = flags & RMAP_E << 
1420         int nr_pmdmapped = 0;                 << 
1421                                               << 
1422         VM_WARN_ON_FOLIO(folio_test_hugetlb(f << 
1423         VM_WARN_ON_FOLIO(!exclusive && !folio << 
1424         VM_BUG_ON_VMA(address < vma->vm_start << 
1425                         address + (nr << PAGE << 
1426                                               << 
1427         /*                                    << 
1428          * VM_DROPPABLE mappings don't swap;  << 
1429          * under memory pressure.             << 
1430          */                                   << 
1431         if (!folio_test_swapbacked(folio) &&  << 
1432                 __folio_set_swapbacked(folio) << 
1433         __folio_set_anon(folio, vma, address, << 
1434                                               << 
1435         if (likely(!folio_test_large(folio))) << 
1436                 /* increment count (starts at << 
1437                 atomic_set(&folio->_mapcount, << 
1438                 if (exclusive)                << 
1439                         SetPageAnonExclusive( << 
1440         } else if (!folio_test_pmd_mappable(f << 
1441                 int i;                        << 
1442                                               << 
1443                 for (i = 0; i < nr; i++) {    << 
1444                         struct page *page = f << 
1445                                               << 
1446                         /* increment count (s << 
1447                         atomic_set(&page->_ma << 
1448                         if (exclusive)        << 
1449                                 SetPageAnonEx << 
1450                 }                             << 
1451                                               << 
1452                 /* increment count (starts at << 
1453                 atomic_set(&folio->_large_map << 
1454                 atomic_set(&folio->_nr_pages_ << 
1455         } else {                              << 
1456                 /* increment count (starts at << 
1457                 atomic_set(&folio->_entire_ma << 
1458                 /* increment count (starts at << 
1459                 atomic_set(&folio->_large_map << 
1460                 atomic_set(&folio->_nr_pages_ << 
1461                 if (exclusive)                << 
1462                         SetPageAnonExclusive( << 
1463                 nr_pmdmapped = nr;            << 
1464         }                                     << 
1465                                               << 
1466         __folio_mod_stat(folio, nr, nr_pmdmap << 
1467         mod_mthp_stat(folio_order(folio), MTH << 
1468 }                                             << 
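/*
 * Editor's sketch (not kernel code): folio_add_new_anon_rmap() is only called
 * for folios no other thread can map yet, so the "-1 means unmapped" counters
 * can simply be initialised to their mapped values instead of going through
 * the increment-and-test path used for already-visible folios.  A C11 model
 * of that shortcut:
 */
#include <stdatomic.h>
#include <stdio.h>

struct model_folio {
	atomic_int mapcount;		/* -1 == not mapped */
};

/* existing folio: must use an atomic RMW and detect the -1 -> 0 transition */
static int model_add_rmap(struct model_folio *folio)
{
	return atomic_fetch_add(&folio->mapcount, 1) == -1;
}

/* brand-new folio: nobody can race with us, a plain initialisation suffices */
static void model_add_new_rmap(struct model_folio *folio)
{
	atomic_init(&folio->mapcount, 0);	/* "mapped exactly once" */
}

int main(void)
{
	struct model_folio new_folio;

	model_add_new_rmap(&new_folio);
	printf("mapcount after first map: %d\n",
	       atomic_load(&new_folio.mapcount));	/* 0 */
	(void)model_add_rmap;	/* RMW path kept for contrast only */
	return 0;
}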
1469                                               << 
1470 static __always_inline void __folio_add_file_ << 
1471                 struct page *page, int nr_pag << 
1472                 enum rmap_level level)        << 
1473 {                                                1093 {
1474         int nr, nr_pmdmapped = 0;             !! 1094         VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1475                                               !! 1095         SetPageSwapBacked(page);
1476         VM_WARN_ON_FOLIO(folio_test_anon(foli !! 1096         atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
1477                                               !! 1097         if (!PageTransHuge(page))
1478         nr = __folio_add_rmap(folio, page, nr !! 1098                 __inc_zone_page_state(page, NR_ANON_PAGES);
1479         __folio_mod_stat(folio, nr, nr_pmdmap !! 1099         else
1480                                               !! 1100                 __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1481         /* See comments in folio_add_anon_rma !! 1101         __page_set_anon_rmap(page, vma, address, 1);
1482         if (!folio_test_large(folio))         !! 1102         if (!mlocked_vma_newpage(vma, page))
1483                 mlock_vma_folio(folio, vma);  !! 1103                 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
                                                   >> 1104         else
                                                   >> 1105                 add_page_to_unevictable_list(page);
1484 }                                                1106 }
1485                                                  1107 
1486 /**                                              1108 /**
1487  * folio_add_file_rmap_ptes - add PTE mapping !! 1109  * page_add_file_rmap - add pte mapping to a file page
1488  * @folio:      The folio to add the mappings !! 1110  * @page: the page to add the mapping to
1489  * @page:       The first page to add         << 
1490  * @nr_pages:   The number of pages that will << 
1491  * @vma:        The vm area in which the mapp << 
1492  *                                            << 
1493  * The page range of the folio is defined by  << 
1494  *                                               1111  *
1495  * The caller needs to hold the page table lo !! 1112  * The caller needs to hold the pte lock.
1496  */                                              1113  */
1497 void folio_add_file_rmap_ptes(struct folio *f !! 1114 void page_add_file_rmap(struct page *page)
1498                 int nr_pages, struct vm_area_ << 
1499 {                                                1115 {
1500         __folio_add_file_rmap(folio, page, nr !! 1116         bool locked;
1501 }                                             !! 1117         unsigned long flags;
1502                                                  1118 
1503 /**                                           !! 1119         mem_cgroup_begin_update_page_stat(page, &locked, &flags);
1504  * folio_add_file_rmap_pmd - add a PMD mappin !! 1120         if (atomic_inc_and_test(&page->_mapcount)) {
1505  * @folio:      The folio to add the mapping  !! 1121                 __inc_zone_page_state(page, NR_FILE_MAPPED);
1506  * @page:       The first page to add         !! 1122                 mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
1507  * @vma:        The vm area in which the mapp !! 1123         }
1508  *                                            !! 1124         mem_cgroup_end_update_page_stat(page, &locked, &flags);
1509  * The page range of the folio is defined by  << 
1510  *                                            << 
1511  * The caller needs to hold the page table lo << 
1512  */                                           << 
1513 void folio_add_file_rmap_pmd(struct folio *fo << 
1514                 struct vm_area_struct *vma)   << 
1515 {                                             << 
1516 #ifdef CONFIG_TRANSPARENT_HUGEPAGE            << 
1517         __folio_add_file_rmap(folio, page, HP << 
1518 #else                                         << 
1519         WARN_ON_ONCE(true);                   << 
1520 #endif                                        << 
1521 }                                                1125 }
1522                                                  1126 
1523 static __always_inline void __folio_remove_rm !! 1127 /**
1524                 struct page *page, int nr_pag !! 1128  * page_remove_rmap - take down pte mapping from a page
1525                 enum rmap_level level)        !! 1129  * @page: page to remove mapping from
1526 {                                             !! 1130  *
1527         atomic_t *mapped = &folio->_nr_pages_ !! 1131  * The caller needs to hold the pte lock.
1528         int last = 0, nr = 0, nr_pmdmapped =  !! 1132  */
1529         bool partially_mapped = false;        !! 1133 void page_remove_rmap(struct page *page)
1530                                               !! 1134 {
1531         __folio_rmap_sanity_checks(folio, pag !! 1135         bool anon = PageAnon(page);
1532                                               !! 1136         bool locked;
1533         switch (level) {                      !! 1137         unsigned long flags;
1534         case RMAP_LEVEL_PTE:                  << 
1535                 if (!folio_test_large(folio)) << 
1536                         nr = atomic_add_negat << 
1537                         break;                << 
1538                 }                             << 
1539                                               << 
1540                 atomic_sub(nr_pages, &folio-> << 
1541                 do {                          << 
1542                         last += atomic_add_ne << 
1543                 } while (page++, --nr_pages > << 
1544                                               << 
1545                 if (last &&                   << 
1546                     atomic_sub_return_relaxed << 
1547                         nr = last;            << 
1548                                               << 
1549                 partially_mapped = nr && atom << 
1550                 break;                        << 
1551         case RMAP_LEVEL_PMD:                  << 
1552                 atomic_dec(&folio->_large_map << 
1553                 last = atomic_add_negative(-1 << 
1554                 if (last) {                   << 
1555                         nr = atomic_sub_retur << 
1556                         if (likely(nr < ENTIR << 
1557                                 nr_pmdmapped  << 
1558                                 nr = nr_pmdma << 
1559                                 /* Raced ahea << 
1560                                 if (unlikely( << 
1561                                         nr =  << 
1562                         } else {              << 
1563                                 /* An add of  << 
1564                                 nr = 0;       << 
1565                         }                     << 
1566                 }                             << 
1567                                               << 
1568                 partially_mapped = nr && nr < << 
1569                 break;                        << 
1570         }                                     << 
1571                                                  1138 
1572         /*                                       1139         /*
1573          * Queue anon large folio for deferre !! 1140          * The anon case has no mem_cgroup page_stat to update; but may
1574          * the folio is unmapped and at least !! 1141          * uncharge_page() below, where the lock ordering can deadlock if
1575          *                                    !! 1142          * we hold the lock against page_stat move: so avoid it on anon.
1576          * Check partially_mapped first to en << 
1577          */                                      1143          */
1578         if (partially_mapped && folio_test_an !! 1144         if (!anon)
1579             !folio_test_partially_mapped(foli !! 1145                 mem_cgroup_begin_update_page_stat(page, &locked, &flags);
1580                 deferred_split_folio(folio, t << 
1581                                                  1146 
1582         __folio_mod_stat(folio, -nr, -nr_pmdm !! 1147         /* page still mapped by someone else? */
                                                   >> 1148         if (!atomic_add_negative(-1, &page->_mapcount))
                                                   >> 1149                 goto out;
1583                                                  1150 
1584         /*                                       1151         /*
1585          * It would be tidy to reset folio_te !! 1152          * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
1586          * unmapped, but that might overwrite !! 1153          * and not charged by memcg for now.
1587          * which increments mapcount after us << 
1588          * so leave the reset to free_pages_p << 
1589          * it's only reliable while mapped.   << 
1590          */                                      1154          */
1591                                               !! 1155         if (unlikely(PageHuge(page)))
1592         munlock_vma_folio(folio, vma);        !! 1156                 goto out;
1593 }                                             !! 1157         if (anon) {
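/*
 * Editor's sketch (not kernel code): __folio_remove_rmap() detects the last
 * unmap with atomic_add_negative(-1, ...), i.e. the counter dropping back
 * below zero, and only queues an anon folio for deferred splitting when it is
 * left partially mapped.  A C11 model of the "last unmap" test (the
 * deferred-split policy itself is not modelled here):
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* true iff this decrement dropped the counter below zero (last unmap) */
static bool model_dec_and_test_last(atomic_int *mapcount)
{
	return atomic_fetch_sub(mapcount, 1) == 0;
}

int main(void)
{
	atomic_int mapcount = -1;

	atomic_fetch_add(&mapcount, 1);		/* map once: -1 -> 0 */
	atomic_fetch_add(&mapcount, 1);		/* map twice: 0 -> 1 */

	printf("first unmap is last?  %d\n", model_dec_and_test_last(&mapcount));	/* 0 */
	printf("second unmap is last? %d\n", model_dec_and_test_last(&mapcount));	/* 1 */
	return 0;
}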
1594                                               !! 1158                 mem_cgroup_uncharge_page(page);
1595 /**                                           !! 1159                 if (!PageTransHuge(page))
1596  * folio_remove_rmap_ptes - remove PTE mappin !! 1160                         __dec_zone_page_state(page, NR_ANON_PAGES);
1597  * @folio:      The folio to remove the mappi !! 1161                 else
1598  * @page:       The first page to remove      !! 1162                         __dec_zone_page_state(page,
1599  * @nr_pages:   The number of pages that will !! 1163                                               NR_ANON_TRANSPARENT_HUGEPAGES);
1600  * @vma:        The vm area from which the ma !! 1164         } else {
1601  *                                            !! 1165                 __dec_zone_page_state(page, NR_FILE_MAPPED);
1602  * The page range of the folio is defined by  !! 1166                 mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
1603  *                                            !! 1167                 mem_cgroup_end_update_page_stat(page, &locked, &flags);
1604  * The caller needs to hold the page table lo !! 1168         }
1605  */                                           !! 1169         if (unlikely(PageMlocked(page)))
1606 void folio_remove_rmap_ptes(struct folio *fol !! 1170                 clear_page_mlock(page);
1607                 int nr_pages, struct vm_area_ !! 1171         /*
1608 {                                             !! 1172          * It would be tidy to reset the PageAnon mapping here,
1609         __folio_remove_rmap(folio, page, nr_p !! 1173          * but that might overwrite a racing page_add_anon_rmap
1610 }                                             !! 1174          * which increments mapcount after us but sets mapping
1611                                               !! 1175          * before us: so leave the reset to free_hot_cold_page,
1612 /**                                           !! 1176          * and remember that it's only reliable while mapped.
1613  * folio_remove_rmap_pmd - remove a PMD mappi !! 1177          * Leaving it set also helps swapoff to reinstate ptes
1614  * @folio:      The folio to remove the mappi !! 1178          * faster for those pages still in swapcache.
1615  * @page:       The first page to remove      !! 1179          */
1616  * @vma:        The vm area from which the ma !! 1180         return;
1617  *                                            !! 1181 out:
1618  * The page range of the folio is defined by  !! 1182         if (!anon)
1619  *                                            !! 1183                 mem_cgroup_end_update_page_stat(page, &locked, &flags);
1620  * The caller needs to hold the page table lo << 
1621  */                                           << 
1622 void folio_remove_rmap_pmd(struct folio *foli << 
1623                 struct vm_area_struct *vma)   << 
1624 {                                             << 
1625 #ifdef CONFIG_TRANSPARENT_HUGEPAGE            << 
1626         __folio_remove_rmap(folio, page, HPAG << 
1627 #else                                         << 
1628         WARN_ON_ONCE(true);                   << 
1629 #endif                                        << 
1630 }                                                1184 }
1631                                                  1185 
1632 /*                                               1186 /*
1633  * @arg: enum ttu_flags will be passed to thi !! 1187  * Subfunctions of try_to_unmap: try_to_unmap_one called
                                                   >> 1188  * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file.
1634  */                                              1189  */
1635 static bool try_to_unmap_one(struct folio *fo !! 1190 int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1636                      unsigned long address, v !! 1191                      unsigned long address, enum ttu_flags flags)
1637 {                                                1192 {
1638         struct mm_struct *mm = vma->vm_mm;       1193         struct mm_struct *mm = vma->vm_mm;
1639         DEFINE_FOLIO_VMA_WALK(pvmw, folio, vm !! 1194         pte_t *pte;
1640         pte_t pteval;                            1195         pte_t pteval;
1641         struct page *subpage;                 !! 1196         spinlock_t *ptl;
1642         bool anon_exclusive, ret = true;      !! 1197         int ret = SWAP_AGAIN;
1643         struct mmu_notifier_range range;      << 
1644         enum ttu_flags flags = (enum ttu_flag << 
1645         unsigned long pfn;                    << 
1646         unsigned long hsz = 0;                << 
1647                                               << 
1648         /*                                    << 
1649          * When racing against e.g. zap_pte_r << 
1650          * in between its ptep_get_and_clear_ << 
1651          * try_to_unmap() may return before p << 
1652          * if page table locking is skipped:  << 
1653          */                                   << 
1654         if (flags & TTU_SYNC)                 << 
1655                 pvmw.flags = PVMW_SYNC;       << 
1656                                               << 
1657         /*                                    << 
1658          * For THP, we have to assume the wor << 
1659          * For hugetlb, it could be much wors << 
1660          * invalidation in the case of pmd sh << 
1661          *                                    << 
1662          * Note that the folio can not be fre << 
1663          * try_to_unmap() must hold a referen << 
1664          */                                   << 
1665         range.end = vma_address_end(&pvmw);   << 
1666         mmu_notifier_range_init(&range, MMU_N << 
1667                                 address, rang << 
1668         if (folio_test_hugetlb(folio)) {      << 
1669                 /*                            << 
1670                  * If sharing is possible, st << 
1671                  * accordingly.               << 
1672                  */                           << 
1673                 adjust_range_if_pmd_sharing_p << 
1674                                               << 
1675                                                  1198 
1676                 /* We need the huge page size !! 1199         pte = page_check_address(page, mm, address, &ptl, 0);
1677                 hsz = huge_page_size(hstate_v !! 1200         if (!pte)
1678         }                                     !! 1201                 goto out;
1679         mmu_notifier_invalidate_range_start(& << 
1680                                               << 
1681         while (page_vma_mapped_walk(&pvmw)) { << 
1682                 /*                            << 
1683                  * If the folio is in an mloc << 
1684                  */                           << 
1685                 if (!(flags & TTU_IGNORE_MLOC << 
1686                     (vma->vm_flags & VM_LOCKE << 
1687                         /* Restore the mlock  << 
1688                         if (!folio_test_large << 
1689                                 mlock_vma_fol << 
1690                         goto walk_abort;      << 
1691                 }                             << 
1692                                                  1202 
1693                 if (!pvmw.pte) {              !! 1203         /*
1694                         if (unmap_huge_pmd_lo !! 1204          * If the page is mlock()d, we cannot swap it out.
1695                                               !! 1205          * If it's recently referenced (perhaps page_referenced
1696                                 goto walk_don !! 1206          * skipped over this mm) then we should reactivate it.
                                                   >> 1207          */
                                                   >> 1208         if (!(flags & TTU_IGNORE_MLOCK)) {
                                                   >> 1209                 if (vma->vm_flags & VM_LOCKED)
                                                   >> 1210                         goto out_mlock;
1697                                                  1211 
1698                         if (flags & TTU_SPLIT !! 1212                 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1699                                 /*            !! 1213                         goto out_unmap;
1700                                  * We tempora !! 1214         }
1701                                  * restart so !! 1215         if (!(flags & TTU_IGNORE_ACCESS)) {
1702                                  */           !! 1216                 if (ptep_clear_flush_young_notify(vma, address, pte)) {
1703                                 split_huge_pm !! 1217                         ret = SWAP_FAIL;
1704                                               !! 1218                         goto out_unmap;
1705                                 flags &= ~TTU << 
1706                                 page_vma_mapp << 
1707                                 continue;     << 
1708                         }                     << 
1709                 }                                1219                 }
                                                   >> 1220         }
1710                                                  1221 
1711                 /* Unexpected PMD-mapped THP? !! 1222         /* Nuke the page table entry. */
1712                 VM_BUG_ON_FOLIO(!pvmw.pte, fo !! 1223         flush_cache_page(vma, address, page_to_pfn(page));
1713                                               !! 1224         pteval = ptep_clear_flush(vma, address, pte);
1714                 pfn = pte_pfn(ptep_get(pvmw.p << 
1715                 subpage = folio_page(folio, p << 
1716                 address = pvmw.address;       << 
1717                 anon_exclusive = folio_test_a << 
1718                                  PageAnonExcl << 
1719                                               << 
1720                 if (folio_test_hugetlb(folio) << 
1721                         bool anon = folio_tes << 
1722                                                  1225 
1723                         /*                    !! 1226         /* Move the dirty bit to the physical page now the pte is gone. */
1724                          * The try_to_unmap() !! 1227         if (pte_dirty(pteval))
1725                          * in the case where  !! 1228                 set_page_dirty(page);
1726                          */                   << 
1727                         VM_BUG_ON_PAGE(!PageH << 
1728                         /*                    << 
1729                          * huge_pmd_unshare m << 
1730                          * There is no way of << 
1731                          * be cached for this << 
1732                          * start/end were alr << 
1733                          * range.             << 
1734                          */                   << 
1735                         flush_cache_range(vma << 
1736                                                  1229 
1737                         /*                    !! 1230         /* Update high watermark before we lower rss */
1738                          * To call huge_pmd_u !! 1231         update_hiwater_rss(mm);
1739                          * held in write mode << 
1740                          * do this outside rm << 
1741                          *                    << 
1742                          * We also must hold  << 
1743                          * Lock order dictate << 
1744                          * i_mmap_rwsem.  We  << 
1745                          * if unsuccessful.   << 
1746                          */                   << 
1747                         if (!anon) {          << 
1748                                 VM_BUG_ON(!(f << 
1749                                 if (!hugetlb_ << 
1750                                         goto  << 
1751                                 if (huge_pmd_ << 
1752                                         huget << 
1753                                         flush << 
1754                                               << 
1755                                         /*    << 
1756                                          * Th << 
1757                                          * dr << 
1758                                          * co << 
1759                                          * Re << 
1760                                          * no << 
1761                                          * re << 
1762                                          * ac << 
1763                                          * to << 
1764                                          */   << 
1765                                         goto  << 
1766                                 }             << 
1767                                 hugetlb_vma_u << 
1768                         }                     << 
1769                         pteval = huge_ptep_cl << 
1770                 } else {                      << 
1771                         flush_cache_page(vma, << 
1772                         /* Nuke the page tabl << 
1773                         if (should_defer_flus << 
1774                                 /*            << 
1775                                  * We clear t << 
1776                                  * a remote C << 
1777                                  * If the ent << 
1778                                  * architectu << 
1779                                  * transition << 
1780                                  * and traps  << 
1781                                  */           << 
1782                                 pteval = ptep << 
1783                                                  1232 
1784                                 set_tlb_ubc_f !! 1233         if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
1785                         } else {              !! 1234                 if (!PageHuge(page)) {
1786                                 pteval = ptep !! 1235                         if (PageAnon(page))
1787                         }                     !! 1236                                 dec_mm_counter(mm, MM_ANONPAGES);
                                                   >> 1237                         else
                                                   >> 1238                                 dec_mm_counter(mm, MM_FILEPAGES);
1788                 }                                1239                 }
                                                   >> 1240                 set_pte_at(mm, address, pte,
                                                   >> 1241                            swp_entry_to_pte(make_hwpoison_entry(page)));
                                                   >> 1242         } else if (PageAnon(page)) {
                                                   >> 1243                 swp_entry_t entry = { .val = page_private(page) };
1789                                                  1244 
1790                 /*                            !! 1245                 if (PageSwapCache(page)) {
1791                  * Now the pte is cleared. If << 
1792                  * we may want to replace a n << 
1793                  * it's file-backed, so we do << 
1794                  */                           << 
1795                 pte_install_uffd_wp_if_needed << 
1796                                               << 
1797                 /* Set the dirty flag on the  << 
1798                 if (pte_dirty(pteval))        << 
1799                         folio_mark_dirty(foli << 
1800                                               << 
1801                 /* Update high watermark befo << 
1802                 update_hiwater_rss(mm);       << 
1803                                               << 
1804                 if (PageHWPoison(subpage) &&  << 
1805                         pteval = swp_entry_to << 
1806                         if (folio_test_hugetl << 
1807                                 hugetlb_count << 
1808                                 set_huge_pte_ << 
1809                                               << 
1810                         } else {              << 
1811                                 dec_mm_counte << 
1812                                 set_pte_at(mm << 
1813                         }                     << 
1814                                               << 
1815                 } else if (pte_unused(pteval) << 
1816                         /*                    << 
1817                          * The guest indicate << 
1818                          * interest anymore.  << 
1819                          * will take care of  << 
1820                          * A future reference << 
1821                          * page. When userfau << 
1822                          * this page though,  << 
1823                          * migration) will no << 
1824                          * copied pages.      << 
1825                          */                   << 
1826                         dec_mm_counter(mm, mm << 
1827                 } else if (folio_test_anon(fo << 
1828                         swp_entry_t entry = p << 
1829                         pte_t swp_pte;        << 
1830                         /*                       1246                         /*
1831                          * Store the swap loc    1247                          * Store the swap location in the pte.
1832                          * See handle_pte_fau    1248                          * See handle_pte_fault() ...
1833                          */                      1249                          */
1834                         if (unlikely(folio_te << 
1835                                         folio << 
1836                                 WARN_ON_ONCE( << 
1837                                 goto walk_abo << 
1838                         }                     << 
1839                                               << 
1840                         /* MADV_FREE page che << 
1841                         if (!folio_test_swapb << 
1842                                 int ref_count << 
1843                                               << 
1844                                 /*            << 
1845                                  * Synchroniz << 
1846                                  * - clear PT << 
1847                                  * - inc refc << 
1848                                  */           << 
1849                                 smp_mb();     << 
1850                                               << 
1851                                 ref_count = f << 
1852                                 map_count = f << 
1853                                               << 
1854                                 /*            << 
1855                                  * Order read << 
1856                                  * (see comme << 
1857                                  */           << 
1858                                 smp_rmb();    << 
1859                                               << 
1860                                 /*            << 
1861                                  * The only p << 
1862                                  * plus the r << 
1863                                  */           << 
1864                                 if (ref_count << 
1865                                     (!folio_t << 
1866                                      /*       << 
1867                                       * Unlik << 
1868                                       * ones  << 
1869                                       * been  << 
1870                                       */      << 
1871                                      (vma->vm << 
1872                                         dec_m << 
1873                                         goto  << 
1874                                 }             << 
1875                                               << 
1876                                 /*            << 
1877                                  * If the fol << 
1878                                  * discarded. << 
1879                                  */           << 
1880                                 set_pte_at(mm << 
1881                                 /*            << 
1882                                  * Unlike MAD << 
1883                                  * never get  << 
1884                                  */           << 
1885                                 if (!(vma->vm << 
1886                                         folio << 
1887                                 goto walk_abo << 
1888                         }                     << 
1889                                               << 
1890                         if (swap_duplicate(en    1250                         if (swap_duplicate(entry) < 0) {
1891                                 set_pte_at(mm !! 1251                                 set_pte_at(mm, address, pte, pteval);
1892                                 goto walk_abo !! 1252                                 ret = SWAP_FAIL;
1893                         }                     !! 1253                                 goto out_unmap;
1894                         if (arch_unmap_one(mm << 
1895                                 swap_free(ent << 
1896                                 set_pte_at(mm << 
1897                                 goto walk_abo << 
1898                         }                     << 
1899                                               << 
1900                         /* See folio_try_shar << 
1901                         if (anon_exclusive && << 
1902                             folio_try_share_a << 
1903                                 swap_free(ent << 
1904                                 set_pte_at(mm << 
1905                                 goto walk_abo << 
1906                         }                        1254                         }
1907                         if (list_empty(&mm->m    1255                         if (list_empty(&mm->mmlist)) {
1908                                 spin_lock(&mm    1256                                 spin_lock(&mmlist_lock);
1909                                 if (list_empt    1257                                 if (list_empty(&mm->mmlist))
1910                                         list_    1258                                         list_add(&mm->mmlist, &init_mm.mmlist);
1911                                 spin_unlock(&    1259                                 spin_unlock(&mmlist_lock);
1912                         }                        1260                         }
1913                         dec_mm_counter(mm, MM    1261                         dec_mm_counter(mm, MM_ANONPAGES);
1914                         inc_mm_counter(mm, MM    1262                         inc_mm_counter(mm, MM_SWAPENTS);
1915                         swp_pte = swp_entry_t !! 1263                 } else if (IS_ENABLED(CONFIG_MIGRATION)) {
1916                         if (anon_exclusive)   << 
1917                                 swp_pte = pte << 
1918                         if (pte_soft_dirty(pt << 
1919                                 swp_pte = pte << 
1920                         if (pte_uffd_wp(pteva << 
1921                                 swp_pte = pte << 
1922                         set_pte_at(mm, addres << 
1923                 } else {                      << 
1924                         /*                       1264                         /*
1925                          * This is a locked f !! 1265                          * Store the pfn of the page in a special migration
1926                          * so it cannot be re !! 1266                          * pte. do_swap_page() will wait until the migration
1927                          * cache and replaced !! 1267                          * pte is removed and then restart fault handling.
1928                          * mmu_notifier_inval << 
1929                          * concurrent thread  << 
1930                          * to point at a new  << 
1931                          * still using this f << 
1932                          *                    << 
1933                          * See Documentation/ << 
1934                          */                      1268                          */
1935                         dec_mm_counter(mm, mm !! 1269                         BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
                                                   >> 1270                         entry = make_migration_entry(page, pte_write(pteval));
1936                 }                                1271                 }
1937 discard:                                      !! 1272                 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1938                 if (unlikely(folio_test_huget !! 1273                 BUG_ON(pte_file(*pte));
1939                         hugetlb_remove_rmap(f !! 1274         } else if (IS_ENABLED(CONFIG_MIGRATION) &&
1940                 else                          !! 1275                    (TTU_ACTION(flags) == TTU_MIGRATION)) {
1941                         folio_remove_rmap_pte !! 1276                 /* Establish migration entry for a file page */
1942                 if (vma->vm_flags & VM_LOCKED !! 1277                 swp_entry_t entry;
1943                         mlock_drain_local();  !! 1278                 entry = make_migration_entry(page, pte_write(pteval));
1944                 folio_put(folio);             !! 1279                 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1945                 continue;                     !! 1280         } else
1946 walk_abort:                                   !! 1281                 dec_mm_counter(mm, MM_FILEPAGES);
1947                 ret = false;                  !! 1282 
1948 walk_done:                                    !! 1283         page_remove_rmap(page);
1949                 page_vma_mapped_walk_done(&pv !! 1284         page_cache_release(page);
1950                 break;                        !! 1285 
1951         }                                     !! 1286 out_unmap:
1952                                               !! 1287         pte_unmap_unlock(pte, ptl);
1953         mmu_notifier_invalidate_range_end(&ra !! 1288         if (ret != SWAP_FAIL)
1954                                               !! 1289                 mmu_notifier_invalidate_page(mm, address);
                                                   >> 1290 out:
1955         return ret;                              1291         return ret;
1956 }                                             << 
1957                                               << 
1958 static bool invalid_migration_vma(struct vm_a << 
1959 {                                             << 
1960         return vma_is_temporary_stack(vma);   << 
1961 }                                             << 
1962                                                  1292 
1963 static int folio_not_mapped(struct folio *fol !! 1293 out_mlock:
1964 {                                             !! 1294         pte_unmap_unlock(pte, ptl);
1965         return !folio_mapped(folio);          << 
1966 }                                             << 
1967                                                  1295 
1968 /**                                           << 
1969  * try_to_unmap - Try to remove all page tabl << 
1970  * @folio: The folio to unmap.                << 
1971  * @flags: action and flags                   << 
1972  *                                            << 
1973  * Tries to remove all the page table entries << 
1974  * folio.  It is the caller's responsibility  << 
1975  * still mapped if needed (use TTU_SYNC to pr << 
1976  *                                            << 
1977  * Context: Caller must hold the folio lock.  << 
1978  */                                           << 
1979 void try_to_unmap(struct folio *folio, enum t << 
1980 {                                             << 
1981         struct rmap_walk_control rwc = {      << 
1982                 .rmap_one = try_to_unmap_one, << 
1983                 .arg = (void *)flags,         << 
1984                 .done = folio_not_mapped,     << 
1985                 .anon_lock = folio_lock_anon_ << 
1986         };                                    << 
1987                                                  1296 
1988         if (flags & TTU_RMAP_LOCKED)          !! 1297         /*
1989                 rmap_walk_locked(folio, &rwc) !! 1298          * We need mmap_sem locking; otherwise the VM_LOCKED check gives
1990                 else                                  !! 1299          * an unstable, racy result. Also, we can't wait here because
1991                 rmap_walk(folio, &rwc);       !! 1300          * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
                                                   >> 1301          * If the trylock fails, the page remains on the evictable lru and
                                                   >> 1302          * later vmscan can retry moving it to the unevictable lru if the
                                                   >> 1303          * page is actually mlocked.
                                                   >> 1304          */
                                                   >> 1305         if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
                                                   >> 1306                 if (vma->vm_flags & VM_LOCKED) {
                                                   >> 1307                         mlock_vma_page(page);
                                                   >> 1308                         ret = SWAP_MLOCK;
                                                   >> 1309                 }
                                                   >> 1310                 up_read(&vma->vm_mm->mmap_sem);
                                                   >> 1311         }
                                                   >> 1312         return ret;
1992 }                                                1313 }
1993                                                  1314 
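/*
 * Editor's sketch, not part of either rmap.c version shown here: how a
 * reclaim-style caller might drive the 6.x try_to_unmap() above.  The
 * function returns void, so "success" simply means no mappings remain;
 * passing TTU_SYNC is an assumption about the caller wanting a stable
 * answer, as the kernel-doc above suggests.
 */
static bool example_unmap_folio(struct folio *folio)
{
	/* try_to_unmap() requires the folio lock to be held. */
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);

	try_to_unmap(folio, TTU_SYNC);

	return !folio_mapped(folio);
}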
1994 /*                                               1315 /*
1995  * @arg: enum ttu_flags will be passed to thi !! 1316  * objrmap doesn't work for nonlinear VMAs because the assumption that
1996  *                                            !! 1317  * offset-into-file correlates with offset-into-virtual-addresses does not hold.
1997  * If TTU_SPLIT_HUGE_PMD is specified any PMD !! 1318  * Consequently, given a particular page and its ->index, we cannot locate the
1998  * containing migration entries.              !! 1319  * ptes which are mapping that page without an exhaustive linear search.
                                                   >> 1320  *
                                                   >> 1321  * So what this code does is a mini "virtual scan" of each nonlinear VMA which
                                                   >> 1322  * maps the file to which the target page belongs.  The ->vm_private_data field
                                                   >> 1323  * holds the current cursor into that scan.  Successive searches will circulate
                                                   >> 1324  * around the vma's virtual address space.
                                                   >> 1325  *
                                                   >> 1326  * So as more replacement pressure is applied to the pages in a nonlinear VMA,
                                                   >> 1327  * more scanning pressure is placed against them as well.  Eventually pages
                                                   >> 1328  * will become fully unmapped and be eligible for eviction.
                                                   >> 1329  *
                                                   >> 1330  * For very sparsely populated VMAs this is a little inefficient - chances are
                                                   >> 1331  * there won't be many ptes located within the scan cluster.  In this case
                                                   >> 1332  * maybe we could scan further - to the end of the pte page, perhaps.
                                                   >> 1333  *
                                                   >> 1334  * Mlocked pages:  check VM_LOCKED under mmap_sem held for read, if we can
                                                   >> 1335  * acquire it without blocking.  If vma locked, mlock the pages in the cluster,
                                                   >> 1336  * rather than unmapping them.  If we encounter the "check_page" that vmscan is
                                                   >> 1337  * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
1999  */                                              1338  */
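/*
 * Editor's worked example for the nonlinear "virtual scan" described
 * above (CLUSTER_SIZE/CLUSTER_MASK are defined just below).  Assuming
 * 4KiB pages and a 2MiB PMD_SIZE, CLUSTER_SIZE = min(32 * 4096, PMD_SIZE)
 * = 128KiB = 0x20000.  For a vma starting at 0x7f0000000000 with a saved
 * cursor of 0x21000:
 *
 *   address = (0x7f0000000000 + 0x21000) & ~(0x20000 - 1)
 *           = 0x7f0000020000
 *   end     = address + CLUSTER_SIZE = 0x7f0000040000
 *
 * so each try_to_unmap_cluster() call inspects one 128KiB-aligned window
 * of ptes, and the saved cursor advances that window on each pass.
 */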
2000 static bool try_to_migrate_one(struct folio * !! 1339 #define CLUSTER_SIZE    min(32*PAGE_SIZE, PMD_SIZE)
2001                      unsigned long address, v !! 1340 #define CLUSTER_MASK    (~(CLUSTER_SIZE - 1))
                                                   >> 1341 
                                                   >> 1342 static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
                                                   >> 1343                 struct vm_area_struct *vma, struct page *check_page)
2002 {                                                1344 {
2003         struct mm_struct *mm = vma->vm_mm;       1345         struct mm_struct *mm = vma->vm_mm;
2004         DEFINE_FOLIO_VMA_WALK(pvmw, folio, vm !! 1346         pmd_t *pmd;
                                                   >> 1347         pte_t *pte;
2005         pte_t pteval;                            1348         pte_t pteval;
2006         struct page *subpage;                 !! 1349         spinlock_t *ptl;
2007         bool anon_exclusive, ret = true;      !! 1350         struct page *page;
2008         struct mmu_notifier_range range;      !! 1351         unsigned long address;
2009         enum ttu_flags flags = (enum ttu_flag !! 1352         unsigned long mmun_start;       /* For mmu_notifiers */
2010         unsigned long pfn;                    !! 1353         unsigned long mmun_end;         /* For mmu_notifiers */
2011         unsigned long hsz = 0;                !! 1354         unsigned long end;
                                                   >> 1355         int ret = SWAP_AGAIN;
                                                   >> 1356         int locked_vma = 0;
2012                                                  1357 
2013         /*                                    !! 1358         address = (vma->vm_start + cursor) & CLUSTER_MASK;
2014          * When racing against e.g. zap_pte_r !! 1359         end = address + CLUSTER_SIZE;
2015          * in between its ptep_get_and_clear_ !! 1360         if (address < vma->vm_start)
2016          * try_to_migrate() may return before !! 1361                 address = vma->vm_start;
2017          * if page table locking is skipped:  !! 1362         if (end > vma->vm_end)
2018          */                                   !! 1363                 end = vma->vm_end;
2019         if (flags & TTU_SYNC)                 << 
2020                 pvmw.flags = PVMW_SYNC;       << 
2021                                                  1364 
2022         /*                                    !! 1365         pmd = mm_find_pmd(mm, address);
2023          * unmap_page() in mm/huge_memory.c i !! 1366         if (!pmd)
2024          * TTU_SPLIT_HUGE_PMD and it wants to !! 1367                 return ret;
2025          */                                   !! 1368 
2026         if (flags & TTU_SPLIT_HUGE_PMD)       !! 1369         mmun_start = address;
2027                 split_huge_pmd_address(vma, a !! 1370         mmun_end   = end;
                                                   >> 1371         mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
2028                                                  1372 
2029         /*                                       1373         /*
2030          * For THP, we have to assume the wor !! 1374          * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
2031          * For hugetlb, it could be much wors !! 1375          * keep the sem while scanning the cluster for mlocking pages.
2032          * invalidation in the case of pmd sh << 
2033          *                                    << 
2034          * Note that the page can not be free << 
2035          * try_to_unmap() must hold a referen << 
2036          */                                      1376          */
2037         range.end = vma_address_end(&pvmw);   !! 1377         if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
2038         mmu_notifier_range_init(&range, MMU_N !! 1378                 locked_vma = (vma->vm_flags & VM_LOCKED);
2039                                 address, rang !! 1379                 if (!locked_vma)
2040         if (folio_test_hugetlb(folio)) {      !! 1380                         up_read(&vma->vm_mm->mmap_sem); /* don't need it */
2041                 /*                            << 
2042                  * If sharing is possible, st << 
2043                  * accordingly.               << 
2044                  */                           << 
2045                 adjust_range_if_pmd_sharing_p << 
2046                                               << 
2047                                               << 
2048                 /* We need the huge page size << 
2049                 hsz = huge_page_size(hstate_v << 
2050         }                                        1381         }
2051         mmu_notifier_invalidate_range_start(& << 
2052                                               << 
2053         while (page_vma_mapped_walk(&pvmw)) { << 
2054 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION       << 
2055                 /* PMD-mapped THP migration e << 
2056                 if (!pvmw.pte) {              << 
2057                         subpage = folio_page( << 
2058                                 pmd_pfn(*pvmw << 
2059                         VM_BUG_ON_FOLIO(folio << 
2060                                         !foli << 
2061                                               << 
2062                         if (set_pmd_migration << 
2063                                 ret = false;  << 
2064                                 page_vma_mapp << 
2065                                 break;        << 
2066                         }                     << 
2067                         continue;             << 
2068                 }                             << 
2069 #endif                                        << 
2070                                               << 
2071                 /* Unexpected PMD-mapped THP? << 
2072                 VM_BUG_ON_FOLIO(!pvmw.pte, fo << 
2073                                                  1382 
2074                 pfn = pte_pfn(ptep_get(pvmw.p !! 1383         pte = pte_offset_map_lock(mm, pmd, address, &ptl);
2075                                                  1384 
2076                 if (folio_is_zone_device(foli !! 1385         /* Update high watermark before we lower rss */
2077                         /*                    !! 1386         update_hiwater_rss(mm);
2078                          * Our PTE is a non-p << 
2079                          * calculating the su << 
2080                          * result in an inval << 
2081                          *                    << 
2082                          * Since only PAGE_SI << 
2083                          * migrated, just set << 
2084                          * changed when hugep << 
2085                          * memory are support << 
2086                          */                   << 
2087                         VM_BUG_ON_FOLIO(folio << 
2088                         subpage = &folio->pag << 
2089                 } else {                      << 
2090                         subpage = folio_page( << 
2091                 }                             << 
2092                 address = pvmw.address;       << 
2093                 anon_exclusive = folio_test_a << 
2094                                  PageAnonExcl << 
2095                                                  1387 
2096                 if (folio_test_hugetlb(folio) !! 1388         for (; address < end; pte++, address += PAGE_SIZE) {
2097                         bool anon = folio_tes !! 1389                 if (!pte_present(*pte))
2098                                               !! 1390                         continue;
2099                         /*                    !! 1391                 page = vm_normal_page(vma, address, *pte);
2100                          * huge_pmd_unshare m !! 1392                 BUG_ON(!page || PageAnon(page));
2101                          * There is no way of << 
2102                          * be cached for this << 
2103                          * start/end were alr << 
2104                          * range.             << 
2105                          */                   << 
2106                         flush_cache_range(vma << 
2107                                                  1393 
2108                         /*                    !! 1394                 if (locked_vma) {
2109                          * To call huge_pmd_u !! 1395                         if (page == check_page) {
2110                          * held in write mode !! 1396                                 /* we know we have check_page locked */
2111                          * do this outside rm !! 1397                                 mlock_vma_page(page);
2112                          *                    !! 1398                                 ret = SWAP_MLOCK;
2113                          * We also must hold  !! 1399                         } else if (trylock_page(page)) {
2114                          * Lock order dictate << 
2115                          * i_mmap_rwsem.  We  << 
2116                          * fail if unsuccessf << 
2117                          */                   << 
2118                         if (!anon) {          << 
2119                                 VM_BUG_ON(!(f << 
2120                                 if (!hugetlb_ << 
2121                                         page_ << 
2122                                         ret = << 
2123                                         break << 
2124                                 }             << 
2125                                 if (huge_pmd_ << 
2126                                         huget << 
2127                                         flush << 
2128                                               << 
2129                                               << 
2130                                         /*    << 
2131                                          * Th << 
2132                                          * dr << 
2133                                          * co << 
2134                                          * Re << 
2135                                          * no << 
2136                                          * re << 
2137                                          * ac << 
2138                                          * to << 
2139                                          */   << 
2140                                         page_ << 
2141                                         break << 
2142                                 }             << 
2143                                 hugetlb_vma_u << 
2144                         }                     << 
2145                         /* Nuke the hugetlb p << 
2146                         pteval = huge_ptep_cl << 
2147                 } else {                      << 
2148                         flush_cache_page(vma, << 
2149                         /* Nuke the page tabl << 
2150                         if (should_defer_flus << 
2151                                 /*               1400                                 /*
2152                                  * We clear t !! 1401                                  * If we can lock the page, perform mlock.
2153                                  * a remote C !! 1402                                  * Otherwise leave the page alone, it will be
2154                                  * If the ent !! 1403                                  * eventually encountered again later.
2155                                  * architectu << 
2156                                  * transition << 
2157                                  * and traps  << 
2158                                  */              1404                                  */
2159                                 pteval = ptep !! 1405                                 mlock_vma_page(page);
2160                                               !! 1406                                 unlock_page(page);
2161                                 set_tlb_ubc_f << 
2162                         } else {              << 
2163                                 pteval = ptep << 
2164                         }                        1407                         }
                                                   >> 1408                         continue;       /* don't unmap */
2165                 }                                1409                 }
2166                                                  1410 
2167                 /* Set the dirty flag on the  !! 1411                 if (ptep_clear_flush_young_notify(vma, address, pte))
2168                 if (pte_dirty(pteval))        !! 1412                         continue;
2169                         folio_mark_dirty(foli << 
2170                                               << 
2171                 /* Update high watermark befo << 
2172                 update_hiwater_rss(mm);       << 
2173                                               << 
2174                 if (folio_is_device_private(f << 
2175                         unsigned long pfn = f << 
2176                         swp_entry_t entry;    << 
2177                         pte_t swp_pte;        << 
2178                                               << 
2179                         if (anon_exclusive)   << 
2180                                 WARN_ON_ONCE( << 
2181                                               << 
2182                                                  1413 
2183                         /*                    !! 1414                 /* Nuke the page table entry. */
2184                          * Store the pfn of t !! 1415                 flush_cache_page(vma, address, pte_pfn(*pte));
2185                          * pte. do_swap_page( !! 1416                 pteval = ptep_clear_flush(vma, address, pte);
2186                          * pte is removed and << 
2187                          */                   << 
2188                         entry = pte_to_swp_en << 
2189                         if (is_writable_devic << 
2190                                 entry = make_ << 
2191                         else if (anon_exclusi << 
2192                                 entry = make_ << 
2193                         else                  << 
2194                                 entry = make_ << 
2195                         swp_pte = swp_entry_t << 
2196                                                  1417 
2197                         /*                    !! 1418                 /* If nonlinear, store the file page offset in the pte. */
2198                          * pteval maps a zone !! 1419                 if (page->index != linear_page_index(vma, address))
2199                          * a swap pte.        !! 1420                         set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
2200                          */                   << 
2201                         if (pte_swp_soft_dirt << 
2202                                 swp_pte = pte << 
2203                         if (pte_swp_uffd_wp(p << 
2204                                 swp_pte = pte << 
2205                         set_pte_at(mm, pvmw.a << 
2206                         trace_set_migration_p << 
2207                                               << 
2208                         /*                    << 
2209                          * No need to invalid << 
2210                          * against the specia << 
2211                          */                   << 
2212                 } else if (PageHWPoison(subpa << 
2213                         pteval = swp_entry_to << 
2214                         if (folio_test_hugetl << 
2215                                 hugetlb_count << 
2216                                 set_huge_pte_ << 
2217                                               << 
2218                         } else {              << 
2219                                 dec_mm_counte << 
2220                                 set_pte_at(mm << 
2221                         }                     << 
2222                                                  1421 
2223                 } else if (pte_unused(pteval) !! 1422                 /* Move the dirty bit to the physical page now the pte is gone. */
2224                         /*                    !! 1423                 if (pte_dirty(pteval))
2225                          * The guest indicate !! 1424                         set_page_dirty(page);
2226                          * interest anymore.  << 
2227                          * will take care of  << 
2228                          * A future reference << 
2229                          * page. When userfau << 
2230                          * this page though,  << 
2231                          * migration) will no << 
2232                          * copied pages.      << 
2233                          */                   << 
2234                         dec_mm_counter(mm, mm << 
2235                 } else {                      << 
2236                         swp_entry_t entry;    << 
2237                         pte_t swp_pte;        << 
2238                                               << 
2239                         if (arch_unmap_one(mm << 
2240                                 if (folio_tes << 
2241                                         set_h << 
2242                                               << 
2243                                 else          << 
2244                                         set_p << 
2245                                 ret = false;  << 
2246                                 page_vma_mapp << 
2247                                 break;        << 
2248                         }                     << 
2249                         VM_BUG_ON_PAGE(pte_wr << 
2250                                        !anon_ << 
2251                                                  1425 
2252                         /* See folio_try_shar !! 1426                 page_remove_rmap(page);
2253                         if (folio_test_hugetl !! 1427                 page_cache_release(page);
2254                                 if (anon_excl !! 1428                 dec_mm_counter(mm, MM_FILEPAGES);
2255                                     hugetlb_t !! 1429                 (*mapcount)--;
2256                                         set_h !! 1430         }
2257                                               !! 1431         pte_unmap_unlock(pte - 1, ptl);
2258                                         ret = !! 1432         mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2259                                         page_ !! 1433         if (locked_vma)
2260                                         break !! 1434                 up_read(&vma->vm_mm->mmap_sem);
2261                                 }             !! 1435         return ret;
2262                         } else if (anon_exclu !! 1436 }
2263                                    folio_try_ << 
2264                                 set_pte_at(mm << 
2265                                 ret = false;  << 
2266                                 page_vma_mapp << 
2267                                 break;        << 
2268                         }                     << 
2269                                                  1437 
2270                         /*                    !! 1438 bool is_vma_temporary_stack(struct vm_area_struct *vma)
2271                          * Store the pfn of t !! 1439 {
2272                          * pte. do_swap_page( !! 1440         int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
2273                          * pte is removed and << 
2274                          */                   << 
2275                         if (pte_write(pteval) << 
2276                                 entry = make_ << 
2277                                               << 
2278                         else if (anon_exclusi << 
2279                                 entry = make_ << 
2280                                               << 
2281                         else                  << 
2282                                 entry = make_ << 
2283                                               << 
2284                         if (pte_young(pteval) << 
2285                                 entry = make_ << 
2286                         if (pte_dirty(pteval) << 
2287                                 entry = make_ << 
2288                         swp_pte = swp_entry_t << 
2289                         if (pte_soft_dirty(pt << 
2290                                 swp_pte = pte << 
2291                         if (pte_uffd_wp(pteva << 
2292                                 swp_pte = pte << 
2293                         if (folio_test_hugetl << 
2294                                 set_huge_pte_ << 
2295                                               << 
2296                         else                  << 
2297                                 set_pte_at(mm << 
2298                         trace_set_migration_p << 
2299                                               << 
2300                         /*                    << 
2301                          * No need to invalid << 
2302                          * against the specia << 
2303                          */                   << 
2304                 }                             << 
2305                                                  1441 
2306                 if (unlikely(folio_test_huget !! 1442         if (!maybe_stack)
2307                         hugetlb_remove_rmap(f !! 1443                 return false;
2308                 else                          << 
2309                         folio_remove_rmap_pte << 
2310                 if (vma->vm_flags & VM_LOCKED << 
2311                         mlock_drain_local();  << 
2312                 folio_put(folio);             << 
2313         }                                     << 
2314                                                  1444 
2315         mmu_notifier_invalidate_range_end(&ra !! 1445         if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
                                                   >> 1446                                                 VM_STACK_INCOMPLETE_SETUP)
                                                   >> 1447                 return true;
2316                                                  1448 
2317         return ret;                           !! 1449         return false;
2318 }                                                1450 }
2319                                                  1451 
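/*
 * Editor's sketch, not part of rmap.c: the core of what
 * try_to_migrate_one() above installs in place of a present pte.
 * Deliberately simplified -- the anon-exclusive, young/dirty,
 * soft-dirty and uffd-wp handling of the real code is omitted.
 */
static pte_t example_migration_swp_pte(struct page *subpage, pte_t pteval)
{
	swp_entry_t entry;

	if (pte_write(pteval))
		entry = make_writable_migration_entry(page_to_pfn(subpage));
	else
		entry = make_readable_migration_entry(page_to_pfn(subpage));

	return swp_entry_to_pte(entry);
}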
2320 /**                                              1452 /**
2321  * try_to_migrate - try to replace all page t !! 1453  * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
2322  * @folio: the folio to replace page table en !! 1454  * rmap method
                                                   >> 1455  * @page: the page to unmap/unlock
2323  * @flags: action and flags                      1456  * @flags: action and flags
2324  *                                               1457  *
2325  * Tries to remove all the page table entries !! 1458  * Find all the mappings of a page using the mapping pointer and the vma chains
2326  * replace them with special swap entries. Ca !! 1459  * contained in the anon_vma struct it points to.
                                                   >> 1460  *
                                                   >> 1461  * This function is only called from try_to_unmap/try_to_munlock for
                                                   >> 1462  * anonymous pages.
                                                   >> 1463  * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
                                                   >> 1464  * where the page was found will be held for write.  So, we won't recheck
                                                   >> 1465  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
                                                   >> 1466  * VM_LOCKED.
2327  */                                              1467  */
2328 void try_to_migrate(struct folio *folio, enum !! 1468 static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
2329 {                                                1469 {
2330         struct rmap_walk_control rwc = {      !! 1470         struct anon_vma *anon_vma;
2331                 .rmap_one = try_to_migrate_on !! 1471         pgoff_t pgoff;
2332                 .arg = (void *)flags,         !! 1472         struct anon_vma_chain *avc;
2333                 .done = folio_not_mapped,     !! 1473         int ret = SWAP_AGAIN;
2334                 .anon_lock = folio_lock_anon_ << 
2335         };                                    << 
2336                                               << 
2337         /*                                    << 
2338          * Migration always ignores mlock and << 
2339          * TTU_SPLIT_HUGE_PMD, TTU_SYNC, and  << 
2340          */                                   << 
2341         if (WARN_ON_ONCE(flags & ~(TTU_RMAP_L << 
2342                                         TTU_S << 
2343                 return;                       << 
2344                                               << 
2345         if (folio_is_zone_device(folio) &&    << 
2346             (!folio_is_device_private(folio)  << 
2347                 return;                       << 
2348                                                  1474 
2349         /*                                    !! 1475         anon_vma = page_lock_anon_vma_read(page);
2350          * During exec, a temporary VMA is se !! 1476         if (!anon_vma)
2351          * The VMA is moved under the anon_vm !! 1477                 return ret;
2352          * page tables leading to a race wher << 
2353          * find the migration ptes. Rather th << 
2354          * locking requirements of exec(), mi << 
2355          * temporary VMAs until after exec()  << 
2356          */                                   << 
2357         if (!folio_test_ksm(folio) && folio_t << 
2358                 rwc.invalid_vma = invalid_mig << 
2359                                                  1478 
2360         if (flags & TTU_RMAP_LOCKED)          !! 1479         pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
2361                 rmap_walk_locked(folio, &rwc) !! 1480         anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
2362         else                                  !! 1481                 struct vm_area_struct *vma = avc->vma;
2363                 rmap_walk(folio, &rwc);       !! 1482                 unsigned long address;
2364 }                                             << 
2365                                                  1483 
2366 #ifdef CONFIG_DEVICE_PRIVATE                  !! 1484                 /*
2367 struct make_exclusive_args {                  !! 1485                  * During exec, a temporary VMA is setup and later moved.
2368         struct mm_struct *mm;                 !! 1486                  * The VMA is moved under the anon_vma lock but not the
2369         unsigned long address;                !! 1487                  * page tables leading to a race where migration cannot
2370         void *owner;                          !! 1488                  * find the migration ptes. Rather than increasing the
2371         bool valid;                           !! 1489                  * locking requirements of exec(), migration skips
2372 };                                            !! 1490                  * temporary VMAs until after exec() completes.
                                                   >> 1491                  */
                                                   >> 1492                 if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
                                                   >> 1493                                 is_vma_temporary_stack(vma))
                                                   >> 1494                         continue;
2373                                                  1495 
2374 static bool page_make_device_exclusive_one(st !! 1496                 address = vma_address(page, vma);
2375                 struct vm_area_struct *vma, u !! 1497                 ret = try_to_unmap_one(page, vma, address, flags);
2376 {                                             !! 1498                 if (ret != SWAP_AGAIN || !page_mapped(page))
2377         struct mm_struct *mm = vma->vm_mm;    << 
2378         DEFINE_FOLIO_VMA_WALK(pvmw, folio, vm << 
2379         struct make_exclusive_args *args = pr << 
2380         pte_t pteval;                         << 
2381         struct page *subpage;                 << 
2382         bool ret = true;                      << 
2383         struct mmu_notifier_range range;      << 
2384         swp_entry_t entry;                    << 
2385         pte_t swp_pte;                        << 
2386         pte_t ptent;                          << 
2387                                               << 
2388         mmu_notifier_range_init_owner(&range, << 
2389                                       vma->vm << 
2390                                       address << 
2391                                       args->o << 
2392         mmu_notifier_invalidate_range_start(& << 
2393                                               << 
2394         while (page_vma_mapped_walk(&pvmw)) { << 
2395                 /* Unexpected PMD-mapped THP? << 
2396                 VM_BUG_ON_FOLIO(!pvmw.pte, fo << 
2397                                               << 
2398                 ptent = ptep_get(pvmw.pte);   << 
2399                 if (!pte_present(ptent)) {    << 
2400                         ret = false;          << 
2401                         page_vma_mapped_walk_ << 
2402                         break;                   1499                         break;
2403                 }                             !! 1500         }
2404                                               << 
2405                 subpage = folio_page(folio,   << 
2406                                 pte_pfn(ptent << 
2407                 address = pvmw.address;       << 
2408                                                  1501 
2409                 /* Nuke the page table entry. !! 1502         page_unlock_anon_vma_read(anon_vma);
2410                 flush_cache_page(vma, address !! 1503         return ret;
2411                 pteval = ptep_clear_flush(vma !! 1504 }
2412                                                  1505 
2413                 /* Set the dirty flag on the  !! 1506 /**
2414                 if (pte_dirty(pteval))        !! 1507  * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
2415                         folio_mark_dirty(foli !! 1508  * @page: the page to unmap/unlock
                                                   >> 1509  * @flags: action and flags
                                                   >> 1510  *
                                                   >> 1511  * Find all the mappings of a page using the mapping pointer and the vma chains
                                                   >> 1512  * contained in the address_space struct it points to.
                                                   >> 1513  *
                                                   >> 1514  * This function is only called from try_to_unmap/try_to_munlock for
                                                   >> 1515  * object-based pages.
                                                   >> 1516  * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
                                                   >> 1517  * where the page was found will be held for write.  So, we won't recheck
                                                   >> 1518  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
                                                   >> 1519  * VM_LOCKED.
                                                   >> 1520  */
                                                   >> 1521 static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
                                                   >> 1522 {
                                                   >> 1523         struct address_space *mapping = page->mapping;
                                                   >> 1524         pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
                                                   >> 1525         struct vm_area_struct *vma;
                                                   >> 1526         int ret = SWAP_AGAIN;
                                                   >> 1527         unsigned long cursor;
                                                   >> 1528         unsigned long max_nl_cursor = 0;
                                                   >> 1529         unsigned long max_nl_size = 0;
                                                   >> 1530         unsigned int mapcount;
                                                   >> 1531 
                                                   >> 1532         if (PageHuge(page))
                                                   >> 1533                 pgoff = page->index << compound_order(page);
                                                   >> 1534 
                                                   >> 1535         mutex_lock(&mapping->i_mmap_mutex);
                                                   >> 1536         vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                                                   >> 1537                 unsigned long address = vma_address(page, vma);
                                                   >> 1538                 ret = try_to_unmap_one(page, vma, address, flags);
                                                   >> 1539                 if (ret != SWAP_AGAIN || !page_mapped(page))
                                                   >> 1540                         goto out;
                                                   >> 1541         }
2416                                                  1542 
2417                 /*                            !! 1543         if (list_empty(&mapping->i_mmap_nonlinear))
2418                  * Check that our target page !! 1544                 goto out;
2419                  * address.                   << 
2420                  */                           << 
2421                 if (args->mm == mm && args->a << 
2422                     pte_write(pteval))        << 
2423                         args->valid = true;   << 
2424                                                  1545 
2425                 /*                            !! 1546         /*
2426                  * Store the pfn of the page  !! 1547          * We don't bother to try to find the munlocked page in nonlinears.
2427                  * pte. do_swap_page() will w !! 1548          * It's costly. Instead, later, page reclaim logic may call
2428                  * pte is removed and then re !! 1549          * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
2429                  */                           !! 1550          */
2430                 if (pte_write(pteval))        !! 1551         if (TTU_ACTION(flags) == TTU_MUNLOCK)
2431                         entry = make_writable !! 1552                 goto out;
2432                                               << 
2433                 else                          << 
2434                         entry = make_readable << 
2435                                               << 
2436                 swp_pte = swp_entry_to_pte(en << 
2437                 if (pte_soft_dirty(pteval))   << 
2438                         swp_pte = pte_swp_mks << 
2439                 if (pte_uffd_wp(pteval))      << 
2440                         swp_pte = pte_swp_mku << 
2441                                                  1553 
2442                 set_pte_at(mm, address, pvmw. !! 1554         list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                   >> 1555                                                         shared.nonlinear) {
                                                   >> 1556                 cursor = (unsigned long) vma->vm_private_data;
                                                   >> 1557                 if (cursor > max_nl_cursor)
                                                   >> 1558                         max_nl_cursor = cursor;
                                                   >> 1559                 cursor = vma->vm_end - vma->vm_start;
                                                   >> 1560                 if (cursor > max_nl_size)
                                                   >> 1561                         max_nl_size = cursor;
                                                   >> 1562         }
2443                                                  1563 
2444                 /*                            !! 1564         if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
2445                  * There is a reference on th !! 1565                 ret = SWAP_FAIL;
2446                  * been removed, so shouldn't !! 1566                 goto out;
2447                  */                           << 
2448                 folio_remove_rmap_pte(folio,  << 
2449         }                                        1567         }
2450                                                  1568 
2451         mmu_notifier_invalidate_range_end(&ra !! 1569         /*
                                                   >> 1570          * We don't try to search for this page in the nonlinear vmas,
                                                   >> 1571          * and page_referenced wouldn't have found it anyway.  Instead
                                                   >> 1572          * just walk the nonlinear vmas trying to age and unmap some.
                                                   >> 1573          * The mapcount of the page we came in with is irrelevant,
                                                   >> 1574          * but even so use it as a guide to how hard we should try?
                                                   >> 1575          */
                                                   >> 1576         mapcount = page_mapcount(page);
                                                   >> 1577         if (!mapcount)
                                                   >> 1578                 goto out;
                                                   >> 1579         cond_resched();
                                                   >> 1580 
                                                   >> 1581         max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
                                                   >> 1582         if (max_nl_cursor == 0)
                                                   >> 1583                 max_nl_cursor = CLUSTER_SIZE;
                                                   >> 1584 
                                                   >> 1585         do {
                                                   >> 1586                 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                   >> 1587                                                         shared.nonlinear) {
                                                   >> 1588                         cursor = (unsigned long) vma->vm_private_data;
                                                   >> 1589                         while ( cursor < max_nl_cursor &&
                                                   >> 1590                                 cursor < vma->vm_end - vma->vm_start) {
                                                   >> 1591                                 if (try_to_unmap_cluster(cursor, &mapcount,
                                                   >> 1592                                                 vma, page) == SWAP_MLOCK)
                                                   >> 1593                                         ret = SWAP_MLOCK;
                                                   >> 1594                                 cursor += CLUSTER_SIZE;
                                                   >> 1595                                 vma->vm_private_data = (void *) cursor;
                                                   >> 1596                                 if ((int)mapcount <= 0)
                                                   >> 1597                                         goto out;
                                                   >> 1598                         }
                                                   >> 1599                         vma->vm_private_data = (void *) max_nl_cursor;
                                                   >> 1600                 }
                                                   >> 1601                 cond_resched();
                                                   >> 1602                 max_nl_cursor += CLUSTER_SIZE;
                                                   >> 1603         } while (max_nl_cursor <= max_nl_size);
2452                                                  1604 
                                                   >> 1605         /*
                                                   >> 1606          * Don't loop forever (perhaps all the remaining pages are
                                                   >> 1607          * in locked vmas).  Reset cursor on all unreserved nonlinear
                                                   >> 1608          * vmas, now forgetting on which ones it had fallen behind.
                                                   >> 1609          */
                                                   >> 1610         list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
                                                   >> 1611                 vma->vm_private_data = NULL;
                                                   >> 1612 out:
                                                   >> 1613         mutex_unlock(&mapping->i_mmap_mutex);
2453         return ret;                              1614         return ret;
2454 }                                                1615 }
2455                                                  1616 
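/*
 * Editor's sketch, not part of rmap.c: the swap pte that
 * page_make_device_exclusive_one() above installs for a present pte
 * (soft-dirty/uffd-wp propagation omitted for brevity).  On a later CPU
 * fault, do_swap_page() notifies the device and restores a normal pte,
 * revoking the exclusive access.
 */
static pte_t example_device_exclusive_pte(struct page *subpage, pte_t pteval)
{
	swp_entry_t entry;

	if (pte_write(pteval))
		entry = make_writable_device_exclusive_entry(page_to_pfn(subpage));
	else
		entry = make_readable_device_exclusive_entry(page_to_pfn(subpage));

	return swp_entry_to_pte(entry);
}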
2456 /**                                              1617 /**
2457  * folio_make_device_exclusive - Mark the fol !! 1618  * try_to_unmap - try to remove all page table mappings to a page
2458  * @folio: The folio to replace page table en !! 1619  * @page: the page to get unmapped
2459  * @mm: The mm_struct where the folio is expe !! 1620  * @flags: action and flags
2460  * @address: Address where the folio is expec << 
2461  * @owner: passed to MMU_NOTIFY_EXCLUSIVE ran << 
2462  *                                               1621  *
2463  * Tries to remove all the page table entries    1622  * Tries to remove all the page table entries which are mapping this
2464  * folio and replace them with special device !! 1623  * page, used in the pageout path.  Caller must hold the page lock.
2465  * grant a device exclusive access to the fol !! 1624  * Return values are:
2466  *                                               1625  *
2467  * Context: Caller must hold the folio lock.  !! 1626  * SWAP_SUCCESS - we succeeded in removing all mappings
2468  * Return: false if the page is still mapped, !! 1627  * SWAP_AGAIN   - we missed a mapping, try again later
2469  * from the expected address. Otherwise retur !! 1628  * SWAP_FAIL    - the page is unswappable
2470  */                                           !! 1629  * SWAP_MLOCK   - page is mlocked.
2471 static bool folio_make_device_exclusive(struc !! 1630  */
2472                 struct mm_struct *mm, unsigne !! 1631 int try_to_unmap(struct page *page, enum ttu_flags flags)
2473 {                                             !! 1632 {
2474         struct make_exclusive_args args = {   !! 1633         int ret;
2475                 .mm = mm,                     << 
2476                 .address = address,           << 
2477                 .owner = owner,               << 
2478                 .valid = false,               << 
2479         };                                    << 
2480         struct rmap_walk_control rwc = {      << 
2481                 .rmap_one = page_make_device_ << 
2482                 .done = folio_not_mapped,     << 
2483                 .anon_lock = folio_lock_anon_ << 
2484                 .arg = &args,                 << 
2485         };                                    << 
2486                                               << 
2487         /*                                    << 
2488          * Restrict to anonymous folios for n << 
2489          * issues.                            << 
2490          */                                   << 
2491         if (!folio_test_anon(folio))          << 
2492                 return false;                 << 
2493                                                  1634 
2494         rmap_walk(folio, &rwc);               !! 1635         BUG_ON(!PageLocked(page));
                                                   >> 1636         VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
2495                                                  1637 
2496         return args.valid && !folio_mapcount( !! 1638         if (unlikely(PageKsm(page)))
                                                   >> 1639                 ret = try_to_unmap_ksm(page, flags);
                                                   >> 1640         else if (PageAnon(page))
                                                   >> 1641                 ret = try_to_unmap_anon(page, flags);
                                                   >> 1642         else
                                                   >> 1643                 ret = try_to_unmap_file(page, flags);
                                                   >> 1644         if (ret != SWAP_MLOCK && !page_mapped(page))
                                                   >> 1645                 ret = SWAP_SUCCESS;
                                                   >> 1646         return ret;
2497 }                                                1647 }
2498                                                  1648 
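/*
 * Editor's sketch, not from this file: how a 3.10-era reclaim caller
 * (roughly vmscan's shrink_page_list()) consumed the SWAP_* codes of the
 * old try_to_unmap() above.  The errno mapping is purely illustrative.
 */
static int example_reclaim_unmap(struct page *page)
{
	BUG_ON(!PageLocked(page));

	switch (try_to_unmap(page, TTU_UNMAP)) {
	case SWAP_SUCCESS:
		return 0;		/* fully unmapped, may be paged out */
	case SWAP_AGAIN:
		return -EAGAIN;		/* missed a mapping, retry later */
	case SWAP_MLOCK:
		return -EBUSY;		/* page belongs on the unevictable list */
	default:			/* SWAP_FAIL */
		return -EPERM;		/* keep the page, it is unswappable */
	}
}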
2499 /**                                              1649 /**
2500  * make_device_exclusive_range() - Mark a ran !! 1650  * try_to_munlock - try to munlock a page
2501  * @mm: mm_struct of associated target proces !! 1651  * @page: the page to be munlocked
2502  * @start: start of the region to mark for ex !! 1652  *
2503  * @end: end address of region                !! 1653  * Called from munlock code.  Checks all of the VMAs mapping the page
2504  * @pages: returns the pages which were succe !! 1654  * to make sure nobody else has this page mlocked. The page will be
2505  * @owner: passed to MMU_NOTIFY_EXCLUSIVE ran !! 1655  * returned with PG_mlocked cleared if no other vmas have it mlocked.
2506  *                                            !! 1656  *
2507  * Returns: number of pages found in the rang !! 1657  * Return values are:
2508  * exclusive access only if the page pointer  !! 1658  *
2509  *                                            !! 1659  * SWAP_AGAIN   - no vma is holding page mlocked, or,
2510  * This function finds ptes mapping page(s) t !! 1660  * SWAP_AGAIN   - page mapped in mlocked vma -- couldn't acquire mmap sem
2511  * them and replaces mappings with special sw !! 1661  * SWAP_FAIL    - page cannot be located at present
2512  * access. On fault these entries are replace !! 1662  * SWAP_MLOCK   - page is now mlocked.
2513  * calling MMU notifiers.                     !! 1663  */
2514  *                                            !! 1664 int try_to_munlock(struct page *page)
2515  * A driver using this to program access from !! 1665 {
2516  * critical section to hold a device specific !! 1666         VM_BUG_ON(!PageLocked(page) || PageLRU(page));
2517  * programming is complete it should drop the << 
2518  * which point CPU access to the page will re << 
2519  */                                           << 
2520 int make_device_exclusive_range(struct mm_str << 
2521                                 unsigned long << 
2522                                 void *owner)  << 
2523 {                                             << 
2524         long npages = (end - start) >> PAGE_S << 
2525         long i;                               << 
2526                                               << 
2527         npages = get_user_pages_remote(mm, st << 
2528                                        FOLL_G << 
2529                                        pages, << 
2530         if (npages < 0)                       << 
2531                 return npages;                << 
2532                                               << 
2533         for (i = 0; i < npages; i++, start += << 
2534                 struct folio *folio = page_fo << 
2535                 if (PageTail(pages[i]) || !fo << 
2536                         folio_put(folio);     << 
2537                         pages[i] = NULL;      << 
2538                         continue;             << 
2539                 }                             << 
2540                                               << 
2541                 if (!folio_make_device_exclus << 
2542                         folio_unlock(folio);  << 
2543                         folio_put(folio);     << 
2544                         pages[i] = NULL;      << 
2545                 }                             << 
2546         }                                     << 
2547                                                  1667 
2548         return npages;                        !! 1668         if (unlikely(PageKsm(page)))
                                                   >> 1669                 return try_to_unmap_ksm(page, TTU_MUNLOCK);
                                                   >> 1670         else if (PageAnon(page))
                                                   >> 1671                 return try_to_unmap_anon(page, TTU_MUNLOCK);
                                                   >> 1672         else
                                                   >> 1673                 return try_to_unmap_file(page, TTU_MUNLOCK);
2549 }                                                1674 }
2550 EXPORT_SYMBOL_GPL(make_device_exclusive_range << 
2551 #endif                                        << 
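
Editor's note: as a usage illustration of the exported helper above, here is a hedged sketch of how a device driver might claim one page for exclusive device access. my_dev_owner, my_dev_lock and my_dev_program_pte() are hypothetical, the MMU-notifier invalidation side is omitted, and the usual <linux/mm.h>/<linux/rmap.h> kernel context is assumed; this is a sketch, not a verbatim excerpt of any driver.

/* Sketch only; my_dev_owner, my_dev_lock and my_dev_program_pte() are hypothetical. */
static DEFINE_MUTEX(my_dev_lock);               /* hypothetical device-side lock */
static void *my_dev_owner;                      /* hypothetical MMU_NOTIFY_EXCLUSIVE owner cookie */

static void my_dev_program_pte(struct page *page, unsigned long addr)
{
        /* hypothetical: write the device page-table entry for 'addr' */
}

static int my_dev_map_exclusive(struct mm_struct *mm, unsigned long addr)
{
        struct page *page = NULL;
        int npages;

        mmap_read_lock(mm);
        npages = make_device_exclusive_range(mm, addr, addr + PAGE_SIZE,
                                             &page, my_dev_owner);
        mmap_read_unlock(mm);
        if (npages != 1 || !page)
                return -EBUSY;          /* page was not made exclusive */

        /*
         * Programming the device PTE must be covered by the driver's
         * MMU-notifier lock so a concurrent MMU_NOTIFY_EXCLUSIVE
         * invalidation serializes against it.
         */
        mutex_lock(&my_dev_lock);
        my_dev_program_pte(page, addr);
        mutex_unlock(&my_dev_lock);

        /* Dropping the page lock and reference re-arms CPU access via fault. */
        unlock_page(page);
        put_page(page);
        return 0;
}
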
2552                                                  1675 
2553 void __put_anon_vma(struct anon_vma *anon_vma    1676 void __put_anon_vma(struct anon_vma *anon_vma)
2554 {                                                1677 {
2555         struct anon_vma *root = anon_vma->roo    1678         struct anon_vma *root = anon_vma->root;
2556                                                  1679 
2557         anon_vma_free(anon_vma);                 1680         anon_vma_free(anon_vma);
2558         if (root != anon_vma && atomic_dec_an    1681         if (root != anon_vma && atomic_dec_and_test(&root->refcount))
2559                 anon_vma_free(root);             1682                 anon_vma_free(root);
2560 }                                                1683 }
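
Editor's note: __put_anon_vma() above is only the slow path; the refcount itself is normally dropped by an inline wrapper in include/linux/rmap.h, which (from memory, shown as a sketch rather than a verbatim copy) looks roughly like:

/* Roughly the include/linux/rmap.h wrapper (from memory; not part of rmap.c). */
static inline void put_anon_vma(struct anon_vma *anon_vma)
{
        if (atomic_dec_and_test(&anon_vma->refcount))
                __put_anon_vma(anon_vma);
}
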
2561                                                  1684 
2562 static struct anon_vma *rmap_walk_anon_lock(s !! 1685 #ifdef CONFIG_MIGRATION
2563                                             s !! 1686 /*
                                                   >> 1687  * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
                                                   >> 1688  * Called by migrate.c to remove migration ptes, but might be used more later.
                                                   >> 1689  */
                                                   >> 1690 static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
                                                   >> 1691                 struct vm_area_struct *, unsigned long, void *), void *arg)
2564 {                                                1692 {
2565         struct anon_vma *anon_vma;               1693         struct anon_vma *anon_vma;
2566                                               !! 1694         pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
2567         if (rwc->anon_lock)                   !! 1695         struct anon_vma_chain *avc;
2568                 return rwc->anon_lock(folio,  !! 1696         int ret = SWAP_AGAIN;
2569                                                  1697 
2570         /*                                       1698         /*
2571          * Note: remove_migration_ptes() cann !! 1699          * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
2572          * because that depends on page_mappe    1700          * because that depends on page_mapped(); but not all its usages
2573          * are holding mmap_lock. Users witho !! 1701          * are holding mmap_sem. Users without mmap_sem are required to
2574          * take a reference count to prevent     1702          * take a reference count to prevent the anon_vma disappearing
2575          */                                      1703          */
2576         anon_vma = folio_anon_vma(folio);     !! 1704         anon_vma = page_anon_vma(page);
2577         if (!anon_vma)                           1705         if (!anon_vma)
2578                 return NULL;                  !! 1706                 return ret;
2579                                               << 
2580         if (anon_vma_trylock_read(anon_vma))  << 
2581                 goto out;                     << 
2582                                               << 
2583         if (rwc->try_lock) {                  << 
2584                 anon_vma = NULL;              << 
2585                 rwc->contended = true;        << 
2586                 goto out;                     << 
2587         }                                     << 
2588                                               << 
2589         anon_vma_lock_read(anon_vma);            1707         anon_vma_lock_read(anon_vma);
2590 out:                                          !! 1708         anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
2591         return anon_vma;                      << 
2592 }                                             << 
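
Editor's note: a hedged sketch of how a non-blocking caller uses the try_lock/contended protocol implemented above, loosely modeled on how folio_referenced() drives the walk; struct my_args, my_rmap_one() and my_walk_nonblocking() are hypothetical (my_rmap_one() is sketched further below, after rmap_walk_anon()):

/* Sketch only; struct my_args, my_rmap_one() and my_walk_nonblocking() are hypothetical. */
struct my_args {
        int referenced;
};

static bool my_rmap_one(struct folio *folio, struct vm_area_struct *vma,
                        unsigned long address, void *arg); /* sketched below */

static bool my_walk_nonblocking(struct folio *folio, struct my_args *args)
{
        struct rmap_walk_control rwc = {
                .rmap_one  = my_rmap_one,
                .arg       = args,
                .anon_lock = folio_lock_anon_vma_read,  /* the common anon-lock helper */
                .try_lock  = true,                      /* don't sleep on a contended lock */
        };

        rmap_walk(folio, &rwc);
        return !rwc.contended;  /* false: walk was skipped, caller retries later */
}
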
2593                                               << 
2594 /*                                            << 
2595  * rmap_walk_anon - do something to anonymous << 
2596  * rmap method                                << 
2597  * @folio: the folio to be handled            << 
2598  * @rwc: control variable according to each w << 
2599  * @locked: caller holds relevant rmap lock   << 
2600  *                                            << 
2601  * Find all the mappings of a folio using the << 
2602  * chains contained in the anon_vma struct it << 
2603  */                                           << 
2604 static void rmap_walk_anon(struct folio *foli << 
2605                 struct rmap_walk_control *rwc << 
2606 {                                             << 
2607         struct anon_vma *anon_vma;            << 
2608         pgoff_t pgoff_start, pgoff_end;       << 
2609         struct anon_vma_chain *avc;           << 
2610                                               << 
2611         if (locked) {                         << 
2612                 anon_vma = folio_anon_vma(fol << 
2613                 /* anon_vma disappear under u << 
2614                 VM_BUG_ON_FOLIO(!anon_vma, fo << 
2615         } else {                              << 
2616                 anon_vma = rmap_walk_anon_loc << 
2617         }                                     << 
2618         if (!anon_vma)                        << 
2619                 return;                       << 
2620                                               << 
2621         pgoff_start = folio_pgoff(folio);     << 
2622         pgoff_end = pgoff_start + folio_nr_pa << 
2623         anon_vma_interval_tree_foreach(avc, & << 
2624                         pgoff_start, pgoff_en << 
2625                 struct vm_area_struct *vma =     1709                 struct vm_area_struct *vma = avc->vma;
2626                 unsigned long address = vma_a !! 1710                 unsigned long address = vma_address(page, vma);
2627                                 folio_nr_page !! 1711                 ret = rmap_one(page, vma, address, arg);
2628                                               !! 1712                 if (ret != SWAP_AGAIN)
2629                 VM_BUG_ON_VMA(address == -EFA << 
2630                 cond_resched();               << 
2631                                               << 
2632                 if (rwc->invalid_vma && rwc-> << 
2633                         continue;             << 
2634                                               << 
2635                 if (!rwc->rmap_one(folio, vma << 
2636                         break;                << 
2637                 if (rwc->done && rwc->done(fo << 
2638                         break;                   1713                         break;
2639         }                                        1714         }
2640                                               !! 1715         anon_vma_unlock_read(anon_vma);
2641         if (!locked)                          !! 1716         return ret;
2642                 anon_vma_unlock_read(anon_vma << 
2643 }                                                1717 }
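
Editor's note: for concreteness, a hedged sketch of the rmap_one callback the walk above invokes per VMA; it visits each PTE/PMD actually mapping the folio in that VMA via page_vma_mapped_walk() and returns true to continue with the remaining VMAs. my_rmap_one() is hypothetical and continues the sketch started after rmap_walk_anon_lock() above:

/* Sketch only; my_rmap_one() and struct my_args are hypothetical (see above). */
static bool my_rmap_one(struct folio *folio, struct vm_area_struct *vma,
                        unsigned long address, void *arg)
{
        DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
        struct my_args *args = arg;     /* struct my_args from the sketch above */

        while (page_vma_mapped_walk(&pvmw)) {
                /* one iteration per pte (or pmd) mapping part of the folio here */
                args->referenced++;
        }
        return true;    /* keep walking the other VMAs; false would stop the walk */
}
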
2644                                                  1718 
2645 /*                                            !! 1719 static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
2646  * rmap_walk_file - do something to file page !! 1720                 struct vm_area_struct *, unsigned long, void *), void *arg)
2647  * @folio: the folio to be handled            << 
2648  * @rwc: control variable according to each w << 
2649  * @locked: caller holds relevant rmap lock   << 
2650  *                                            << 
2651  * Find all the mappings of a folio using the << 
2652  * contained in the address_space struct it p << 
2653  */                                           << 
2654 static void rmap_walk_file(struct folio *foli << 
2655                 struct rmap_walk_control *rwc << 
2656 {                                                1721 {
2657         struct address_space *mapping = folio !! 1722         struct address_space *mapping = page->mapping;
2658         pgoff_t pgoff_start, pgoff_end;       !! 1723         pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
2659         struct vm_area_struct *vma;              1724         struct vm_area_struct *vma;
2660                                               !! 1725         int ret = SWAP_AGAIN;
2661         /*                                    << 
2662          * The page lock not only makes sure  << 
2663          * suddenly be NULLified by truncatio << 
2664          * structure at mapping cannot be fre << 
2665          * so we can safely take mapping->i_m << 
2666          */                                   << 
2667         VM_BUG_ON_FOLIO(!folio_test_locked(fo << 
2668                                                  1726 
2669         if (!mapping)                            1727         if (!mapping)
2670                 return;                       !! 1728                 return ret;
2671                                               !! 1729         mutex_lock(&mapping->i_mmap_mutex);
2672         pgoff_start = folio_pgoff(folio);     !! 1730         vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
2673         pgoff_end = pgoff_start + folio_nr_pa !! 1731                 unsigned long address = vma_address(page, vma);
2674         if (!locked) {                        !! 1732                 ret = rmap_one(page, vma, address, arg);
2675                 if (i_mmap_trylock_read(mappi !! 1733                 if (ret != SWAP_AGAIN)
2676                         goto lookup;          !! 1734                         break;
2677                                               << 
2678                 if (rwc->try_lock) {          << 
2679                         rwc->contended = true << 
2680                         return;               << 
2681                 }                             << 
2682                                               << 
2683                 i_mmap_lock_read(mapping);    << 
2684         }                                     << 
2685 lookup:                                       << 
2686         vma_interval_tree_foreach(vma, &mappi << 
2687                         pgoff_start, pgoff_en << 
2688                 unsigned long address = vma_a << 
2689                                folio_nr_pages << 
2690                                               << 
2691                 VM_BUG_ON_VMA(address == -EFA << 
2692                 cond_resched();               << 
2693                                               << 
2694                 if (rwc->invalid_vma && rwc-> << 
2695                         continue;             << 
2696                                               << 
2697                 if (!rwc->rmap_one(folio, vma << 
2698                         goto done;            << 
2699                 if (rwc->done && rwc->done(fo << 
2700                         goto done;            << 
2701         }                                        1735         }
2702                                               !! 1736         /*
2703 done:                                         !! 1737          * No nonlinear handling: being always shared, nonlinear vmas
2704         if (!locked)                          !! 1738          * never contain migration ptes.  Decide what to do about this
2705                 i_mmap_unlock_read(mapping);  !! 1739          * limitation to linear when we need rmap_walk() on nonlinear.
                                                   >> 1740          */
                                                   >> 1741         mutex_unlock(&mapping->i_mmap_mutex);
                                                   >> 1742         return ret;
2706 }                                                1743 }
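
Editor's note: the per-VMA address handed to rmap_one() comes from vma_address(); stripped of its range and hugetlb checks, the arithmetic is just the linear file-offset mapping, sketched below. my_vma_address() is a hypothetical, simplified stand-in, not the kernel helper itself:

/* Simplified sketch of the vma_address() arithmetic; my_vma_address() is hypothetical. */
static unsigned long my_vma_address(pgoff_t pgoff, struct vm_area_struct *vma)
{
        /* where file page 'pgoff' lands inside this VMA's virtual range */
        return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}
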
2707                                                  1744 
2708 void rmap_walk(struct folio *folio, struct rm !! 1745 int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
                                                   >> 1746                 struct vm_area_struct *, unsigned long, void *), void *arg)
2709 {                                                1747 {
2710         if (unlikely(folio_test_ksm(folio)))  !! 1748         VM_BUG_ON(!PageLocked(page));
2711                 rmap_walk_ksm(folio, rwc);    << 
2712         else if (folio_test_anon(folio))      << 
2713                 rmap_walk_anon(folio, rwc, fa << 
2714         else                                  << 
2715                 rmap_walk_file(folio, rwc, fa << 
2716 }                                             << 
2717                                                  1749 
2718 /* Like rmap_walk, but caller holds relevant  !! 1750         if (unlikely(PageKsm(page)))
2719 void rmap_walk_locked(struct folio *folio, st !! 1751                 return rmap_walk_ksm(page, rmap_one, arg);
2720 {                                             !! 1752         else if (PageAnon(page))
2721         /* no ksm support for now */          !! 1753                 return rmap_walk_anon(page, rmap_one, arg);
2722         VM_BUG_ON_FOLIO(folio_test_ksm(folio) << 
2723         if (folio_test_anon(folio))           << 
2724                 rmap_walk_anon(folio, rwc, tr << 
2725         else                                     1754         else
2726                 rmap_walk_file(folio, rwc, tr !! 1755                 return rmap_walk_file(page, rmap_one, arg);
2727 }                                                1756 }
                                                   >> 1757 #endif /* CONFIG_MIGRATION */
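
Editor's note: a small sketch of how a caller chooses between the two entry points above; my_do_rmap_walk() is hypothetical, and "caller_holds_lock" stands for already holding the folio's anon_vma or i_mmap_rwsem read lock:

/* Sketch only; my_do_rmap_walk() is hypothetical. */
static void my_do_rmap_walk(struct folio *folio, struct rmap_walk_control *rwc,
                            bool caller_holds_lock)
{
        if (caller_holds_lock)
                rmap_walk_locked(folio, rwc);   /* KSM folios not supported here */
        else
                rmap_walk(folio, rwc);          /* lock taken and released internally */
}
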
2728                                                  1758 
2729 #ifdef CONFIG_HUGETLB_PAGE                       1759 #ifdef CONFIG_HUGETLB_PAGE
2730 /*                                               1760 /*
2731  * The following two functions are for anonym !! 1761  * The following three functions are for anonymous (private mapped) hugepages.
2732  * Unlike common anonymous pages, anonymous h    1762  * Unlike common anonymous pages, anonymous hugepages have no accounting code
2733  * and no lru code, because we handle hugepag    1763  * and no lru code, because we handle hugepages differently from common pages.
2734  */                                              1764  */
2735 void hugetlb_add_anon_rmap(struct folio *foli !! 1765 static void __hugepage_set_anon_rmap(struct page *page,
2736                 unsigned long address, rmap_t !! 1766         struct vm_area_struct *vma, unsigned long address, int exclusive)
2737 {                                                1767 {
2738         VM_WARN_ON_FOLIO(!folio_test_hugetlb( !! 1768         struct anon_vma *anon_vma = vma->anon_vma;
2739         VM_WARN_ON_FOLIO(!folio_test_anon(fol !! 1769 
                                                   >> 1770         BUG_ON(!anon_vma);
2740                                                  1771 
2741         atomic_inc(&folio->_entire_mapcount); !! 1772         if (PageAnon(page))
2742         atomic_inc(&folio->_large_mapcount);  !! 1773                 return;
2743         if (flags & RMAP_EXCLUSIVE)           !! 1774         if (!exclusive)
2744                 SetPageAnonExclusive(&folio-> !! 1775                 anon_vma = anon_vma->root;
2745         VM_WARN_ON_FOLIO(folio_entire_mapcoun !! 1776 
2746                          PageAnonExclusive(&f !! 1777         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
                                                   >> 1778         page->mapping = (struct address_space *) anon_vma;
                                                   >> 1779         page->index = linear_page_index(vma, address);
2747 }                                                1780 }
2748                                                  1781 
2749 void hugetlb_add_new_anon_rmap(struct folio * !! 1782 void hugepage_add_anon_rmap(struct page *page,
2750                 struct vm_area_struct *vma, u !! 1783                             struct vm_area_struct *vma, unsigned long address)
2751 {                                                1784 {
2752         VM_WARN_ON_FOLIO(!folio_test_hugetlb( !! 1785         struct anon_vma *anon_vma = vma->anon_vma;
                                                   >> 1786         int first;
2753                                                  1787 
                                                   >> 1788         BUG_ON(!PageLocked(page));
                                                   >> 1789         BUG_ON(!anon_vma);
                                                   >> 1790         /* address might be in next vma when migration races vma_adjust */
                                                   >> 1791         first = atomic_inc_and_test(&page->_mapcount);
                                                   >> 1792         if (first)
                                                   >> 1793                 __hugepage_set_anon_rmap(page, vma, address, 0);
                                                   >> 1794 }
                                                   >> 1795 
                                                   >> 1796 void hugepage_add_new_anon_rmap(struct page *page,
                                                   >> 1797                         struct vm_area_struct *vma, unsigned long address)
                                                   >> 1798 {
2754         BUG_ON(address < vma->vm_start || add    1799         BUG_ON(address < vma->vm_start || address >= vma->vm_end);
2755         /* increment count (starts at -1) */  !! 1800         atomic_set(&page->_mapcount, 0);
2756         atomic_set(&folio->_entire_mapcount,  !! 1801         __hugepage_set_anon_rmap(page, vma, address, 1);
2757         atomic_set(&folio->_large_mapcount, 0 << 
2758         folio_clear_hugetlb_restore_reserve(f << 
2759         __folio_set_anon(folio, vma, address, << 
2760         SetPageAnonExclusive(&folio->page);   << 
2761 }                                                1802 }
2762 #endif /* CONFIG_HUGETLB_PAGE */                 1803 #endif /* CONFIG_HUGETLB_PAGE */
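
Editor's note: a hedged sketch of how the two hugetlb helpers above divide the work at a call site; my_hugetlb_anon_rmap() is hypothetical and the flag handling is simplified. A folio freshly allocated for the fault takes the "new" variant, which initialises the entire/large mapcounts (they start at -1) and sets up folio->mapping; a folio that is already anonymous, e.g. a migration target, only has its mapcounts bumped:

/* Sketch only; my_hugetlb_anon_rmap() is hypothetical, flag handling simplified. */
static void my_hugetlb_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
                                 unsigned long address, bool new_folio,
                                 rmap_t rmap_flags)
{
        if (new_folio)
                hugetlb_add_new_anon_rmap(folio, vma, address); /* sets ->mapping too */
        else
                hugetlb_add_anon_rmap(folio, vma, address, rmap_flags);
}
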
2763                                                  1804 
