/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_RMAP_H
#define _LINUX_RMAP_H
/*
 * Declarations for Reverse Mapping functions in mm/rmap.c
 */

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/memcontrol.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/memremap.h>

/*
 * The anon_vma heads a list of private "related" vmas, to scan if
 * an anonymous page pointing to this anon_vma needs to be unmapped:
 * the vmas on the list will be related by forking, or by splitting.
 *
 * Since vmas come and go as they are split and merged (particularly
 * in mprotect), the mapping field of an anonymous page cannot point
 * directly to a vma: instead it points to an anon_vma, on whose list
 * the related vmas can be easily linked or unlinked.
 *
 * After unlinking the last vma on the list, we must garbage collect
 * the anon_vma object itself: we're guaranteed no page can be
 * pointing to this anon_vma once its vma list is empty.
 */
struct anon_vma {
        struct anon_vma *root;          /* Root of this anon_vma tree */
        struct rw_semaphore rwsem;      /* W: modification, R: walking the list */
        /*
         * The refcount is taken on an anon_vma when there is no
         * guarantee that the vma of page tables will exist for
         * the duration of the operation. A caller that takes
         * the reference is responsible for clearing up the
         * anon_vma if they are the last user on release.
         */
        atomic_t refcount;

        /*
         * Count of child anon_vmas. Equal to the count of all anon_vmas that
         * have ->parent pointing to this one, including itself.
         *
         * This counter is used when deciding whether to reuse an anon_vma
         * instead of forking a new one. See the comments in anon_vma_clone().
         */
        unsigned long num_children;
        /* Count of VMAs whose ->anon_vma pointer points to this object. */
        unsigned long num_active_vmas;

        struct anon_vma *parent;        /* Parent of this anon_vma */

        /*
         * NOTE: the LSB of the rb_root.rb_node is set by
         * mm_take_all_locks() _after_ taking the above lock. So the
         * rb_root must only be read/written after taking the above lock
         * to be sure to see a valid next pointer. The LSB bit itself
         * is serialized by a system wide lock only visible to
         * mm_take_all_locks() (mm_all_locks_mutex).
         */

        /* Interval tree of private "related" vmas */
        struct rb_root_cached rb_root;
};
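
/*
 * A rough usage sketch (illustration only, not a new API): rmap code that
 * starts from an anonymous folio typically pins the anon_vma with a
 * refcount and then takes the read lock before walking the interval tree.
 * folio_get_anon_vma(), the lock helpers and put_anon_vma() are all
 * declared further down in this header; folio_lock_anon_vma_read() and
 * rmap_walk() in mm/rmap.c are the real, more careful implementations.
 *
 *      struct anon_vma *anon_vma = folio_get_anon_vma(folio);
 *
 *      if (anon_vma) {
 *              anon_vma_lock_read(anon_vma);
 *              ... walk anon_vma->rb_root to visit the candidate vmas ...
 *              anon_vma_unlock_read(anon_vma);
 *              put_anon_vma(anon_vma);
 *      }
 */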

/*
 * The copy-on-write semantics of fork mean that an anon_vma
 * can become associated with multiple processes. Furthermore,
 * each child process will have its own anon_vma, where new
 * pages for that process are instantiated.
 *
 * This structure allows us to find the anon_vmas associated
 * with a VMA, or the VMAs associated with an anon_vma.
 * The "same_vma" list contains the anon_vma_chains linking
 * all the anon_vmas associated with this VMA.
 * The "rb" field indexes on an interval tree the anon_vma_chains
 * which link all the VMAs associated with this anon_vma.
 */
struct anon_vma_chain {
        struct vm_area_struct *vma;
        struct anon_vma *anon_vma;
        struct list_head same_vma;      /* locked by mmap_lock & page_table_lock */
        struct rb_node rb;              /* locked by anon_vma->rwsem */
        unsigned long rb_subtree_last;
#ifdef CONFIG_DEBUG_VM_RB
        unsigned long cached_vma_start, cached_vma_last;
#endif
};
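
/*
 * Sketch of how the two linkages are meant to be traversed (the canonical
 * walkers are anon_vma_clone() and unlink_anon_vmas() in mm/rmap.c; this is
 * only an illustration, and do_something() is a placeholder): with the
 * mmap_lock of the vma's mm held, the same_vma list yields every anon_vma
 * the VMA is linked into, while anon_vma->rb_root yields every VMA that may
 * map pages of that anon_vma.
 *
 *      struct anon_vma_chain *avc;
 *
 *      list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
 *              do_something(avc->anon_vma);
 */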

enum ttu_flags {
        TTU_SPLIT_HUGE_PMD      = 0x4,  /* split huge PMD if any */
        TTU_IGNORE_MLOCK        = 0x8,  /* ignore mlock */
        TTU_SYNC                = 0x10, /* avoid racy checks with PVMW_SYNC */
        TTU_HWPOISON            = 0x20, /* do convert pte to hwpoison entry */
        TTU_BATCH_FLUSH         = 0x40, /* Batch TLB flushes where possible
                                         * and caller guarantees they will
                                         * do a final flush if necessary */
        TTU_RMAP_LOCKED         = 0x80, /* do not grab rmap lock:
                                         * caller holds it */
};
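
/*
 * The flags above are OR-ed together by callers of try_to_unmap() and
 * try_to_migrate(), both declared further down. A hedged sketch of the
 * reclaim-style call, loosely modelled on shrink_folio_list() (the folio
 * must be locked by the caller):
 *
 *      enum ttu_flags flags = TTU_BATCH_FLUSH;
 *
 *      if (folio_test_pmd_mappable(folio))
 *              flags |= TTU_SPLIT_HUGE_PMD;
 *      try_to_unmap(folio, flags);
 *      if (!folio_mapped(folio))
 *              ... all mappings are gone, reclaim can proceed ...
 */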

#ifdef CONFIG_MMU
static inline void get_anon_vma(struct anon_vma *anon_vma)
{
        atomic_inc(&anon_vma->refcount);
}

void __put_anon_vma(struct anon_vma *anon_vma);

static inline void put_anon_vma(struct anon_vma *anon_vma)
{
        if (atomic_dec_and_test(&anon_vma->refcount))
                __put_anon_vma(anon_vma);
}

static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
        down_write(&anon_vma->root->rwsem);
}

static inline int anon_vma_trylock_write(struct anon_vma *anon_vma)
{
        return down_write_trylock(&anon_vma->root->rwsem);
}

static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
{
        up_write(&anon_vma->root->rwsem);
}

static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
{
        down_read(&anon_vma->root->rwsem);
}

static inline int anon_vma_trylock_read(struct anon_vma *anon_vma)
{
        return down_read_trylock(&anon_vma->root->rwsem);
}

static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
{
        up_read(&anon_vma->root->rwsem);
}


/*
 * anon_vma helper functions.
 */
void anon_vma_init(void);       /* create anon_vma_cachep */
int __anon_vma_prepare(struct vm_area_struct *);
void unlink_anon_vmas(struct vm_area_struct *);
int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);

static inline int anon_vma_prepare(struct vm_area_struct *vma)
{
        if (likely(vma->anon_vma))
                return 0;

        return __anon_vma_prepare(vma);
}

static inline void anon_vma_merge(struct vm_area_struct *vma,
                                  struct vm_area_struct *next)
{
        VM_BUG_ON_VMA(vma->anon_vma != next->anon_vma, vma);
        unlink_anon_vmas(next);
}
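
/*
 * A minimal sketch of the intended anon_vma_prepare() call site, in the
 * spirit of the anonymous fault path in mm/memory.c: the anon_vma must
 * exist before the first anonymous page is mapped into a VMA, and failure
 * is reported to the fault handler as an OOM. The vmf/folio details are
 * assumptions about the caller, shown only for context.
 *
 *      if (unlikely(anon_vma_prepare(vma)))
 *              return VM_FAULT_OOM;
 *      folio = vma_alloc_zeroed_movable_folio(vma, vmf->address);
 *      ...
 */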

struct anon_vma *folio_get_anon_vma(struct folio *folio);

/* RMAP flags, currently only relevant for some anon rmap operations. */
typedef int __bitwise rmap_t;

/*
 * No special request: A mapped anonymous (sub)page is possibly shared between
 * processes.
 */
#define RMAP_NONE               ((__force rmap_t)0)

/* The anonymous (sub)page is exclusive to a single process. */
#define RMAP_EXCLUSIVE          ((__force rmap_t)BIT(0))

/*
 * Internally, we're using an enum to specify the granularity. We make the
 * compiler emit specialized code for each granularity.
 */
enum rmap_level {
        RMAP_LEVEL_PTE = 0,
        RMAP_LEVEL_PMD,
};
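
/*
 * Sketch of how the rmap_t flags are meant to be used with the add
 * functions declared below (an illustration modelled on the anonymous
 * fault and swap-in paths, not a statement about those exact call sites):
 * a freshly allocated anonymous folio mapped into a single process is
 * added with RMAP_EXCLUSIVE, e.g.
 *
 *      folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
 *
 * whereas re-adding a page that might still be shared (for instance via
 * the swap cache) passes RMAP_NONE and leaves the mapping write-protected.
 */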

static inline void __folio_rmap_sanity_checks(struct folio *folio,
                struct page *page, int nr_pages, enum rmap_level level)
{
        /* hugetlb folios are handled separately. */
        VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);

        /* When (un)mapping zeropages, we should never touch ref+mapcount. */
        VM_WARN_ON_FOLIO(is_zero_folio(folio), folio);

        /*
         * TODO: we get driver-allocated folios that have nothing to do with
         * the rmap using vm_insert_page(); therefore, we cannot assume that
         * folio_test_large_rmappable() holds for large folios. We should
         * handle any desired mapcount+stats accounting for these folios in
         * VM_MIXEDMAP VMAs separately, and then sanity-check here that
         * we really only get rmappable folios.
         */

        VM_WARN_ON_ONCE(nr_pages <= 0);
        VM_WARN_ON_FOLIO(page_folio(page) != folio, folio);
        VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio);

        switch (level) {
        case RMAP_LEVEL_PTE:
                break;
        case RMAP_LEVEL_PMD:
                /*
                 * We don't support folios larger than a single PMD yet. So
                 * when RMAP_LEVEL_PMD is set, we assume that we are creating
                 * a single "entire" mapping of the folio.
                 */
                VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
                VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
                break;
        default:
                VM_WARN_ON_ONCE(true);
        }
}

/*
 * rmap interfaces called when adding or removing pte of page
 */
void folio_move_anon_rmap(struct folio *, struct vm_area_struct *);
void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
                struct vm_area_struct *, unsigned long address, rmap_t flags);
#define folio_add_anon_rmap_pte(folio, page, vma, address, flags) \
        folio_add_anon_rmap_ptes(folio, page, 1, vma, address, flags)
void folio_add_anon_rmap_pmd(struct folio *, struct page *,
                struct vm_area_struct *, unsigned long address, rmap_t flags);
void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
                unsigned long address, rmap_t flags);
void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
                struct vm_area_struct *);
#define folio_add_file_rmap_pte(folio, page, vma) \
        folio_add_file_rmap_ptes(folio, page, 1, vma)
void folio_add_file_rmap_pmd(struct folio *, struct page *,
                struct vm_area_struct *);
void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages,
                struct vm_area_struct *);
#define folio_remove_rmap_pte(folio, page, vma) \
        folio_remove_rmap_ptes(folio, page, 1, vma)
void folio_remove_rmap_pmd(struct folio *, struct page *,
                struct vm_area_struct *);
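
/*
 * The add/remove calls above are expected to bracket the corresponding page
 * table updates under the page table lock. A hedged sketch for a file folio
 * mapped and later unmapped at PTE granularity (the fault and zap paths in
 * mm/memory.c are the real users; set_ptes() and the locking are
 * assumptions about the caller, shown only to illustrate the pairing):
 *
 *      folio_add_file_rmap_ptes(folio, page, nr, vma);
 *      set_ptes(vma->vm_mm, addr, ptep, entry, nr);
 *
 *      ... later, after the PTEs have been cleared ...
 *
 *      folio_remove_rmap_ptes(folio, page, nr, vma);
 */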

void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
                unsigned long address, rmap_t flags);
void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
                unsigned long address);

/* See folio_try_dup_anon_rmap_*() */
static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
                struct vm_area_struct *vma)
{
        VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
        VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);

        if (PageAnonExclusive(&folio->page)) {
                if (unlikely(folio_needs_cow_for_dma(vma, folio)))
                        return -EBUSY;
                ClearPageAnonExclusive(&folio->page);
        }
        atomic_inc(&folio->_entire_mapcount);
        atomic_inc(&folio->_large_mapcount);
        return 0;
}

/* See folio_try_share_anon_rmap_*() */
static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
{
        VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
        VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
        VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio);

        /* Paired with the memory barrier in try_grab_folio(). */
        if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
                smp_mb();

        if (unlikely(folio_maybe_dma_pinned(folio)))
                return -EBUSY;
        ClearPageAnonExclusive(&folio->page);

        /*
         * This is conceptually a smp_wmb() paired with the smp_rmb() in
         * gup_must_unshare().
         */
        if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
                smp_mb__after_atomic();
        return 0;
}

static inline void hugetlb_add_file_rmap(struct folio *folio)
{
        VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
        VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);

        atomic_inc(&folio->_entire_mapcount);
        atomic_inc(&folio->_large_mapcount);
}

static inline void hugetlb_remove_rmap(struct folio *folio)
{
        VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);

        atomic_dec(&folio->_entire_mapcount);
        atomic_dec(&folio->_large_mapcount);
}
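
/*
 * A rough sketch of the hugetlb fork path these helpers serve, loosely in
 * the style of copy_hugetlb_page_range() (illustration only; pte_folio and
 * the fallback are placeholders for the caller's context): duplication of
 * an exclusive anonymous page must be refused when it may be pinned, in
 * which case the child gets a private copy instead.
 *
 *      if (folio_test_anon(pte_folio)) {
 *              if (hugetlb_try_dup_anon_rmap(pte_folio, src_vma))
 *                      ... fall back: allocate a fresh hugetlb folio and
 *                          copy the contents for the child ...
 *      } else {
 *              hugetlb_add_file_rmap(pte_folio);
 *      }
 */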

static __always_inline void __folio_dup_file_rmap(struct folio *folio,
                struct page *page, int nr_pages, enum rmap_level level)
{
        const int orig_nr_pages = nr_pages;

        __folio_rmap_sanity_checks(folio, page, nr_pages, level);

        switch (level) {
        case RMAP_LEVEL_PTE:
                if (!folio_test_large(folio)) {
                        atomic_inc(&folio->_mapcount);
                        break;
                }

                do {
                        atomic_inc(&page->_mapcount);
                } while (page++, --nr_pages > 0);
                atomic_add(orig_nr_pages, &folio->_large_mapcount);
                break;
        case RMAP_LEVEL_PMD:
                atomic_inc(&folio->_entire_mapcount);
                atomic_inc(&folio->_large_mapcount);
                break;
        }
}

/**
 * folio_dup_file_rmap_ptes - duplicate PTE mappings of a page range of a folio
 * @folio:      The folio to duplicate the mappings of
 * @page:       The first page to duplicate the mappings of
 * @nr_pages:   The number of pages of which the mapping will be duplicated
 *
 * The page range of the folio is defined by [page, page + nr_pages)
 *
 * The caller needs to hold the page table lock.
 */
static inline void folio_dup_file_rmap_ptes(struct folio *folio,
                struct page *page, int nr_pages)
{
        __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE);
}

static __always_inline void folio_dup_file_rmap_pte(struct folio *folio,
                struct page *page)
{
        __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE);
}

/**
 * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio
 * @folio:      The folio to duplicate the mapping of
 * @page:       The first page to duplicate the mapping of
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock.
 */
static inline void folio_dup_file_rmap_pmd(struct folio *folio,
                struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PMD);
#else
        WARN_ON_ONCE(true);
#endif
}
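
/*
 * Sketch of the intended caller, loosely based on the fork() PTE copy path
 * (__copy_present_ptes() in mm/memory.c; shown as an illustration only):
 * file-backed mappings are duplicated unconditionally, while anonymous
 * mappings go through the try_dup variant declared below because they may
 * have to fall back to copying.
 *
 *      if (folio_test_anon(folio)) {
 *              if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page, nr,
 *                                                        src_vma)))
 *                      return -EAGAIN;
 *      } else {
 *              folio_dup_file_rmap_ptes(folio, page, nr);
 *      }
 */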

static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
                struct page *page, int nr_pages, struct vm_area_struct *src_vma,
                enum rmap_level level)
{
        const int orig_nr_pages = nr_pages;
        bool maybe_pinned;
        int i;

        VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
        __folio_rmap_sanity_checks(folio, page, nr_pages, level);

        /*
         * If this folio may have been pinned by the parent process,
         * don't allow duplicating the mappings but instead require e.g.
         * copying the subpage immediately for the child, so that we'll
         * always guarantee the pinned folio won't be randomly replaced
         * in the future on write faults.
         */
        maybe_pinned = likely(!folio_is_device_private(folio)) &&
                       unlikely(folio_needs_cow_for_dma(src_vma, folio));

        /*
         * No need to check+clear for already shared PTEs/PMDs of the
         * folio. But if any page is PageAnonExclusive, we must fall back to
         * copying if the folio may be pinned.
         */
        switch (level) {
        case RMAP_LEVEL_PTE:
                if (unlikely(maybe_pinned)) {
                        for (i = 0; i < nr_pages; i++)
                                if (PageAnonExclusive(page + i))
                                        return -EBUSY;
                }

                if (!folio_test_large(folio)) {
                        if (PageAnonExclusive(page))
                                ClearPageAnonExclusive(page);
                        atomic_inc(&folio->_mapcount);
                        break;
                }

                do {
                        if (PageAnonExclusive(page))
                                ClearPageAnonExclusive(page);
                        atomic_inc(&page->_mapcount);
                } while (page++, --nr_pages > 0);
                atomic_add(orig_nr_pages, &folio->_large_mapcount);
                break;
        case RMAP_LEVEL_PMD:
                if (PageAnonExclusive(page)) {
                        if (unlikely(maybe_pinned))
                                return -EBUSY;
                        ClearPageAnonExclusive(page);
                }
                atomic_inc(&folio->_entire_mapcount);
                atomic_inc(&folio->_large_mapcount);
                break;
        }
        return 0;
}

/**
 * folio_try_dup_anon_rmap_ptes - try duplicating PTE mappings of a page range
 *                                of a folio
 * @folio:      The folio to duplicate the mappings of
 * @page:       The first page to duplicate the mappings of
 * @nr_pages:   The number of pages of which the mapping will be duplicated
 * @src_vma:    The vm area from which the mappings are duplicated
 *
 * The page range of the folio is defined by [page, page + nr_pages)
 *
 * The caller needs to hold the page table lock and the
 * vma->vm_mm->write_protect_seq.
 *
 * Duplicating the mappings can only fail if the folio may be pinned; device
 * private folios cannot get pinned and consequently this function cannot fail
 * for them.
 *
 * If duplicating the mappings succeeded, the duplicated PTEs have to be R/O in
 * the parent and the child. They must *not* be writable after this call
 * succeeded.
 *
 * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise.
 */
static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio,
                struct page *page, int nr_pages, struct vm_area_struct *src_vma)
{
        return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma,
                                         RMAP_LEVEL_PTE);
}

static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio,
                struct page *page, struct vm_area_struct *src_vma)
{
        return __folio_try_dup_anon_rmap(folio, page, 1, src_vma,
                                         RMAP_LEVEL_PTE);
}

/**
 * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range
 *                               of a folio
 * @folio:      The folio to duplicate the mapping of
 * @page:       The first page to duplicate the mapping of
 * @src_vma:    The vm area from which the mapping is duplicated
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock and the
 * vma->vm_mm->write_protect_seq.
 *
 * Duplicating the mapping can only fail if the folio may be pinned; device
 * private folios cannot get pinned and consequently this function cannot fail
 * for them.
 *
 * If duplicating the mapping succeeds, the duplicated PMD has to be R/O in
 * the parent and the child. They must *not* be writable after this call
 * succeeded.
 *
 * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise.
 */
static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio,
                struct page *page, struct vm_area_struct *src_vma)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma,
                                         RMAP_LEVEL_PMD);
#else
        WARN_ON_ONCE(true);
        return -EBUSY;
#endif
}
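
/*
 * A hedged sketch of the fork()-time contract spelled out above, roughly
 * what the anonymous-page copy paths in mm/memory.c and mm/huge_memory.c
 * do (copy_page_for_child() is a placeholder, not a real helper): if
 * duplication is refused because the folio may be pinned, the parent's data
 * is copied into a new page for the child; if it succeeds, the mapping is
 * left (or made) write-protected in both processes.
 *
 *      if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page, nr, src_vma)))
 *              return copy_page_for_child(dst_vma, src_vma, addr, page);
 *      ... write-protect the parent's PTEs and copy them to the child ...
 */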

static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
                struct page *page, int nr_pages, enum rmap_level level)
{
        VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
        VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio);
        __folio_rmap_sanity_checks(folio, page, nr_pages, level);

        /* device private folios cannot get pinned via GUP. */
        if (unlikely(folio_is_device_private(folio))) {
                ClearPageAnonExclusive(page);
                return 0;
        }

        /*
         * We have to make sure that when we clear PageAnonExclusive, the
         * page is not pinned and that concurrent GUP-fast won't succeed in
         * concurrently pinning the page.
         *
         * Conceptually, PageAnonExclusive clearing consists of:
         * (A1) Clear PTE
         * (A2) Check if the page is pinned; back off if so.
         * (A3) Clear PageAnonExclusive
         * (A4) Restore PTE (optional, but certainly not writable)
         *
         * When clearing PageAnonExclusive, we cannot possibly map the page
         * writable again, because anon pages that may be shared must never
         * be writable. So in any case, if the PTE was writable it cannot
         * be writable anymore afterwards and there would be a PTE change. Only
         * if the PTE wasn't writable, there might not be a PTE change.
         *
         * Conceptually, GUP-fast pinning of an anon page consists of:
         * (B1) Read the PTE
         * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so.
         * (B3) Pin the mapped page
         * (B4) Check if the PTE changed by re-reading it; back off if so.
         * (B5) If the original PTE is not writable, check if
         *      PageAnonExclusive is not set; back off if so.
         *
         * If the PTE was writable, we only have to make sure that GUP-fast
         * observes a PTE change and properly backs off.
         *
         * If the PTE was not writable, we have to make sure that GUP-fast either
         * detects a (temporary) PTE change or that PageAnonExclusive is cleared
         * and properly backs off.
         *
         * Consequently, when clearing PageAnonExclusive(), we have to make
         * sure that (A1), (A2)/(A3) and (A4) happen in the right memory
         * order. In GUP-fast pinning code, we have to make sure that (B3),(B4)
         * and (B5) happen in the right memory order.
         *
         * We assume that there might not be a memory barrier after
         * clearing/invalidating the PTE (A1) and before restoring the PTE (A4),
         * so we use explicit ones here.
         */

        /* Paired with the memory barrier in try_grab_folio(). */
        if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
                smp_mb();

        if (unlikely(folio_maybe_dma_pinned(folio)))
                return -EBUSY;
        ClearPageAnonExclusive(page);

        /*
         * This is conceptually a smp_wmb() paired with the smp_rmb() in
         * gup_must_unshare().
         */
        if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
                smp_mb__after_atomic();
        return 0;
}

/**
 * folio_try_share_anon_rmap_pte - try marking an exclusive anonymous page
 *                                 mapped by a PTE possibly shared to prepare
 *                                 for KSM or temporary unmapping
 * @folio:      The folio to share a mapping of
 * @page:       The mapped exclusive page
 *
 * The caller needs to hold the page table lock and has to have the page table
 * entries cleared/invalidated.
 *
 * This is similar to folio_try_dup_anon_rmap_pte(), however, not used during
 * fork() to duplicate mappings, but instead to prepare for KSM or temporarily
 * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pte().
 *
 * Marking the mapped page shared can only fail if the folio may be pinned;
 * device private folios cannot get pinned and consequently this function
 * cannot fail for them.
 *
 * Returns 0 if marking the mapped page possibly shared succeeded. Returns
 * -EBUSY otherwise.
 */
static inline int folio_try_share_anon_rmap_pte(struct folio *folio,
                struct page *page)
{
        return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE);
}
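
/*
 * Sketch of the unmap-time usage described above; try_to_unmap_one() in
 * mm/rmap.c is the real user and the identifiers below only mirror it for
 * illustration. The PTE is cleared first, the share attempt may still fail,
 * and on failure the original PTE is restored and this mapping is skipped.
 *
 *      pteval = ptep_clear_flush(vma, address, pvmw.pte);
 *      if (folio_try_share_anon_rmap_pte(folio, subpage)) {
 *              set_pte_at(mm, address, pvmw.pte, pteval);
 *              ... give up on this mapping ...
 *      }
 *      ... otherwise install the swap or migration entry ...
 */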

/**
 * folio_try_share_anon_rmap_pmd - try marking an exclusive anonymous page
 *                                 range mapped by a PMD possibly shared to
 *                                 prepare for temporary unmapping
 * @folio:      The folio to share the mapping of
 * @page:       The first page to share the mapping of
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock and has to have the page table
 * entries cleared/invalidated.
 *
 * This is similar to folio_try_dup_anon_rmap_pmd(), however, not used during
 * fork() to duplicate a mapping, but instead to prepare for temporarily
 * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pmd().
 *
 * Marking the mapped pages shared can only fail if the folio may be pinned;
 * device private folios cannot get pinned and consequently this function
 * cannot fail for them.
 *
 * Returns 0 if marking the mapped pages possibly shared succeeded. Returns
 * -EBUSY otherwise.
 */
static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
                struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR,
                                           RMAP_LEVEL_PMD);
#else
        WARN_ON_ONCE(true);
        return -EBUSY;
#endif
}

/*
 * Called from mm/vmscan.c to handle paging out
 */
int folio_referenced(struct folio *, int is_locked,
                        struct mem_cgroup *memcg, unsigned long *vm_flags);

void try_to_migrate(struct folio *folio, enum ttu_flags flags);
void try_to_unmap(struct folio *, enum ttu_flags flags);

int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
                                unsigned long end, struct page **pages,
                                void *arg);

/* Avoid racy checks */
#define PVMW_SYNC               (1 << 0)
/* Look for migration entries rather than present PTEs */
#define PVMW_MIGRATION          (1 << 1)

struct page_vma_mapped_walk {
        unsigned long pfn;
        unsigned long nr_pages;
        pgoff_t pgoff;
        struct vm_area_struct *vma;
        unsigned long address;
        pmd_t *pmd;
        pte_t *pte;
        spinlock_t *ptl;
        unsigned int flags;
};

#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags)    \
        struct page_vma_mapped_walk name = {                            \
                .pfn = folio_pfn(_folio),                               \
                .nr_pages = folio_nr_pages(_folio),                     \
                .pgoff = folio_pgoff(_folio),                           \
                .vma = _vma,                                            \
                .address = _address,                                    \
                .flags = _flags,                                        \
        }

static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
{
        /*
         * A HugeTLB pte points directly at the relevant page table entry and
         * was not mapped with pte_offset_map(), so it needs no pte_unmap().
         */
        if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma))
                pte_unmap(pvmw->pte);
        if (pvmw->ptl)
                spin_unlock(pvmw->ptl);
}

/**
 * page_vma_mapped_walk_restart - Restart the page table walk.
 * @pvmw: Pointer to struct page_vma_mapped_walk.
 *
 * It restarts the page table walk when changes occur in the page
 * table, such as splitting a PMD. Ensures that the PTL held during
 * the previous walk is released and resets the state to allow for
 * a new walk starting at the current address stored in pvmw->address.
 */
static inline void
page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
{
        WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte);

        if (likely(pvmw->ptl))
                spin_unlock(pvmw->ptl);
        else
                WARN_ON_ONCE(1);

        pvmw->ptl = NULL;
        pvmw->pmd = NULL;
        pvmw->pte = NULL;
}

bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
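
/*
 * Typical use of the walk API above, as a sketch (mm/rmap.c has the real
 * users): the walker finds every PTE or PMD in @vma that maps part of the
 * folio, taking and dropping the relevant page table lock as it goes.
 *
 *      DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
 *
 *      while (page_vma_mapped_walk(&pvmw)) {
 *              if (pvmw.pte)
 *                      ... inspect or modify *pvmw.pte under pvmw.ptl ...
 *              else
 *                      ... a PMD-level mapping, see pvmw.pmd ...
 *      }
 *
 * page_vma_mapped_walk() has already dropped its locks when it returns
 * false, so page_vma_mapped_walk_done() is only needed when breaking out
 * of the loop early.
 */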

/*
 * Used by swapoff to help locate where page is expected in vma.
 */
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);

/*
 * Cleans the PTEs of shared mappings.
 * (and since clean PTEs should also be readonly, write protects them too)
 *
 * returns the number of cleaned PTEs.
 */
int folio_mkclean(struct folio *);

int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
                      struct vm_area_struct *vma);

enum rmp_flags {
        RMP_LOCKED              = 1 << 0,
        RMP_USE_SHARED_ZEROPAGE = 1 << 1,
};

void remove_migration_ptes(struct folio *src, struct folio *dst, int flags);

/*
 * rmap_walk_control: To control rmap traversal for specific needs
 *
 * arg: passed to rmap_one() and invalid_vma()
 * try_lock: bail out if the rmap lock is contended
 * contended: indicate the rmap traversal bailed out due to lock contention
 * rmap_one: executed on each vma where page is mapped
 * done: for checking the traversal termination condition
 * anon_lock: for taking the anon_vma lock in an optimized way rather than
 *            the default
 * invalid_vma: for skipping vmas that are not of interest
 */
struct rmap_walk_control {
        void *arg;
        bool try_lock;
        bool contended;
        /*
         * Return false if page table scanning in rmap_walk should be stopped.
         * Otherwise, return true.
         */
        bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma,
                                        unsigned long addr, void *arg);
        int (*done)(struct folio *folio);
        struct anon_vma *(*anon_lock)(struct folio *folio,
                                      struct rmap_walk_control *rwc);
        bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};

void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc);
void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc);
struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
                                          struct rmap_walk_control *rwc);
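
/*
 * Sketch of a minimal rmap walk built on the control structure above, in
 * the style of folio_referenced() and try_to_unmap() in mm/rmap.c (the
 * callback name and argument are made up for the example; the folio is
 * normally locked by the caller):
 *
 *      static bool my_rmap_one(struct folio *folio, struct vm_area_struct *vma,
 *                              unsigned long addr, void *arg)
 *      {
 *              ... inspect or modify the mapping of folio at addr ...
 *              return true;    (returning false stops the walk)
 *      }
 *
 *      struct rmap_walk_control rwc = {
 *              .rmap_one = my_rmap_one,
 *              .arg = &my_walk_state,
 *      };
 *
 *      rmap_walk(folio, &rwc);
 */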

#else   /* !CONFIG_MMU */

#define anon_vma_init()         do {} while (0)
#define anon_vma_prepare(vma)   (0)

static inline int folio_referenced(struct folio *folio, int is_locked,
                                  struct mem_cgroup *memcg,
                                  unsigned long *vm_flags)
{
        *vm_flags = 0;
        return 0;
}

static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags)
{
}

static inline int folio_mkclean(struct folio *folio)
{
        return 0;
}
#endif  /* CONFIG_MMU */

#endif  /* _LINUX_RMAP_H */