1 /* SPDX-License-Identifier: GPL-2.0 */ 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _LINUX_PAGEWALK_H 2 #ifndef _LINUX_PAGEWALK_H 3 #define _LINUX_PAGEWALK_H 3 #define _LINUX_PAGEWALK_H 4 4 5 #include <linux/mm.h> 5 #include <linux/mm.h> 6 6 7 struct mm_walk; 7 struct mm_walk; 8 8 9 /* Locking requirement during a page walk. */ 9 /* Locking requirement during a page walk. */ 10 enum page_walk_lock { 10 enum page_walk_lock { 11 /* mmap_lock should be locked for read 11 /* mmap_lock should be locked for read to stabilize the vma tree */ 12 PGWALK_RDLOCK = 0, 12 PGWALK_RDLOCK = 0, 13 /* vma will be write-locked during the 13 /* vma will be write-locked during the walk */ 14 PGWALK_WRLOCK = 1, 14 PGWALK_WRLOCK = 1, 15 /* vma is expected to be already write 15 /* vma is expected to be already write-locked during the walk */ 16 PGWALK_WRLOCK_VERIFY = 2, 16 PGWALK_WRLOCK_VERIFY = 2, 17 }; 17 }; 18 18 19 /** 19 /** 20 * struct mm_walk_ops - callbacks for walk_pag 20 * struct mm_walk_ops - callbacks for walk_page_range 21 * @pgd_entry: if set, called for eac 21 * @pgd_entry: if set, called for each non-empty PGD (top-level) entry 22 * @p4d_entry: if set, called for eac 22 * @p4d_entry: if set, called for each non-empty P4D entry 23 * @pud_entry: if set, called for eac 23 * @pud_entry: if set, called for each non-empty PUD entry 24 * @pmd_entry: if set, called for eac 24 * @pmd_entry: if set, called for each non-empty PMD entry 25 * this handler is requir 25 * this handler is required to be able to handle 26 * pmd_trans_huge() pmds. 26 * pmd_trans_huge() pmds. They may simply choose to 27 * split_huge_page() inst 27 * split_huge_page() instead of handling it explicitly. 28 * @pte_entry: if set, called for eac 28 * @pte_entry: if set, called for each PTE (lowest-level) entry, 29 * including empty ones 29 * including empty ones 30 * @pte_hole: if set, called for eac 30 * @pte_hole: if set, called for each hole at all levels, 31 * depth is -1 if not kno 31 * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. 32 * Any folded depths (whe 32 * Any folded depths (where PTRS_PER_P?D is equal to 1) 33 * are skipped. 33 * are skipped. 34 * @hugetlb_entry: if set, called for eac 34 * @hugetlb_entry: if set, called for each hugetlb entry. This hook 35 * function is called wit 35 * function is called with the vma lock held, in order to 36 * protect against a conc 36 * protect against a concurrent freeing of the pte_t* or 37 * the ptl. In some cases 37 * the ptl. In some cases, the hook function needs to drop 38 * and retake the vma loc 38 * and retake the vma lock in order to avoid deadlocks 39 * while calling other fu 39 * while calling other functions. In such cases the hook 40 * function must either r 40 * function must either refrain from accessing the pte or 41 * ptl after dropping the 41 * ptl after dropping the vma lock, or else revalidate 42 * those items after re-a 42 * those items after re-acquiring the vma lock and before 43 * accessing them. 43 * accessing them. 44 * @test_walk: caller specific callba 44 * @test_walk: caller specific callback function to determine whether 45 * we walk over the curre 45 * we walk over the current vma or not. Returning 0 means 46 * "do page table walk ov 46 * "do page table walk over the current vma", returning 47 * a negative value means 47 * a negative value means "abort current page table walk 48 * right now" and returni 48 * right now" and returning 1 means "skip the current vma" 49 * Note that this callbac 49 * Note that this callback is not called when the caller 50 * passes in a single VMA 50 * passes in a single VMA as for walk_page_vma(). 51 * @pre_vma: if set, called before 51 * @pre_vma: if set, called before starting walk on a non-null vma. 52 * @post_vma: if set, called after a 52 * @post_vma: if set, called after a walk on a non-null vma, provided 53 * that @pre_vma and the 53 * that @pre_vma and the vma walk succeeded. 54 * 54 * 55 * p?d_entry callbacks are called even if thos 55 * p?d_entry callbacks are called even if those levels are folded on a 56 * particular architecture/configuration. 56 * particular architecture/configuration. 57 */ 57 */ 58 struct mm_walk_ops { 58 struct mm_walk_ops { 59 int (*pgd_entry)(pgd_t *pgd, unsigned 59 int (*pgd_entry)(pgd_t *pgd, unsigned long addr, 60 unsigned long next, s 60 unsigned long next, struct mm_walk *walk); 61 int (*p4d_entry)(p4d_t *p4d, unsigned 61 int (*p4d_entry)(p4d_t *p4d, unsigned long addr, 62 unsigned long next, s 62 unsigned long next, struct mm_walk *walk); 63 int (*pud_entry)(pud_t *pud, unsigned 63 int (*pud_entry)(pud_t *pud, unsigned long addr, 64 unsigned long next, s 64 unsigned long next, struct mm_walk *walk); 65 int (*pmd_entry)(pmd_t *pmd, unsigned 65 int (*pmd_entry)(pmd_t *pmd, unsigned long addr, 66 unsigned long next, s 66 unsigned long next, struct mm_walk *walk); 67 int (*pte_entry)(pte_t *pte, unsigned 67 int (*pte_entry)(pte_t *pte, unsigned long addr, 68 unsigned long next, s 68 unsigned long next, struct mm_walk *walk); 69 int (*pte_hole)(unsigned long addr, un 69 int (*pte_hole)(unsigned long addr, unsigned long next, 70 int depth, struct mm_w 70 int depth, struct mm_walk *walk); 71 int (*hugetlb_entry)(pte_t *pte, unsig 71 int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, 72 unsigned long add 72 unsigned long addr, unsigned long next, 73 struct mm_walk *w 73 struct mm_walk *walk); 74 int (*test_walk)(unsigned long addr, u 74 int (*test_walk)(unsigned long addr, unsigned long next, 75 struct mm_walk *walk); 75 struct mm_walk *walk); 76 int (*pre_vma)(unsigned long start, un 76 int (*pre_vma)(unsigned long start, unsigned long end, 77 struct mm_walk *walk); 77 struct mm_walk *walk); 78 void (*post_vma)(struct mm_walk *walk) 78 void (*post_vma)(struct mm_walk *walk); 79 enum page_walk_lock walk_lock; 79 enum page_walk_lock walk_lock; 80 }; 80 }; 81 81 82 /* 82 /* 83 * Action for pud_entry / pmd_entry callbacks. 83 * Action for pud_entry / pmd_entry callbacks. 84 * ACTION_SUBTREE is the default 84 * ACTION_SUBTREE is the default 85 */ 85 */ 86 enum page_walk_action { 86 enum page_walk_action { 87 /* Descend to next level, splitting hu 87 /* Descend to next level, splitting huge pages if needed and possible */ 88 ACTION_SUBTREE = 0, 88 ACTION_SUBTREE = 0, 89 /* Continue to next entry at this leve 89 /* Continue to next entry at this level (ignoring any subtree) */ 90 ACTION_CONTINUE = 1, 90 ACTION_CONTINUE = 1, 91 /* Call again for this entry */ 91 /* Call again for this entry */ 92 ACTION_AGAIN = 2 92 ACTION_AGAIN = 2 93 }; 93 }; 94 94 95 /** 95 /** 96 * struct mm_walk - walk_page_range data 96 * struct mm_walk - walk_page_range data 97 * @ops: operation to call during the w 97 * @ops: operation to call during the walk 98 * @mm: mm_struct representing the tar 98 * @mm: mm_struct representing the target process of page table walk 99 * @pgd: pointer to PGD; only valid wit 99 * @pgd: pointer to PGD; only valid with no_vma (otherwise set to NULL) 100 * @vma: vma currently walked (NULL if 100 * @vma: vma currently walked (NULL if walking outside vmas) 101 * @action: next action to perform (see en 101 * @action: next action to perform (see enum page_walk_action) 102 * @no_vma: walk ignoring vmas (vma will a 102 * @no_vma: walk ignoring vmas (vma will always be NULL) 103 * @private: private data for callbacks' us 103 * @private: private data for callbacks' usage 104 * 104 * 105 * (see the comment on walk_page_range() for m 105 * (see the comment on walk_page_range() for more details) 106 */ 106 */ 107 struct mm_walk { 107 struct mm_walk { 108 const struct mm_walk_ops *ops; 108 const struct mm_walk_ops *ops; 109 struct mm_struct *mm; 109 struct mm_struct *mm; 110 pgd_t *pgd; 110 pgd_t *pgd; 111 struct vm_area_struct *vma; 111 struct vm_area_struct *vma; 112 enum page_walk_action action; 112 enum page_walk_action action; 113 bool no_vma; 113 bool no_vma; 114 void *private; 114 void *private; 115 }; 115 }; 116 116 117 int walk_page_range(struct mm_struct *mm, unsi 117 int walk_page_range(struct mm_struct *mm, unsigned long start, 118 unsigned long end, const struc 118 unsigned long end, const struct mm_walk_ops *ops, 119 void *private); 119 void *private); 120 int walk_page_range_novma(struct mm_struct *mm 120 int walk_page_range_novma(struct mm_struct *mm, unsigned long start, 121 unsigned long end, c 121 unsigned long end, const struct mm_walk_ops *ops, 122 pgd_t *pgd, 122 pgd_t *pgd, 123 void *private); 123 void *private); 124 int walk_page_range_vma(struct vm_area_struct 124 int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, 125 unsigned long end, con 125 unsigned long end, const struct mm_walk_ops *ops, 126 void *private); 126 void *private); 127 int walk_page_vma(struct vm_area_struct *vma, 127 int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, 128 void *private); 128 void *private); 129 int walk_page_mapping(struct address_space *ma 129 int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, 130 pgoff_t nr, const struct 130 pgoff_t nr, const struct mm_walk_ops *ops, 131 void *private); 131 void *private); 132 132 133 typedef int __bitwise folio_walk_flags_t; << 134 << 135 /* << 136 * Walk migration entries as well. Careful: a << 137 * concurrently. << 138 */ << 139 #define FW_MIGRATION ((__fo << 140 << 141 /* Walk shared zeropages (small + huge) as wel << 142 #define FW_ZEROPAGE ((__fo << 143 << 144 enum folio_walk_level { << 145 FW_LEVEL_PTE, << 146 FW_LEVEL_PMD, << 147 FW_LEVEL_PUD, << 148 }; << 149 << 150 /** << 151 * struct folio_walk - folio_walk_start() / fo << 152 * @page: exact folio page referenced (i << 153 * @level: page table level identifying t << 154 * @pte: pointer to the page table entr << 155 * @pmd: pointer to the page table entr << 156 * @pud: pointer to the page table entr << 157 * @ptl: pointer to the page table lock << 158 * << 159 * (see folio_walk_start() documentation for m << 160 */ << 161 struct folio_walk { << 162 /* public */ << 163 struct page *page; << 164 enum folio_walk_level level; << 165 union { << 166 pte_t *ptep; << 167 pud_t *pudp; << 168 pmd_t *pmdp; << 169 }; << 170 union { << 171 pte_t pte; << 172 pud_t pud; << 173 pmd_t pmd; << 174 }; << 175 /* private */ << 176 struct vm_area_struct *vma; << 177 spinlock_t *ptl; << 178 }; << 179 << 180 struct folio *folio_walk_start(struct folio_wa << 181 struct vm_area_struct *vma, un << 182 folio_walk_flags_t flags); << 183 << 184 #define folio_walk_end(__fw, __vma) do { \ << 185 spin_unlock((__fw)->ptl); \ << 186 if (likely((__fw)->level == FW_LEVEL_P << 187 pte_unmap((__fw)->ptep); \ << 188 vma_pgtable_walk_end(__vma); \ << 189 } while (0) << 190 << 191 #endif /* _LINUX_PAGEWALK_H */ 133 #endif /* _LINUX_PAGEWALK_H */ 192 134
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.