
TOMOYO Linux Cross Reference
Linux/include/linux/pgtable.h


Diff markup

Differences between /include/linux/pgtable.h (Version linux-6.12-rc7) and /include/linux/pgtable.h (Version linux-6.9.12). Lines marked "+" appear only in linux-6.12-rc7, lines marked "-" appear only in linux-6.9.12, and unmarked lines are common to both versions.


/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PGTABLE_H
#define _LINUX_PGTABLE_H

#include <linux/pfn.h>
#include <asm/pgtable.h>

#define PMD_ORDER       (PMD_SHIFT - PAGE_SHIFT)
#define PUD_ORDER       (PUD_SHIFT - PAGE_SHIFT)

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <asm-generic/pgtable_uffd.h>
#include <linux/page_table_check.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
        defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE.  However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING   0UL
#endif

/*
 * This defines the first usable user address. Platforms
 * can override its value with custom FIRST_USER_ADDRESS
 * defined in their respective <asm/pgtable.h>.
 */
#ifndef FIRST_USER_ADDRESS
#define FIRST_USER_ADDRESS      0UL
#endif

/*
 * This defines the generic helper for accessing PMD page
 * table page. Although platforms can still override this
 * via their respective <asm/pgtable.h>.
 */
#ifndef pmd_pgtable
#define pmd_pgtable(pmd) pmd_page(pmd)
#endif

+#define pmd_folio(pmd) page_folio(pmd_page(pmd))
+
/*
 * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD]
 *
 * The pXx_index() functions return the index of the entry in the page
 * table page which would control the given virtual address
 *
 * As these functions may be used by the same code for different levels of
 * the page table folding, they are always available, regardless of
 * CONFIG_PGTABLE_LEVELS value. For the folded levels they simply return 0
 * because in such cases PTRS_PER_PxD equals 1.
 */

static inline unsigned long pte_index(unsigned long address)
{
        return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}

#ifndef pmd_index
static inline unsigned long pmd_index(unsigned long address)
{
        return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}
#define pmd_index pmd_index
#endif

#ifndef pud_index
static inline unsigned long pud_index(unsigned long address)
{
        return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}
#define pud_index pud_index
#endif

#ifndef pgd_index
/* Must be a compile-time constant, so implement it as a macro */
#define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#endif

#ifndef pte_offset_kernel
static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
{
        return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
}
#define pte_offset_kernel pte_offset_kernel
#endif

#ifdef CONFIG_HIGHPTE
#define __pte_map(pmd, address) \
        ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address)))
#define pte_unmap(pte)  do {    \
        kunmap_local((pte));    \
        rcu_read_unlock();      \
} while (0)
#else
static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
{
        return pte_offset_kernel(pmd, address);
}
static inline void pte_unmap(pte_t *pte)
{
        rcu_read_unlock();
}
#endif

void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);

/* Find an entry in the second-level page table.. */
#ifndef pmd_offset
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
        return pud_pgtable(*pud) + pmd_index(address);
}
#define pmd_offset pmd_offset
#endif

#ifndef pud_offset
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
        return p4d_pgtable(*p4d) + pud_index(address);
}
#define pud_offset pud_offset
#endif

static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
{
        return (pgd + pgd_index(address));
};

/*
 * a shortcut to get a pgd_t in a given mm
 */
#ifndef pgd_offset
#define pgd_offset(mm, address)         pgd_offset_pgd((mm)->pgd, (address))
#endif

/*
 * a shortcut which implies the use of the kernel's pgd, instead
 * of a process's
 */
-#ifndef pgd_offset_k
#define pgd_offset_k(address)           pgd_offset(&init_mm, (address))
-#endif

/*
 * In many cases it is known that a virtual address is mapped at PMD or PTE
 * level, so instead of traversing all the page table levels, we can get a
 * pointer to the PMD entry in user or kernel page table or translate a virtual
 * address to the pointer in the PTE in the kernel page tables with simple
 * helpers.
 */
static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va)
{
        return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), va);
}

static inline pmd_t *pmd_off_k(unsigned long va)
{
        return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va);
}

static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
        pmd_t *pmd = pmd_off_k(vaddr);

        return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
}
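
/*
 * Illustrative sketch (editorial, not part of pgtable.h): a caller might
 * use virt_to_kpte() to inspect how a PTE-mapped kernel address is backed.
 * The helper name example_kernel_pfn() is hypothetical.
 */
static inline unsigned long example_kernel_pfn(unsigned long vaddr)
{
        pte_t *ptep = virt_to_kpte(vaddr);      /* NULL if no PMD entry */
        pte_t pte;

        if (!ptep)
                return 0;
        pte = ptep_get(ptep);
        if (!pte_present(pte))                  /* not mapped at PTE level */
                return 0;
        return pte_pfn(pte);                    /* physical frame for vaddr */
}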

#ifndef pmd_young
static inline int pmd_young(pmd_t pmd)
{
        return 0;
}
#endif

#ifndef pmd_dirty
static inline int pmd_dirty(pmd_t pmd)
{
        return 0;
}
#endif

/*
 * A facility to provide lazy MMU batching.  This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued.  Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window.  Note that using this
 * interface requires that read hazards be removed from the code.  A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads though
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date.  This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified.  In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()      do {} while (0)
#define arch_leave_lazy_mmu_mode()      do {} while (0)
#define arch_flush_lazy_mmu_mode()      do {} while (0)
#endif

#ifndef pte_batch_hint
/**
 * pte_batch_hint - Number of pages that can be added to batch without scanning.
 * @ptep: Page table pointer for the entry.
 * @pte: Page table entry.
 *
 * Some architectures know that a set of contiguous ptes all map the same
 * contiguous memory with the same permissions. In this case, it can provide a
 * hint to aid pte batching without the core code needing to scan every pte.
 *
 * An architecture implementation may ignore the PTE accessed state. Further,
 * the dirty state must apply atomically to all the PTEs described by the hint.
 *
 * May be overridden by the architecture, else pte_batch_hint is always 1.
 */
static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
{
        return 1;
}
#endif
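
/*
 * Illustrative sketch (editorial, not part of pgtable.h): a PTE scan can
 * use pte_batch_hint() to leap over runs that the architecture guarantees
 * are contiguous (e.g. arm64 cont-PTEs) instead of examining every entry.
 * Loosely modeled on the batching loops in mm/; example_count_present()
 * and its parameters are hypothetical.
 */
static inline unsigned int example_count_present(pte_t *ptep, unsigned int max_nr)
{
        unsigned int i = 0, nr, present = 0;

        while (i < max_nr) {
                pte_t pte = ptep_get(ptep);

                nr = pte_batch_hint(ptep, pte); /* >= 1; run-length hint */
                if (nr > max_nr - i)            /* clamp the hint to the range */
                        nr = max_nr - i;
                if (pte_present(pte))
                        present += nr;
                ptep += nr;
                i += nr;
        }
        return present;
}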

#ifndef pte_advance_pfn
static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
{
        return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
}
#endif

#define pte_next_pfn(pte) pte_advance_pfn(pte, 1)

#ifndef set_ptes
/**
 * set_ptes - Map consecutive pages to a contiguous range of addresses.
 * @mm: Address space to map the pages into.
 * @addr: Address to map the first page at.
 * @ptep: Page table pointer for the first entry.
 * @pte: Page table entry for the first page.
 * @nr: Number of pages to map.
 *
 * When nr==1, initial state of pte may be present or not present, and new state
 * may be present or not present. When nr>1, initial state of all ptes must be
 * not present, and new state must be present.
 *
 * May be overridden by the architecture, or the architecture can define
 * set_pte() and PFN_PTE_SHIFT.
 *
 * Context: The caller holds the page table lock.  The pages all belong
 * to the same folio.  The PTEs are all in the same PMD.
 */
static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
                pte_t *ptep, pte_t pte, unsigned int nr)
{
        page_table_check_ptes_set(mm, ptep, pte, nr);

        arch_enter_lazy_mmu_mode();
        for (;;) {
                set_pte(ptep, pte);
                if (--nr == 0)
                        break;
                ptep++;
                pte = pte_next_pfn(pte);
        }
        arch_leave_lazy_mmu_mode();
}
#endif
#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
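
/*
 * Illustrative sketch (editorial, not part of pgtable.h): fault handlers
 * use set_ptes() to map every page of a folio with one call, relying on
 * pte_next_pfn() to advance the PFN. example_map_folio() is hypothetical;
 * the caller must hold the PTE lock and all entries must start out
 * non-present, per the set_ptes() contract above.
 */
static inline void example_map_folio(struct vm_area_struct *vma,
                unsigned long addr, pte_t *ptep, struct folio *folio)
{
        pte_t pte = mk_pte(folio_page(folio, 0), vma->vm_page_prot);

        set_ptes(vma->vm_mm, addr, ptep, pte, folio_nr_pages(folio));
}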

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pte_t *ptep,
                                 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp,
                                 pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pud_t *pudp,
                                 pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
                                        unsigned long address, pmd_t *pmdp,
                                        pmd_t entry, int dirty)
{
        BUILD_BUG();
        return 0;
}
static inline int pudp_set_access_flags(struct vm_area_struct *vma,
                                        unsigned long address, pud_t *pudp,
                                        pud_t entry, int dirty)
{
        BUILD_BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef ptep_get
static inline pte_t ptep_get(pte_t *ptep)
{
        return READ_ONCE(*ptep);
}
#endif

#ifndef pmdp_get
static inline pmd_t pmdp_get(pmd_t *pmdp)
{
        return READ_ONCE(*pmdp);
}
#endif

#ifndef pudp_get
static inline pud_t pudp_get(pud_t *pudp)
{
        return READ_ONCE(*pudp);
}
#endif

#ifndef p4dp_get
static inline p4d_t p4dp_get(p4d_t *p4dp)
{
        return READ_ONCE(*p4dp);
}
#endif

#ifndef pgdp_get
static inline pgd_t pgdp_get(pgd_t *pgdp)
{
        return READ_ONCE(*pgdp);
}
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pte_t *ptep)
{
        pte_t pte = ptep_get(ptep);
        int r = 1;
        if (!pte_young(pte))
                r = 0;
        else
                set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
        return r;
}
#endif
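
/*
 * Illustrative sketch (editorial, not part of pgtable.h): reclaim-style
 * aging reads and clears the accessed bit to learn whether a mapping was
 * used since the last scan; real users such as folio_referenced() use
 * ptep_clear_flush_young() when a stale TLB entry would otherwise hide
 * subsequent accesses. example_pte_was_referenced() is hypothetical.
 */
static inline bool example_pte_was_referenced(struct vm_area_struct *vma,
                unsigned long addr, pte_t *ptep)
{
        /* caller holds the page table lock covering *ptep */
        return ptep_test_and_clear_young(vma, addr, ptep) != 0;
}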

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        pmd_t pmd = *pmdp;
        int r = 1;
        if (!pmd_young(pmd))
                r = 0;
        else
                set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
        return r;
}
#else
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        BUILD_BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
                           unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
                                  unsigned long address, pmd_t *pmdp);
#else
/*
 * Despite relevant to THP only, this API is called from generic rmap code
 * under PageTransHuge(), hence needs a dummy implementation for !THP
 */
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
                                         unsigned long address, pmd_t *pmdp)
{
        BUILD_BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef arch_has_hw_nonleaf_pmd_young
/*
 * Return whether the accessed bit in non-leaf PMD entries is supported on the
 * local CPU.
 */
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
        return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
}
#endif

#ifndef arch_has_hw_pte_young
/*
 * Return whether the accessed bit is supported on the local CPU.
 *
 * This stub assumes accessing through an old PTE triggers a page fault.
 * Architectures that automatically set the access bit should overwrite it.
 */
static inline bool arch_has_hw_pte_young(void)
{
        return IS_ENABLED(CONFIG_ARCH_HAS_HW_PTE_YOUNG);
}
#endif

#ifndef arch_check_zapped_pte
static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
                                         pte_t pte)
{
}
#endif

#ifndef arch_check_zapped_pmd
static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
                                         pmd_t pmd)
{
}
#endif

+#ifndef arch_check_zapped_pud
+static inline void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
+{
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
                                       unsigned long address,
                                       pte_t *ptep)
{
        pte_t pte = ptep_get(ptep);
        pte_clear(mm, address, ptep);
        page_table_check_pte_clear(mm, pte);
        return pte;
}
#endif

+#ifndef clear_young_dirty_ptes
+/**
+ * clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the
+ *              same folio as old/clean.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to mark old/clean.
+ * @flags: Flags to modify the PTE batch semantics.
+ *
+ * May be overridden by the architecture; otherwise, implemented by
+ * get_and_clear/modify/set for each pte in the range.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock.  The PTEs map consecutive
+ * pages that belong to the same folio.  The PTEs are all in the same PMD.
+ */
+static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
+                                          unsigned long addr, pte_t *ptep,
+                                          unsigned int nr, cydp_t flags)
+{
+        pte_t pte;
+
+        for (;;) {
+                if (flags == CYDP_CLEAR_YOUNG)
+                        ptep_test_and_clear_young(vma, addr, ptep);
+                else {
+                        pte = ptep_get_and_clear(vma->vm_mm, addr, ptep);
+                        if (flags & CYDP_CLEAR_YOUNG)
+                                pte = pte_mkold(pte);
+                        if (flags & CYDP_CLEAR_DIRTY)
+                                pte = pte_mkclean(pte);
+                        set_pte_at(vma->vm_mm, addr, ptep, pte);
+                }
+                if (--nr == 0)
+                        break;
+                ptep++;
+                addr += PAGE_SIZE;
+        }
+}
+#endif
+
static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        ptep_get_and_clear(mm, addr, ptep);
}

#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
/*
 * For walking the pagetables without holding any locks.  Some architectures
 * (eg x86-32 PAE) cannot load the entries atomically without using expensive
 * instructions.  We are guaranteed that a PTE will only either go from not
 * present to present, or present to not present -- it will not switch to a
 * completely different present page without a TLB flush inbetween; which we
 * are blocking by holding interrupts off.
 *
 * Setting ptes from not present to present goes:
 *
 *   ptep->pte_high = h;
 *   smp_wmb();
 *   ptep->pte_low = l;
 *
 * And present to not present goes:
 *
 *   ptep->pte_low = 0;
 *   smp_wmb();
 *   ptep->pte_high = 0;
 *
 * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
 * We load pte_high *after* loading pte_low, which ensures we don't see an older
 * value of pte_high.  *Then* we recheck pte_low, which ensures that we haven't
 * picked up a changed pte high. We might have gotten rubbish values from
 * pte_low and pte_high, but we are guaranteed that pte_low will not have the
 * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
 * operates on present ptes we're safe.
 */
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
        pte_t pte;

        do {
                pte.pte_low = ptep->pte_low;
                smp_rmb();
                pte.pte_high = ptep->pte_high;
                smp_rmb();
        } while (unlikely(pte.pte_low != ptep->pte_low));

        return pte;
}
#define ptep_get_lockless ptep_get_lockless

#if CONFIG_PGTABLE_LEVELS > 2
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
        pmd_t pmd;

        do {
                pmd.pmd_low = pmdp->pmd_low;
                smp_rmb();
                pmd.pmd_high = pmdp->pmd_high;
                smp_rmb();
        } while (unlikely(pmd.pmd_low != pmdp->pmd_low));

        return pmd;
}
#define pmdp_get_lockless pmdp_get_lockless
#define pmdp_get_lockless_sync() tlb_remove_table_sync_one()
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */

/*
 * We require that the PTE can be read atomically.
 */
#ifndef ptep_get_lockless
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
        return ptep_get(ptep);
}
#endif

#ifndef pmdp_get_lockless
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
        return pmdp_get(pmdp);
}
static inline void pmdp_get_lockless_sync(void)
{
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        pmd_t pmd = *pmdp;

        pmd_clear(pmdp);
        page_table_check_pmd_clear(mm, pmd);

        return pmd;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
                                            unsigned long address,
                                            pud_t *pudp)
{
        pud_t pud = *pudp;

        pud_clear(pudp);
        page_table_check_pud_clear(mm, pud);

        return pud;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
                                            unsigned long address, pmd_t *pmdp,
                                            int full)
{
        return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
}
#endif

#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
                                            unsigned long address, pud_t *pudp,
                                            int full)
{
        return pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address, pte_t *ptep,
                                            int full)
{
        return ptep_get_and_clear(mm, address, ptep);
}
#endif

#ifndef get_and_clear_full_ptes
/**
 * get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of
 *                           the same folio, collecting dirty/accessed bits.
 * @mm: Address space the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries to clear.
 * @full: Whether we are clearing a full mm.
 *
 * May be overridden by the architecture; otherwise, implemented as a simple
 * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the
 * returned PTE.
 *
 * Note that PTE bits in the PTE range besides the PFN can differ. For example,
 * some PTEs might be write-protected.
 *
 * Context: The caller holds the page table lock.  The PTEs map consecutive
 * pages that belong to the same folio.  The PTEs are all in the same PMD.
 */
static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
                unsigned long addr, pte_t *ptep, unsigned int nr, int full)
{
        pte_t pte, tmp_pte;

        pte = ptep_get_and_clear_full(mm, addr, ptep, full);
685         while (--nr) {                            635         while (--nr) {
686                 ptep++;                           636                 ptep++;
687                 addr += PAGE_SIZE;                637                 addr += PAGE_SIZE;
688                 tmp_pte = ptep_get_and_clear_f    638                 tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full);
689                 if (pte_dirty(tmp_pte))           639                 if (pte_dirty(tmp_pte))
690                         pte = pte_mkdirty(pte)    640                         pte = pte_mkdirty(pte);
691                 if (pte_young(tmp_pte))           641                 if (pte_young(tmp_pte))
692                         pte = pte_mkyoung(pte)    642                         pte = pte_mkyoung(pte);
693         }                                         643         }
694         return pte;                               644         return pte;
695 }                                                 645 }
696 #endif                                            646 #endif
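
/*
 * Illustrative sketch (not part of this header): one way a caller might use
 * get_and_clear_full_ptes() when unmapping a small folio. The helper name
 * example_clear_folio_ptes() and its calling context are assumptions of this
 * sketch; as the kernel-doc above requires, the PTE lock must be held and
 * all entries must map the same folio.
 */
static inline bool example_clear_folio_ptes(struct mm_struct *mm,
		unsigned long addr, pte_t *ptep, struct folio *folio, int full)
{
	/* Batch-clear the folio's PTEs, merging dirty/accessed bits. */
	pte_t pte = get_and_clear_full_ptes(mm, addr, ptep,
					    folio_nr_pages(folio), full);

	/* One check covers the whole batch: was any page dirtied? */
	return pte_dirty(pte);
}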
697                                                   647 
698 #ifndef clear_full_ptes                           648 #ifndef clear_full_ptes
699 /**                                               649 /**
700  * clear_full_ptes - Clear present PTEs that m    650  * clear_full_ptes - Clear present PTEs that map consecutive pages of the same
701  *                   folio.                       651  *                   folio.
702  * @mm: Address space the pages are mapped int    652  * @mm: Address space the pages are mapped into.
703  * @addr: Address the first page is mapped at.    653  * @addr: Address the first page is mapped at.
704  * @ptep: Page table pointer for the first ent    654  * @ptep: Page table pointer for the first entry.
705  * @nr: Number of entries to clear.               655  * @nr: Number of entries to clear.
706  * @full: Whether we are clearing a full mm.      656  * @full: Whether we are clearing a full mm.
707  *                                                657  *
708  * May be overridden by the architecture; othe    658  * May be overridden by the architecture; otherwise, implemented as a simple
709  * loop over ptep_get_and_clear_full().           659  * loop over ptep_get_and_clear_full().
710  *                                                660  *
711  * Note that PTE bits in the PTE range besides    661  * Note that PTE bits in the PTE range besides the PFN can differ. For example,
712  * some PTEs might be write-protected.            662  * some PTEs might be write-protected.
713  *                                                663  *
714  * Context: The caller holds the page table lo    664  * Context: The caller holds the page table lock.  The PTEs map consecutive
715  * pages that belong to the same folio.  The P    665  * pages that belong to the same folio.  The PTEs are all in the same PMD.
716  */                                               666  */
717 static inline void clear_full_ptes(struct mm_s    667 static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
718                 pte_t *ptep, unsigned int nr,     668                 pte_t *ptep, unsigned int nr, int full)
719 {                                                 669 {
720         for (;;) {                                670         for (;;) {
721                 ptep_get_and_clear_full(mm, ad    671                 ptep_get_and_clear_full(mm, addr, ptep, full);
722                 if (--nr == 0)                    672                 if (--nr == 0)
723                         break;                    673                         break;
724                 ptep++;                           674                 ptep++;
725                 addr += PAGE_SIZE;                675                 addr += PAGE_SIZE;
726         }                                         676         }
727 }                                                 677 }
728 #endif                                            678 #endif
729                                                   679 
730 /*                                               680 /*
731  * If two threads concurrently fault at the same page, the thread that    681  * If two threads concurrently fault at the same page, the thread that
732  * won the race updates the PTE and its local TLB/cache. The other thread    682  * won the race updates the PTE and its local TLB/cache. The other thread
733  * gives up and simply continues; on architectures where software can    683  * gives up and simply continues; on architectures where software can
734  * update the TLB, the local TLB can be updated here to avoid the next    684  * update the TLB, the local TLB can be updated here to avoid the next
735  * page fault. Unlike update_mmu_cache(), this function updates the TLB    685  * page fault. Unlike update_mmu_cache(), this function updates the TLB
736  * only and does nothing with the cache or anything else.    686  * only and does nothing with the cache or anything else.
737  */                                               687  */
738 #ifndef update_mmu_tlb_range                   !! 688 #ifndef __HAVE_ARCH_UPDATE_MMU_TLB
739 static inline void update_mmu_tlb_range(struct vm_area_struct *vma, <<
740                                 unsigned long address, pte_t *ptep, unsigned int nr) <<
741 {                                              <<
742 }                                              <<
743 #endif                                         <<
744                                                <<
745 static inline void update_mmu_tlb(struct vm_area_struct *vma,    689 static inline void update_mmu_tlb(struct vm_area_struct *vma,
746                                 unsigned long address, pte_t *ptep)     690                                 unsigned long address, pte_t *ptep)
747 {                                                 691 {
748         update_mmu_tlb_range(vma, address, ptep, 1); <<
749 }                                                 692 }
                                                   >> 693 #define __HAVE_ARCH_UPDATE_MMU_TLB
                                                   >> 694 #endif
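
/*
 * Illustrative sketch (hypothetical, not kernel code): the typical caller of
 * update_mmu_tlb() is a fault handler that lost the race to install an
 * entry. With the PTE lock held, it refreshes the local TLB on software-TLB
 * architectures and backs out rather than installing anything.
 */
static inline bool example_lost_fault_race(struct vm_area_struct *vma,
		unsigned long address, pte_t *ptep)
{
	if (!pte_none(ptep_get(ptep))) {
		/* Another thread already handled the fault. */
		update_mmu_tlb(vma, address, ptep);
		return true;
	}
	return false;
}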
750                                                   695 
751 /*                                                696 /*
752  * Some architectures may be able to avoid expensive synchronization    697  * Some architectures may be able to avoid expensive synchronization
753  * primitives when modifying PTEs that are already not present, or    698  * primitives when modifying PTEs that are already not present, or
754  * during address space destruction.              699  * during address space destruction.
755  */                                               700  */
756 #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL    701 #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
757 static inline void pte_clear_not_present_full(    702 static inline void pte_clear_not_present_full(struct mm_struct *mm,
758                                                   703                                               unsigned long address,
759                                                   704                                               pte_t *ptep,
760                                                   705                                               int full)
761 {                                                 706 {
762         pte_clear(mm, address, ptep);             707         pte_clear(mm, address, ptep);
763 }                                                 708 }
764 #endif                                            709 #endif
765                                                   710 
766 #ifndef clear_not_present_full_ptes            <<
767 /**                                            <<
768  * clear_not_present_full_ptes - Clear multiple not present PTEs which are <<
769  *                               consecutive in the pgtable. <<
770  * @mm: Address space the ptes represent.      <<
771  * @addr: Address of the first pte.            <<
772  * @ptep: Page table pointer for the first entry. <<
773  * @nr: Number of entries to clear.            <<
774  * @full: Whether we are clearing a full mm.   <<
775  *                                             <<
776  * May be overridden by the architecture; otherwise, implemented as a simple <<
777  * loop over pte_clear_not_present_full().     <<
778  *                                             <<
779  * Context: The caller holds the page table lock.  The PTEs are all not present. <<
780  * The PTEs are all in the same PMD.           <<
781  */                                            <<
782 static inline void clear_not_present_full_ptes(struct mm_struct *mm, <<
783                 unsigned long addr, pte_t *ptep, unsigned int nr, int full) <<
784 {                                              <<
785         for (;;) {                             <<
786                 pte_clear_not_present_full(mm, addr, ptep, full); <<
787                 if (--nr == 0)                 <<
788                         break;                 <<
789                 ptep++;                        <<
790                 addr += PAGE_SIZE;             <<
791         }                                      <<
792 }                                              <<
793 #endif                                         <<
794                                                << 
795 #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH              711 #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
796 extern pte_t ptep_clear_flush(struct vm_area_s    712 extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
797                               unsigned long ad    713                               unsigned long address,
798                               pte_t *ptep);       714                               pte_t *ptep);
799 #endif                                            715 #endif
800                                                   716 
801 #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH         717 #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
802 extern pmd_t pmdp_huge_clear_flush(struct vm_a    718 extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
803                               unsigned long ad    719                               unsigned long address,
804                               pmd_t *pmdp);       720                               pmd_t *pmdp);
805 extern pud_t pudp_huge_clear_flush(struct vm_a    721 extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
806                               unsigned long ad    722                               unsigned long address,
807                               pud_t *pudp);       723                               pud_t *pudp);
808 #endif                                            724 #endif
809                                                   725 
810 #ifndef pte_mkwrite                               726 #ifndef pte_mkwrite
811 static inline pte_t pte_mkwrite(pte_t pte, str    727 static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
812 {                                                 728 {
813         return pte_mkwrite_novma(pte);            729         return pte_mkwrite_novma(pte);
814 }                                                 730 }
815 #endif                                            731 #endif
816                                                   732 
817 #if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !    733 #if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite)
818 static inline pmd_t pmd_mkwrite(pmd_t pmd, str    734 static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
819 {                                                 735 {
820         return pmd_mkwrite_novma(pmd);            736         return pmd_mkwrite_novma(pmd);
821 }                                                 737 }
822 #endif                                            738 #endif
823                                                   739 
824 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT            740 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
825 struct mm_struct;                                 741 struct mm_struct;
826 static inline void ptep_set_wrprotect(struct m    742 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
827 {                                                 743 {
828         pte_t old_pte = ptep_get(ptep);           744         pte_t old_pte = ptep_get(ptep);
829         set_pte_at(mm, address, ptep, pte_wrpr    745         set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
830 }                                                 746 }
831 #endif                                            747 #endif
832                                                   748 
833 #ifndef wrprotect_ptes                            749 #ifndef wrprotect_ptes
834 /**                                               750 /**
835  * wrprotect_ptes - Write-protect PTEs that ma    751  * wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same
836  *                  folio.                        752  *                  folio.
837  * @mm: Address space the pages are mapped int    753  * @mm: Address space the pages are mapped into.
838  * @addr: Address the first page is mapped at.    754  * @addr: Address the first page is mapped at.
839  * @ptep: Page table pointer for the first ent    755  * @ptep: Page table pointer for the first entry.
840  * @nr: Number of entries to write-protect.       756  * @nr: Number of entries to write-protect.
841  *                                                757  *
842  * May be overridden by the architecture; othe    758  * May be overridden by the architecture; otherwise, implemented as a simple
843  * loop over ptep_set_wrprotect().                759  * loop over ptep_set_wrprotect().
844  *                                                760  *
845  * Note that PTE bits in the PTE range besides    761  * Note that PTE bits in the PTE range besides the PFN can differ. For example,
846  * some PTEs might be write-protected.            762  * some PTEs might be write-protected.
847  *                                                763  *
848  * Context: The caller holds the page table lo    764  * Context: The caller holds the page table lock.  The PTEs map consecutive
849  * pages that belong to the same folio.  The P    765  * pages that belong to the same folio.  The PTEs are all in the same PMD.
850  */                                               766  */
851 static inline void wrprotect_ptes(struct mm_st    767 static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
852                 pte_t *ptep, unsigned int nr)     768                 pte_t *ptep, unsigned int nr)
853 {                                                 769 {
854         for (;;) {                                770         for (;;) {
855                 ptep_set_wrprotect(mm, addr, p    771                 ptep_set_wrprotect(mm, addr, ptep);
856                 if (--nr == 0)                    772                 if (--nr == 0)
857                         break;                    773                         break;
858                 ptep++;                           774                 ptep++;
859                 addr += PAGE_SIZE;                775                 addr += PAGE_SIZE;
860         }                                         776         }
861 }                                                 777 }
862 #endif                                            778 #endif
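
/*
 * Illustrative sketch (hypothetical helper): batching write-protection at
 * fork() time. With the PTE lock held and all 'nr' entries verified to map
 * the same folio, one call covers the whole range instead of 'nr' separate
 * ptep_set_wrprotect() calls.
 */
static inline void example_share_folio_cow(struct mm_struct *src_mm,
		unsigned long addr, pte_t *ptep, unsigned int nr)
{
	/* Make the batch read-only so later writes fault and can be COWed. */
	wrprotect_ptes(src_mm, addr, ptep, nr);
}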
863                                                   779 
864 /*                                               780 /*
865  * On some architectures the hardware does not set the page access bit    781  * On some architectures the hardware does not set the page access bit
866  * when a page is accessed; software is responsible for setting it, which    782  * when a page is accessed; software is responsible for setting it, which
867  * incurs an extra page-fault penalty to track the access bit. As an    783  * incurs an extra page-fault penalty to track the access bit. As an
868  * optimization, the access bit can be set throughout the page-fault flow    784  * optimization, the access bit can be set throughout the page-fault flow
869  * on those architectures. Unlike pte_mkyoung(), this macro is used only    785  * on those architectures. Unlike pte_mkyoung(), this macro is used only
870  * on platforms where software maintains the page access bit.    786  * on platforms where software maintains the page access bit.
871  */                                               787  */
872 #ifndef pte_sw_mkyoung                            788 #ifndef pte_sw_mkyoung
873 static inline pte_t pte_sw_mkyoung(pte_t pte)     789 static inline pte_t pte_sw_mkyoung(pte_t pte)
874 {                                                 790 {
875         return pte;                               791         return pte;
876 }                                                 792 }
877 #define pte_sw_mkyoung  pte_sw_mkyoung            793 #define pte_sw_mkyoung  pte_sw_mkyoung
878 #endif                                            794 #endif
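
/*
 * Illustrative sketch (hypothetical helper): building a PTE in a fault path.
 * pte_sw_mkyoung() sets the access bit up front on architectures that
 * maintain it in software, saving a follow-up fault; elsewhere it is a no-op.
 */
static inline pte_t example_mk_fault_entry(struct page *page, pgprot_t prot)
{
	pte_t entry = mk_pte(page, prot);

	return pte_sw_mkyoung(entry);
}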
879                                                   795 
880 #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT            796 #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
881 #ifdef CONFIG_TRANSPARENT_HUGEPAGE                797 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
882 static inline void pmdp_set_wrprotect(struct m    798 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
883                                       unsigned    799                                       unsigned long address, pmd_t *pmdp)
884 {                                                 800 {
885         pmd_t old_pmd = *pmdp;                    801         pmd_t old_pmd = *pmdp;
886         set_pmd_at(mm, address, pmdp, pmd_wrpr    802         set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
887 }                                                 803 }
888 #else                                             804 #else
889 static inline void pmdp_set_wrprotect(struct m    805 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
890                                       unsigned    806                                       unsigned long address, pmd_t *pmdp)
891 {                                                 807 {
892         BUILD_BUG();                              808         BUILD_BUG();
893 }                                                 809 }
894 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */          810 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
895 #endif                                            811 #endif
896 #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT            812 #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
897 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_P    813 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
898 #ifdef CONFIG_TRANSPARENT_HUGEPAGE                814 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
899 static inline void pudp_set_wrprotect(struct m    815 static inline void pudp_set_wrprotect(struct mm_struct *mm,
900                                       unsigned    816                                       unsigned long address, pud_t *pudp)
901 {                                                 817 {
902         pud_t old_pud = *pudp;                    818         pud_t old_pud = *pudp;
903                                                   819 
904         set_pud_at(mm, address, pudp, pud_wrpr    820         set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
905 }                                                 821 }
906 #else                                             822 #else
907 static inline void pudp_set_wrprotect(struct m    823 static inline void pudp_set_wrprotect(struct mm_struct *mm,
908                                       unsigned    824                                       unsigned long address, pud_t *pudp)
909 {                                                 825 {
910         BUILD_BUG();                              826         BUILD_BUG();
911 }                                                 827 }
912 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */          828 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
913 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAG    829 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
914 #endif                                            830 #endif
915                                                   831 
916 #ifndef pmdp_collapse_flush                       832 #ifndef pmdp_collapse_flush
917 #ifdef CONFIG_TRANSPARENT_HUGEPAGE                833 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
918 extern pmd_t pmdp_collapse_flush(struct vm_are    834 extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
919                                  unsigned long    835                                  unsigned long address, pmd_t *pmdp);
920 #else                                             836 #else
921 static inline pmd_t pmdp_collapse_flush(struct    837 static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
922                                         unsign    838                                         unsigned long address,
923                                         pmd_t     839                                         pmd_t *pmdp)
924 {                                                 840 {
925         BUILD_BUG();                              841         BUILD_BUG();
926         return *pmdp;                             842         return *pmdp;
927 }                                                 843 }
928 #define pmdp_collapse_flush pmdp_collapse_flus    844 #define pmdp_collapse_flush pmdp_collapse_flush
929 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */          845 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
930 #endif                                            846 #endif
931                                                   847 
932 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT               848 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
933 extern void pgtable_trans_huge_deposit(struct     849 extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
934                                        pgtable    850                                        pgtable_t pgtable);
935 #endif                                            851 #endif
936                                                   852 
937 #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW              853 #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
938 extern pgtable_t pgtable_trans_huge_withdraw(s    854 extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
939 #endif                                            855 #endif
940                                                   856 
941 #ifndef arch_needs_pgtable_deposit                857 #ifndef arch_needs_pgtable_deposit
942 #define arch_needs_pgtable_deposit() (false)      858 #define arch_needs_pgtable_deposit() (false)
943 #endif                                            859 #endif
944                                                   860 
945 #ifdef CONFIG_TRANSPARENT_HUGEPAGE                861 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
946 /*                                                862 /*
947  * This is an implementation of pmdp_establish() that is only suitable for an    863  * This is an implementation of pmdp_establish() that is only suitable for an
948  * architecture that doesn't have hardware dirty/accessed bits. In this case we    864  * architecture that doesn't have hardware dirty/accessed bits. In this case we
949  * can't race with a CPU that sets these bits, so a non-atomic approach is fine.    865  * can't race with a CPU that sets these bits, so a non-atomic approach is fine.
950  */                                               866  */
951 static inline pmd_t generic_pmdp_establish(str    867 static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
952                 unsigned long address, pmd_t *    868                 unsigned long address, pmd_t *pmdp, pmd_t pmd)
953 {                                                 869 {
954         pmd_t old_pmd = *pmdp;                    870         pmd_t old_pmd = *pmdp;
955         set_pmd_at(vma->vm_mm, address, pmdp,     871         set_pmd_at(vma->vm_mm, address, pmdp, pmd);
956         return old_pmd;                           872         return old_pmd;
957 }                                                 873 }
958 #endif                                            874 #endif
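
/*
 * Illustrative sketch: an architecture without hardware dirty/accessed bits
 * could adopt the generic helper in its asm/pgtable.h. The guard macro below
 * is hypothetical and only marks this as an example.
 */
#ifdef EXAMPLE_ARCH_USES_GENERIC_PMDP_ESTABLISH
#define pmdp_establish generic_pmdp_establish
#endif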
959                                                   875 
960 #ifndef __HAVE_ARCH_PMDP_INVALIDATE               876 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
961 extern pmd_t pmdp_invalidate(struct vm_area_st    877 extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
962                             pmd_t *pmdp);         878                             pmd_t *pmdp);
963 #endif                                            879 #endif
964                                                   880 
965 #ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD            881 #ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
966                                                   882 
967 /*                                               883 /*
968  * pmdp_invalidate_ad() invalidates the PMD while changing a transparent    884  * pmdp_invalidate_ad() invalidates the PMD while changing a transparent
969  * hugepage mapping in the page tables. This function is similar to    885  * hugepage mapping in the page tables. This function is similar to
970  * pmdp_invalidate(), but should only be used if software will not clear    886  * pmdp_invalidate(), but should only be used if software will not clear
971  * the access and dirty bits in the new PMD value. The function ensures    887  * the access and dirty bits in the new PMD value. The function ensures
972  * that hardware updates of the access and dirty bits are not lost.    888  * that hardware updates of the access and dirty bits are not lost.
973  *                                               889  *
974  * On certain architectures this allows a TLB flush to be avoided in most    890  * On certain architectures this allows a TLB flush to be avoided in most
975  * cases. Another TLB flush might still be necessary later if the PMD    891  * cases. Another TLB flush might still be necessary later if the PMD
976  * update itself requires one (e.g., if protection was made stricter).    892  * update itself requires one (e.g., if protection was made stricter).
977  * Even when a flush is needed because of the update, the caller may be    893  * Even when a flush is needed because of the update, the caller may be
978  * able to batch these TLB flushing operations, so fewer TLB flush    894  * able to batch these TLB flushing operations, so fewer TLB flush
979  * operations are needed.                         895  * operations are needed.
980  */                                               896  */
981 extern pmd_t pmdp_invalidate_ad(struct vm_area    897 extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
982                                 unsigned long     898                                 unsigned long address, pmd_t *pmdp);
983 #endif                                            899 #endif
984                                                   900 
985 #ifndef __HAVE_ARCH_PTE_SAME                      901 #ifndef __HAVE_ARCH_PTE_SAME
986 static inline int pte_same(pte_t pte_a, pte_t     902 static inline int pte_same(pte_t pte_a, pte_t pte_b)
987 {                                                 903 {
988         return pte_val(pte_a) == pte_val(pte_b    904         return pte_val(pte_a) == pte_val(pte_b);
989 }                                                 905 }
990 #endif                                            906 #endif
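
/*
 * Illustrative sketch (hypothetical helper): the classic use of pte_same()
 * is revalidation after re-taking the PTE lock. The fault is only still ours
 * to handle if the entry did not change while the lock was dropped.
 */
static inline bool example_pte_unchanged(pte_t *ptep, pte_t orig_pte)
{
	return pte_same(ptep_get(ptep), orig_pte);
}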
991                                                   907 
992 #ifndef __HAVE_ARCH_PTE_UNUSED                    908 #ifndef __HAVE_ARCH_PTE_UNUSED
993 /*                                                909 /*
994  * Some architectures provide facilities to vi    910  * Some architectures provide facilities to virtualization guests
995  * so that they can flag allocated pages as un    911  * so that they can flag allocated pages as unused. This allows the
996  * host to transparently reclaim unused pages.    912  * host to transparently reclaim unused pages. This function returns
997  * whether the pte's page is unused.              913  * whether the pte's page is unused.
998  */                                               914  */
999 static inline int pte_unused(pte_t pte)           915 static inline int pte_unused(pte_t pte)
1000 {                                                916 {
1001         return 0;                                917         return 0;
1002 }                                                918 }
1003 #endif                                           919 #endif
1004                                                  920 
1005 #ifndef pte_access_permitted                     921 #ifndef pte_access_permitted
1006 #define pte_access_permitted(pte, write) \       922 #define pte_access_permitted(pte, write) \
1007         (pte_present(pte) && (!(write) || pte    923         (pte_present(pte) && (!(write) || pte_write(pte)))
1008 #endif                                           924 #endif
1009                                                  925 
1010 #ifndef pmd_access_permitted                     926 #ifndef pmd_access_permitted
1011 #define pmd_access_permitted(pmd, write) \       927 #define pmd_access_permitted(pmd, write) \
1012         (pmd_present(pmd) && (!(write) || pmd    928         (pmd_present(pmd) && (!(write) || pmd_write(pmd)))
1013 #endif                                           929 #endif
1014                                                  930 
1015 #ifndef pud_access_permitted                     931 #ifndef pud_access_permitted
1016 #define pud_access_permitted(pud, write) \       932 #define pud_access_permitted(pud, write) \
1017         (pud_present(pud) && (!(write) || pud    933         (pud_present(pud) && (!(write) || pud_write(pud)))
1018 #endif                                           934 #endif
1019                                                  935 
1020 #ifndef p4d_access_permitted                     936 #ifndef p4d_access_permitted
1021 #define p4d_access_permitted(p4d, write) \       937 #define p4d_access_permitted(p4d, write) \
1022         (p4d_present(p4d) && (!(write) || p4d    938         (p4d_present(p4d) && (!(write) || p4d_write(p4d)))
1023 #endif                                           939 #endif
1024                                                  940 
1025 #ifndef pgd_access_permitted                     941 #ifndef pgd_access_permitted
1026 #define pgd_access_permitted(pgd, write) \       942 #define pgd_access_permitted(pgd, write) \
1027         (pgd_present(pgd) && (!(write) || pgd    943         (pgd_present(pgd) && (!(write) || pgd_write(pgd)))
1028 #endif                                           944 #endif
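
/*
 * Illustrative sketch (hypothetical helper): a GUP-style fast path would use
 * these predicates to bail unless the entry is present and, for a write,
 * actually writable; the pmd/pud/p4d/pgd variants answer the same question
 * at each higher level of the table.
 */
static inline bool example_may_follow_pte(pte_t pte, bool write)
{
	return pte_access_permitted(pte, write);
}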
1029                                                  945 
1030 #ifndef __HAVE_ARCH_PMD_SAME                     946 #ifndef __HAVE_ARCH_PMD_SAME
1031 static inline int pmd_same(pmd_t pmd_a, pmd_t    947 static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
1032 {                                                948 {
1033         return pmd_val(pmd_a) == pmd_val(pmd_    949         return pmd_val(pmd_a) == pmd_val(pmd_b);
1034 }                                                950 }
1035 #endif                                           951 #endif
1036                                                  952 
1037 #ifndef pud_same                                 953 #ifndef pud_same
1038 static inline int pud_same(pud_t pud_a, pud_t    954 static inline int pud_same(pud_t pud_a, pud_t pud_b)
1039 {                                                955 {
1040         return pud_val(pud_a) == pud_val(pud_    956         return pud_val(pud_a) == pud_val(pud_b);
1041 }                                                957 }
1042 #define pud_same pud_same                        958 #define pud_same pud_same
1043 #endif                                           959 #endif
1044                                                  960 
1045 #ifndef __HAVE_ARCH_P4D_SAME                     961 #ifndef __HAVE_ARCH_P4D_SAME
1046 static inline int p4d_same(p4d_t p4d_a, p4d_t    962 static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b)
1047 {                                                963 {
1048         return p4d_val(p4d_a) == p4d_val(p4d_    964         return p4d_val(p4d_a) == p4d_val(p4d_b);
1049 }                                                965 }
1050 #endif                                           966 #endif
1051                                                  967 
1052 #ifndef __HAVE_ARCH_PGD_SAME                     968 #ifndef __HAVE_ARCH_PGD_SAME
1053 static inline int pgd_same(pgd_t pgd_a, pgd_t    969 static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
1054 {                                                970 {
1055         return pgd_val(pgd_a) == pgd_val(pgd_    971         return pgd_val(pgd_a) == pgd_val(pgd_b);
1056 }                                                972 }
1057 #endif                                           973 #endif
1058                                                  974 
1059 /*                                               975 /*
1060  * Use set_p*_safe(), and elide TLB flushing,    976  * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
1061  * TLB flush will be required as a result of     977  * TLB flush will be required as a result of the "set". For example, use
1062  * in scenarios where it is known ahead of ti    978  * in scenarios where it is known ahead of time that the routine is
1063  * setting non-present entries, or re-setting    979  * setting non-present entries, or re-setting an existing entry to the
1064  * same value. Otherwise, use the typical "se    980  * same value. Otherwise, use the typical "set" helpers and flush the
1065  * TLB.                                          981  * TLB.
1066  */                                              982  */
1067 #define set_pte_safe(ptep, pte) \                983 #define set_pte_safe(ptep, pte) \
1068 ({ \                                             984 ({ \
1069         WARN_ON_ONCE(pte_present(*ptep) && !p    985         WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
1070         set_pte(ptep, pte); \                    986         set_pte(ptep, pte); \
1071 })                                               987 })
1072                                                  988 
1073 #define set_pmd_safe(pmdp, pmd) \                989 #define set_pmd_safe(pmdp, pmd) \
1074 ({ \                                             990 ({ \
1075         WARN_ON_ONCE(pmd_present(*pmdp) && !p    991         WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
1076         set_pmd(pmdp, pmd); \                    992         set_pmd(pmdp, pmd); \
1077 })                                               993 })
1078                                                  994 
1079 #define set_pud_safe(pudp, pud) \                995 #define set_pud_safe(pudp, pud) \
1080 ({ \                                             996 ({ \
1081         WARN_ON_ONCE(pud_present(*pudp) && !p    997         WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
1082         set_pud(pudp, pud); \                    998         set_pud(pudp, pud); \
1083 })                                               999 })
1084                                                  1000 
1085 #define set_p4d_safe(p4dp, p4d) \                1001 #define set_p4d_safe(p4dp, p4d) \
1086 ({ \                                             1002 ({ \
1087         WARN_ON_ONCE(p4d_present(*p4dp) && !p    1003         WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
1088         set_p4d(p4dp, p4d); \                    1004         set_p4d(p4dp, p4d); \
1089 })                                               1005 })
1090                                                  1006 
1091 #define set_pgd_safe(pgdp, pgd) \                1007 #define set_pgd_safe(pgdp, pgd) \
1092 ({ \                                             1008 ({ \
1093         WARN_ON_ONCE(pgd_present(*pgdp) && !p    1009         WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
1094         set_pgd(pgdp, pgd); \                    1010         set_pgd(pgdp, pgd); \
1095 })                                               1011 })
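
/*
 * Illustrative sketch (hypothetical helper): the "safe" setters suit code
 * populating a freshly allocated table, where every slot is known to be
 * non-present, so no TLB entry can exist and the flush may be elided. The
 * WARN_ON_ONCE() in the macros catches misuse.
 */
static inline void example_populate_fresh_pte(pte_t *ptep, pte_t pte)
{
	set_pte_safe(ptep, pte);
}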
1096                                                  1012 
1097 #ifndef __HAVE_ARCH_DO_SWAP_PAGE                 1013 #ifndef __HAVE_ARCH_DO_SWAP_PAGE
1098 static inline void arch_do_swap_page_nr(struct mm_struct *mm, <<
1099                                      struct vm_area_struct *vma, <<
1100                                      unsigned long addr, <<
1101                                      pte_t pte, pte_t oldpte, <<
1102                                      int nr) <<
1103 {                                             <<
1104                                               <<
1105 }                                             <<
1106 #else                                         <<
1107 /*                                               1014 /*
1108  * Some architectures support metadata associ    1015  * Some architectures support metadata associated with a page. When a
1109  * page is being swapped out, this metadata m    1016  * page is being swapped out, this metadata must be saved so it can be
1110  * restored when the page is swapped back in.    1017  * restored when the page is swapped back in. SPARC M7 and newer
1111  * processors support an ADI (Application Dat    1018  * processors support an ADI (Application Data Integrity) tag for the
1112  * page as metadata for the page. arch_do_swa    1019  * page as metadata for the page. arch_do_swap_page() can restore this
1113  * metadata when a page is swapped back in.      1020  * metadata when a page is swapped back in.
1114  */                                              1021  */
1115 static inline void arch_do_swap_page_nr(struct mm_struct *mm, !! 1022 static inline void arch_do_swap_page(struct mm_struct *mm,
1116                                         struct vm_area_struct *vma, !! 1023                                      struct vm_area_struct *vma,
1117                                         unsigned long addr, !! 1024                                      unsigned long addr,
1118                                         pte_t pte, pte_t oldpte, !! 1025                                      pte_t pte, pte_t oldpte)
1119                                         int nr) !! 1026 {
1120 {                                             !! 1027 
1121         for (int i = 0; i < nr; i++) {        <<
1122                 arch_do_swap_page(vma->vm_mm, vma, addr + i * PAGE_SIZE, <<
1123                                 pte_advance_pfn(pte, i), <<
1124                                 pte_advance_pfn(oldpte, i)); <<
1125         }                                      <<
1126 }                                                1028 }
1127 #endif                                           1029 #endif
1128                                                  1030 
1129 #ifndef __HAVE_ARCH_UNMAP_ONE                    1031 #ifndef __HAVE_ARCH_UNMAP_ONE
1130 /*                                               1032 /*
1131  * Some architectures support metadata associ    1033  * Some architectures support metadata associated with a page. When a
1132  * page is being swapped out, this metadata m    1034  * page is being swapped out, this metadata must be saved so it can be
1133  * restored when the page is swapped back in.    1035  * restored when the page is swapped back in. SPARC M7 and newer
1134  * processors support an ADI (Application Dat    1036  * processors support an ADI (Application Data Integrity) tag for the
1135  * page as metadata for the page. arch_unmap_    1037  * page as metadata for the page. arch_unmap_one() can save this
1136  * metadata on a swap-out of a page.             1038  * metadata on a swap-out of a page.
1137  */                                              1039  */
1138 static inline int arch_unmap_one(struct mm_st    1040 static inline int arch_unmap_one(struct mm_struct *mm,
1139                                   struct vm_a    1041                                   struct vm_area_struct *vma,
1140                                   unsigned lo    1042                                   unsigned long addr,
1141                                   pte_t orig_    1043                                   pte_t orig_pte)
1142 {                                                1044 {
1143         return 0;                                1045         return 0;
1144 }                                                1046 }
1145 #endif                                           1047 #endif
1146                                                  1048 
1147 /*                                               1049 /*
1148  * Allow architectures to preserve additional    1050  * Allow architectures to preserve additional metadata associated with
1149  * swapped-out pages. The corresponding __HAV    1051  * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function
1150  * prototypes must be defined in the arch-spe    1052  * prototypes must be defined in the arch-specific asm/pgtable.h file.
1151  */                                              1053  */
1152 #ifndef __HAVE_ARCH_PREPARE_TO_SWAP              1054 #ifndef __HAVE_ARCH_PREPARE_TO_SWAP
1153 static inline int arch_prepare_to_swap(struct folio *folio) !! 1055 static inline int arch_prepare_to_swap(struct page *page)
1154 {                                                1056 {
1155         return 0;                                1057         return 0;
1156 }                                                1058 }
1157 #endif                                           1059 #endif
1158                                                  1060 
1159 #ifndef __HAVE_ARCH_SWAP_INVALIDATE              1061 #ifndef __HAVE_ARCH_SWAP_INVALIDATE
1160 static inline void arch_swap_invalidate_page(    1062 static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
1161 {                                                1063 {
1162 }                                                1064 }
1163                                                  1065 
1164 static inline void arch_swap_invalidate_area(    1066 static inline void arch_swap_invalidate_area(int type)
1165 {                                                1067 {
1166 }                                                1068 }
1167 #endif                                           1069 #endif
1168                                                  1070 
1169 #ifndef __HAVE_ARCH_SWAP_RESTORE                 1071 #ifndef __HAVE_ARCH_SWAP_RESTORE
1170 static inline void arch_swap_restore(swp_entr    1072 static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
1171 {                                                1073 {
1172 }                                                1074 }
1173 #endif                                           1075 #endif
1174                                                  1076 
1175 #ifndef __HAVE_ARCH_PGD_OFFSET_GATE              1077 #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
1176 #define pgd_offset_gate(mm, addr)       pgd_o    1078 #define pgd_offset_gate(mm, addr)       pgd_offset(mm, addr)
1177 #endif                                           1079 #endif
1178                                                  1080 
1179 #ifndef __HAVE_ARCH_MOVE_PTE                     1081 #ifndef __HAVE_ARCH_MOVE_PTE
1180 #define move_pte(pte, old_addr, new_addr)      (pte) !! 1082 #define move_pte(pte, prot, old_addr, new_addr) (pte)
1181 #endif                                           1083 #endif
1182                                                  1084 
1183 #ifndef pte_accessible                           1085 #ifndef pte_accessible
1184 # define pte_accessible(mm, pte)        ((voi    1086 # define pte_accessible(mm, pte)        ((void)(pte), 1)
1185 #endif                                           1087 #endif
1186                                                  1088 
1187 #ifndef flush_tlb_fix_spurious_fault             1089 #ifndef flush_tlb_fix_spurious_fault
1188 #define flush_tlb_fix_spurious_fault(vma, add    1090 #define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address)
1189 #endif                                           1091 #endif
1190                                                  1092 
1191 /*                                               1093 /*
1192  * When walking page tables, get the address of the next boundary, or    1094  * When walking page tables, get the address of the next boundary, or
1193  * the end address of the range if that comes earlier. Although no VMA    1095  * the end address of the range if that comes earlier. Although no VMA
1194  * end wraps to 0, the rounded-up __boundary may wrap to 0 at any level.    1096  * end wraps to 0, the rounded-up __boundary may wrap to 0 at any level.
1195  */                                              1097  */
1196                                                  1098 
1197 #define pgd_addr_end(addr, end)                  1099 #define pgd_addr_end(addr, end)                                         \
1198 ({      unsigned long __boundary = ((addr) +     1100 ({      unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;  \
1199         (__boundary - 1 < (end) - 1)? __bound    1101         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
1200 })                                               1102 })
1201                                                  1103 
1202 #ifndef p4d_addr_end                             1104 #ifndef p4d_addr_end
1203 #define p4d_addr_end(addr, end)                  1105 #define p4d_addr_end(addr, end)                                         \
1204 ({      unsigned long __boundary = ((addr) +     1106 ({      unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;      \
1205         (__boundary - 1 < (end) - 1)? __bound    1107         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
1206 })                                               1108 })
1207 #endif                                           1109 #endif
1208                                                  1110 
1209 #ifndef pud_addr_end                             1111 #ifndef pud_addr_end
1210 #define pud_addr_end(addr, end)                  1112 #define pud_addr_end(addr, end)                                         \
1211 ({      unsigned long __boundary = ((addr) +     1113 ({      unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
1212         (__boundary - 1 < (end) - 1)? __bound    1114         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
1213 })                                               1115 })
1214 #endif                                           1116 #endif
1215                                                  1117 
1216 #ifndef pmd_addr_end                             1118 #ifndef pmd_addr_end
1217 #define pmd_addr_end(addr, end)                  1119 #define pmd_addr_end(addr, end)                                         \
1218 ({      unsigned long __boundary = ((addr) +     1120 ({      unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
1219         (__boundary - 1 < (end) - 1)? __bound    1121         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
1220 })                                               1122 })
1221 #endif                                           1123 #endif
1222                                                  1124 
1223 /*                                               1125 /*
1224  * When walking page tables, we usually want to skip any p?d_none entries    1126  * When walking page tables, we usually want to skip any p?d_none entries
1225  * and any p?d_bad entries, reporting the error before resetting to none.    1127  * and any p?d_bad entries, reporting the error before resetting to none.
1226  * Do the tests inline, but report and clear the bad entry in mm/memory.c.    1128  * Do the tests inline, but report and clear the bad entry in mm/memory.c.
1227  */                                              1129  */
1228 void pgd_clear_bad(pgd_t *);                     1130 void pgd_clear_bad(pgd_t *);
1229                                                  1131 
1230 #ifndef __PAGETABLE_P4D_FOLDED                   1132 #ifndef __PAGETABLE_P4D_FOLDED
1231 void p4d_clear_bad(p4d_t *);                     1133 void p4d_clear_bad(p4d_t *);
1232 #else                                            1134 #else
1233 #define p4d_clear_bad(p4d)        do { } whil    1135 #define p4d_clear_bad(p4d)        do { } while (0)
1234 #endif                                           1136 #endif
1235                                                  1137 
1236 #ifndef __PAGETABLE_PUD_FOLDED                   1138 #ifndef __PAGETABLE_PUD_FOLDED
1237 void pud_clear_bad(pud_t *);                     1139 void pud_clear_bad(pud_t *);
1238 #else                                            1140 #else
1239 #define pud_clear_bad(pud)        do { } while (0)    1141 #define pud_clear_bad(pud)        do { } while (0)
1240 #endif                                           1142 #endif
1241                                                  1143 
1242 void pmd_clear_bad(pmd_t *);                     1144 void pmd_clear_bad(pmd_t *);
1243                                                  1145 
1244 static inline int pgd_none_or_clear_bad(pgd_t    1146 static inline int pgd_none_or_clear_bad(pgd_t *pgd)
1245 {                                                1147 {
1246         if (pgd_none(*pgd))                      1148         if (pgd_none(*pgd))
1247                 return 1;                        1149                 return 1;
1248         if (unlikely(pgd_bad(*pgd))) {           1150         if (unlikely(pgd_bad(*pgd))) {
1249                 pgd_clear_bad(pgd);              1151                 pgd_clear_bad(pgd);
1250                 return 1;                        1152                 return 1;
1251         }                                        1153         }
1252         return 0;                                1154         return 0;
1253 }                                                1155 }
1254                                                  1156 
1255 static inline int p4d_none_or_clear_bad(p4d_t    1157 static inline int p4d_none_or_clear_bad(p4d_t *p4d)
1256 {                                                1158 {
1257         if (p4d_none(*p4d))                      1159         if (p4d_none(*p4d))
1258                 return 1;                        1160                 return 1;
1259         if (unlikely(p4d_bad(*p4d))) {           1161         if (unlikely(p4d_bad(*p4d))) {
1260                 p4d_clear_bad(p4d);              1162                 p4d_clear_bad(p4d);
1261                 return 1;                        1163                 return 1;
1262         }                                        1164         }
1263         return 0;                                1165         return 0;
1264 }                                                1166 }
1265                                                  1167 
1266 static inline int pud_none_or_clear_bad(pud_t    1168 static inline int pud_none_or_clear_bad(pud_t *pud)
1267 {                                                1169 {
1268         if (pud_none(*pud))                      1170         if (pud_none(*pud))
1269                 return 1;                        1171                 return 1;
1270         if (unlikely(pud_bad(*pud))) {           1172         if (unlikely(pud_bad(*pud))) {
1271                 pud_clear_bad(pud);              1173                 pud_clear_bad(pud);
1272                 return 1;                        1174                 return 1;
1273         }                                        1175         }
1274         return 0;                                1176         return 0;
1275 }                                                1177 }
1276                                                  1178 
1277 static inline int pmd_none_or_clear_bad(pmd_t    1179 static inline int pmd_none_or_clear_bad(pmd_t *pmd)
1278 {                                                1180 {
1279         if (pmd_none(*pmd))                      1181         if (pmd_none(*pmd))
1280                 return 1;                        1182                 return 1;
1281         if (unlikely(pmd_bad(*pmd))) {           1183         if (unlikely(pmd_bad(*pmd))) {
1282                 pmd_clear_bad(pmd);              1184                 pmd_clear_bad(pmd);
1283                 return 1;                        1185                 return 1;
1284         }                                        1186         }
1285         return 0;                                1187         return 0;
1286 }                                                1188 }
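
/*
 * Illustrative sketch (hypothetical helper): the canonical walk pattern
 * combines p?d_addr_end() with p?d_none_or_clear_bad(). Each step is clamped
 * to the next PMD boundary (or the range end), and empty or corrupt entries
 * are skipped, with bad ones reported and reset by pmd_clear_bad().
 */
static inline void example_walk_pmd_range(pmd_t *pmd, unsigned long addr,
					  unsigned long end)
{
	unsigned long next;

	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		/* ... operate on the PTE table at *pmd for [addr, next) ... */
	} while (pmd++, addr = next, addr != end);
}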
1287                                                  1189 
static inline pte_t __ptep_modify_prot_start(struct vm_area_struct *vma,
					     unsigned long addr,
					     pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(vma->vm_mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma,
					     unsigned long addr,
					     pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(vma->vm_mm, addr, ptep, pte);
}
#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time.  The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
					   unsigned long addr,
					   pte_t *ptep)
{
	return __ptep_modify_prot_start(vma, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
					   unsigned long addr,
					   pte_t *ptep, pte_t old_pte, pte_t pte)
{
	__ptep_modify_prot_commit(vma, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
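/*
 * Put together, the transaction reads as below; this fragment is modeled
 * on the PTE loop of mm/mprotect.c, with the surrounding locking and
 * batching elided:
 */
	pte_t oldpte, ptent;

	oldpte = ptep_modify_prot_start(vma, addr, ptep); /* HW updates held off */
	ptent = pte_modify(oldpte, newprot);
	/* ... any further tweaks to ptent go here ... */
	ptep_modify_prot_commit(vma, addr, ptep, oldpte, ptent);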
#endif /* CONFIG_MMU */
/*
 * No-op macros that just return the current protection value. Defined here
 * because these macros can be used even if CONFIG_MMU is not defined.
 */

#ifndef pgprot_nx
#define pgprot_nx(prot)	(prot)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_writethrough
#define pgprot_writethrough pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

#ifndef pgprot_mhp
#define pgprot_mhp(prot)	(prot)
#endif
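/*
 * A classic consumer of these modifiers is a driver mmap() handler that
 * must map device registers uncached.  A hypothetical sketch; mydrv_mmap()
 * and MYDRV_REG_BASE are illustrative, not part of this header:
 */
static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long pfn = MYDRV_REG_BASE >> PAGE_SHIFT;	/* hypothetical */

	/* a no-op on architectures that don't define pgprot_noncached */
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	return remap_pfn_range(vma, vma->vm_start, pfn,
			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
}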
#ifdef CONFIG_MMU
#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
		newprot = pgprot_noncached(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
		newprot = pgprot_writecombine(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
		newprot = pgprot_device(newprot);
	return newprot;
}
#endif
#endif /* CONFIG_MMU */
#ifndef pgprot_encrypted
#define pgprot_encrypted(prot)	(prot)
#endif

#ifndef pgprot_decrypted
#define pgprot_decrypted(prot)	(prot)
#endif
/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests.  By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entry and exits should always be
 * paired.  This is for sanity of maintaining and reasoning about the
 * kernel code.  In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif
#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
static inline int pte_soft_dirty(pte_t pte)
{
	return 0;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif
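/*
 * When CONFIG_HAVE_ARCH_SOFT_DIRTY is set, the real helpers are consumed
 * by the clear-refs path.  This fragment is modeled on
 * fs/proc/task_mmu.c:clear_soft_dirty() and covers both the present and
 * the swap case:
 */
	pte_t ptent = ptep_get(pte);

	if (pte_present(ptent)) {
		pte_t old_pte = ptep_modify_prot_start(vma, addr, pte);

		ptent = pte_wrprotect(old_pte);		/* rearm write faults */
		ptent = pte_clear_soft_dirty(ptent);
		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
		set_pte_at(vma->vm_mm, addr, pte, ptent);
	}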
#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of the
 * memory type of pfn mappings specified by remap_pfn_range() and
 * vmf_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
				  unsigned long pfn, unsigned long addr,
				  unsigned long size)
{
	return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vmf_insert_pfn().
 */
static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
				    pfn_t pfn)
{
}

/*
 * track_pfn_copy is called when a vma covering the pfnmap gets copied
 * through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * for the entire vma (in which case pfn and size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
			       unsigned long pfn, unsigned long size,
			       bool mm_wr_locked)
{
}

/*
 * untrack_pfn_clear is called while mremapping a pfnmap for a new region,
 * or when the pgtable copy fails during vm area duplication.
 */
static inline void untrack_pfn_clear(struct vm_area_struct *vma)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
			   unsigned long pfn, unsigned long addr,
			   unsigned long size);
extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
			     pfn_t pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
			unsigned long size, bool mm_wr_locked);
extern void untrack_pfn_clear(struct vm_area_struct *vma);
#endif
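/*
 * Only x86 (via PAT) defines __HAVE_PFNMAP_TRACKING; elsewhere the stubs
 * above make tracking free.  The expected pairing, as a simplified sketch
 * (error unwinding and the mm_wr_locked argument depend on the call site):
 */
	/* at mapping time, e.g. from remap_pfn_range(): */
	err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
	if (err)
		return err;

	/* ... and when the region is finally unmapped: */
	untrack_pfn(vma, pfn, PAGE_ALIGN(size), true);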
#ifdef CONFIG_MMU
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	extern unsigned long zero_pfn;
	return zero_pfn;
}
#endif
#else
static inline int is_zero_pfn(unsigned long pfn)
{
	return 0;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	return 0;
}
#endif /* CONFIG_MMU */
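/*
 * Two typical uses, both modeled on mm/memory.c: a read fault on anonymous
 * memory installs the (possibly colored) zero page read-only, and
 * vm_normal_page()-style code must recognize that PFN as special:
 */
	/* read fault: back the address with the shared zero page */
	entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
				      vma->vm_page_prot));

	/* elsewhere: the zero page is not a refcounted "normal" page */
	if (is_zero_pfn(pfn))
		return NULL;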
#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
#ifndef pmd_write
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* pmd_write */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pud_write
static inline int pud_write(pud_t pud)
{
	BUG();
	return 0;
}
#endif /* pud_write */

#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline int pmd_devmap(pmd_t pmd)
{
	return 0;
}
static inline int pud_devmap(pud_t pud)
{
	return 0;
}
static inline int pgd_devmap(pgd_t pgd)
{
	return 0;
}
#endif

#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
	!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static inline int pud_trans_huge(pud_t pud)
{
	return 0;
}
#endif
static inline int pud_trans_unstable(pud_t *pud)
{
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
	pud_t pudval = READ_ONCE(*pud);

	if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
		return 1;
	if (unlikely(pud_bad(pudval))) {
		pud_clear_bad(pud);
		return 1;
	}
#endif
	return 0;
}
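/*
 * A walker about to descend to the PMD level guards itself as below; the
 * fragment assumes the usual do/while range loop, and the retry policy is
 * the caller's choice:
 */
		pud = pud_offset(p4d, addr);
		if (pud_trans_unstable(pud))
			continue;	/* empty, huge, or mid-split: skip */
		pmd = pmd_offset(pud, addr);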
#ifndef CONFIG_NUMA_BALANCING
/*
 * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is
 * perfectly valid to indicate "no" in that case, which is why our default
 * implementation defaults to "always no".
 *
 * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE
 * page protection due to NUMA hinting. NUMA hinting faults only apply in
 * accessible VMAs.
 *
 * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault,
 * looking at the VMA accessibility is sufficient.
 */
static inline int pte_protnone(pte_t pte)
{
	return 0;
}

static inline int pmd_protnone(pmd_t pmd)
{
	return 0;
}
#endif /* CONFIG_NUMA_BALANCING */
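/*
 * With CONFIG_NUMA_BALANCING the fault path separates a NUMA hint from a
 * real protection fault exactly as described above; a fragment modeled on
 * the handle_pte_fault() path in mm/memory.c:
 */
	if (pte_protnone(ptent) && vma_is_accessible(vma))
		return do_numa_page(vmf);	/* hinting fault, not an error */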
#endif /* CONFIG_MMU */
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP

#ifndef __PAGETABLE_P4D_FOLDED
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
void p4d_clear_huge(p4d_t *p4d);
#else
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline void p4d_clear_huge(p4d_t *p4d) { }
#endif /* !__PAGETABLE_P4D_FOLDED */

int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr);
int pud_free_pmd_page(pud_t *pud, unsigned long addr);
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
#else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline void p4d_clear_huge(p4d_t *p4d) { }
static inline int pud_clear_huge(pud_t *pud)
{
	return 0;
}
static inline int pmd_clear_huge(pmd_t *pmd)
{
	return 0;
}
static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
	return 0;
}
static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	return 0;
}
static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	return 0;
}
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
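/*
 * mm/vmalloc.c uses these hooks to install leaf entries for huge
 * ioremap/vmalloc mappings.  A condensed fragment of the PMD-level
 * attempt (the full version also checks the exact range end and further
 * arch constraints):
 */
	if (size >= PMD_SIZE && IS_ALIGNED(addr | phys_addr, PMD_SIZE) &&
	    arch_vmap_pmd_supported(prot)) {
		if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
			return 0;
		return pmd_set_huge(pmd, phys_addr, prot);
	}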
#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * ARCHes with special requirements for evicting THP backing TLB entries can
 * implement this.  It can also help optimize the normal TLB flush in the THP
 * regime: the stock flush_tlb_range() typically nukes the entire TLB when the
 * flush span exceeds a threshold, which is likely for a single huge page, so
 * a single THP flush would invalidate the whole TLB, which is not desirable.
 * e.g. see arch/arc: flush_pmd_tlb_range
 */
#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#else
#define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
#define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
#endif
#endif
struct file;
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
			unsigned long size, pgprot_t *vma_prot);

#ifndef CONFIG_X86_ESPFIX64
static inline void init_espfix_bsp(void) { }
#endif

extern void __init pgtable_cache_init(void);

#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
{
	return true;
}

static inline bool arch_has_pfn_modify_check(void)
{
	return false;
}
#endif /* !__HAVE_ARCH_PFN_MODIFY_ALLOWED */
/*
 * Architecture PAGE_KERNEL_* fallbacks
 *
 * Some architectures don't define certain PAGE_KERNEL_* flags. This is either
 * because they really don't support them, or the port needs to be updated to
 * reflect the required functionality. Below is a set of relatively safe,
 * best-effort fallbacks we can count on until the architectures define their
 * own.
 */

#ifndef PAGE_KERNEL_RO
# define PAGE_KERNEL_RO PAGE_KERNEL
#endif

#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif
/*
 * Page Table Modification bits for pgtbl_mod_mask.
 *
 * These are used by the p?d_alloc_track*() set of functions and in the generic
 * vmalloc/ioremap code to track at which page-table levels entries have been
 * modified. Based on that, the code can better decide when vmalloc and ioremap
 * mapping changes need to be synchronized to other page tables in the system.
 */
#define		__PGTBL_PGD_MODIFIED	0
#define		__PGTBL_P4D_MODIFIED	1
#define		__PGTBL_PUD_MODIFIED	2
#define		__PGTBL_PMD_MODIFIED	3
#define		__PGTBL_PTE_MODIFIED	4

#define		PGTBL_PGD_MODIFIED	BIT(__PGTBL_PGD_MODIFIED)
#define		PGTBL_P4D_MODIFIED	BIT(__PGTBL_P4D_MODIFIED)
#define		PGTBL_PUD_MODIFIED	BIT(__PGTBL_PUD_MODIFIED)
#define		PGTBL_PMD_MODIFIED	BIT(__PGTBL_PMD_MODIFIED)
#define		PGTBL_PTE_MODIFIED	BIT(__PGTBL_PTE_MODIFIED)

/* Page-Table Modification Mask */
typedef unsigned int pgtbl_mod_mask;
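/*
 * The mask is accumulated while kernel mappings are (re)built and then
 * checked against the architecture's synchronization needs; a fragment
 * modeled on mm/vmalloc.c (ARCH_PAGE_TABLE_SYNC_MASK and
 * arch_sync_kernel_mappings() are provided by the vmalloc side):
 */
	pgtbl_mod_mask mask = 0;

	/* the populate helpers OR in a bit for each level they touch: */
	mask |= PGTBL_PMD_MODIFIED;

	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
		arch_sync_kernel_mappings(start, end);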
#endif /* !__ASSEMBLY__ */
#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
#ifdef CONFIG_PHYS_ADDR_T_64BIT
/*
 * ZSMALLOC needs to know the highest PFN on 32-bit architectures
 * with physical address space extension, but falls back to
 * BITS_PER_LONG otherwise.
 */
#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
#else
#define MAX_POSSIBLE_PHYSMEM_BITS 32
#endif
#endif
#ifndef has_transparent_hugepage
#define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
#endif

#ifndef has_transparent_pud_hugepage
#define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
#endif
/*
 * On some architectures it depends on the mm if the p4d/pud or pmd
 * layer of the page table hierarchy is folded or not.
 */
#ifndef mm_p4d_folded
#define mm_p4d_folded(mm)	__is_defined(__PAGETABLE_P4D_FOLDED)
#endif

#ifndef mm_pud_folded
#define mm_pud_folded(mm)	__is_defined(__PAGETABLE_PUD_FOLDED)
#endif

#ifndef mm_pmd_folded
#define mm_pmd_folded(mm)	__is_defined(__PAGETABLE_PMD_FOLDED)
#endif
#ifndef p4d_offset_lockless
#define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address)
#endif
#ifndef pud_offset_lockless
#define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address)
#endif
#ifndef pmd_offset_lockless
#define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address)
#endif
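/*
 * The *_lockless variants exist for GUP-fast style walkers: each
 * upper-level entry is read exactly once and the walk continues from that
 * local copy, so a table freed concurrently is never re-dereferenced.
 * Fragment modeled on mm/gup.c:
 */
	pgd_t pgd = READ_ONCE(*pgdp);
	p4d_t *p4dp = p4d_offset_lockless(pgdp, pgd, addr);
	p4d_t p4d = READ_ONCE(*p4dp);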
/*
- * p?d_leaf() - true if this entry is a final mapping to a physical address.
- * This differs from p?d_huge() by the fact that they are always available (if
- * the architecture supports large pages at the appropriate level) even
- * if CONFIG_HUGETLB_PAGE is not defined.
- * Only meaningful when called on a valid entry.
+ * pXd_leaf() is the API to check whether a pgtable entry is a huge page
+ * mapping.  It should work globally across all archs without any
+ * dependency on CONFIG_* options.  For architectures that don't support
+ * huge mappings on specific levels, below fallbacks will be used.
+ *
+ * A leaf pgtable entry should always imply the following:
+ *
+ * - It is a "present" entry.  IOW, before using this API, please check it
+ *   with pXd_present() first. NOTE: it may not always mean the "present
+ *   bit" is set.  For example, PROT_NONE entries are always "present".
+ *
+ * - It should _never_ be a swap entry of any type.  Above "present" check
+ *   should have guarded this, but let's be crystal clear on this.
+ *
+ * - It should contain a huge PFN, which points to a huge page larger than
+ *   PAGE_SIZE of the platform.  The PFN format isn't important here.
+ *
+ * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(),
+ *   pXd_devmap(), or hugetlb mappings).
 */
#ifndef pgd_leaf
#define pgd_leaf(x)	false
#endif
#ifndef p4d_leaf
#define p4d_leaf(x)	false
#endif
#ifndef pud_leaf
#define pud_leaf(x)	false
#endif
#ifndef pmd_leaf
#define pmd_leaf(x)	false
#endif
#ifndef pgd_leaf_size
#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
#endif
#ifndef p4d_leaf_size
#define p4d_leaf_size(x) P4D_SIZE
#endif
#ifndef pud_leaf_size
#define pud_leaf_size(x) PUD_SIZE
#endif
#ifndef pmd_leaf_size
#define pmd_leaf_size(x) PMD_SIZE
#endif
+#ifndef __pte_leaf_size
#ifndef pte_leaf_size
#define pte_leaf_size(x) PAGE_SIZE
#endif
+#define __pte_leaf_size(x,y) pte_leaf_size(y)
+#endif
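/*
 * A software walker can combine the two families to report the mapping
 * size at whichever level the leaf sits; a sketch in the spirit of the
 * perf page-size lookup in kernel/events/core.c:
 */
	pmd_t pmd = pmdp_get_lockless(pmdp);

	if (pmd_present(pmd) && pmd_leaf(pmd))
		return pmd_leaf_size(pmd);	/* e.g. 2M with 4K base pages */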
+/*
+ * We always define pmd_pfn for all archs as it's used in lots of generic
+ * code.  Now it happens too for pud_pfn (and can happen for larger
+ * mappings too in the future; we're not there yet).  Instead of defining
+ * it for all archs (like pmd_pfn), provide a fallback.
+ *
+ * Note that returning 0 here means any arch that didn't define this will
+ * get severely wrong when it hits a real pud leaf.  It's arch's
+ * responsibility to properly define it when a huge pud is possible.
+ */
+#ifndef pud_pfn
+#define pud_pfn(x) 0
+#endif
/*
 * Some architectures have MMUs that are configurable or selectable at boot
 * time. These lead to variable PTRS_PER_x. For statically allocated arrays it
 * helps to have a static maximum value.
 */

#ifndef MAX_PTRS_PER_PTE
#define MAX_PTRS_PER_PTE PTRS_PER_PTE
#endif

#ifndef MAX_PTRS_PER_PMD
#define MAX_PTRS_PER_PMD PTRS_PER_PMD
#endif

#ifndef MAX_PTRS_PER_PUD
#define MAX_PTRS_PER_PUD PTRS_PER_PUD
#endif

#ifndef MAX_PTRS_PER_P4D
#define MAX_PTRS_PER_P4D PTRS_PER_P4D
+#endif
+
+#ifndef pte_pgprot
+#define pte_pgprot(x) ((pgprot_t) {0})
+#endif
+
+#ifndef pmd_pgprot
+#define pmd_pgprot(x) ((pgprot_t) {0})
+#endif
+
+#ifndef pud_pgprot
+#define pud_pgprot(x) ((pgprot_t) {0})
#endif
1971 /* description of effects of mapping type and    1830 /* description of effects of mapping type and prot in current implementation.
1972  * this is due to the limited x86 page protec    1831  * this is due to the limited x86 page protection hardware.  The expected
1973  * behavior is in parens:                        1832  * behavior is in parens:
1974  *                                               1833  *
1975  * map_type     prot                             1834  * map_type     prot
1976  *              PROT_NONE       PROT_READ        1835  *              PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
1977  * MAP_SHARED   r: (no) no      r: (yes) yes     1836  * MAP_SHARED   r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
1978  *              w: (no) no      w: (no) no       1837  *              w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
1979  *              x: (no) no      x: (no) yes      1838  *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
1980  *                                               1839  *
1981  * MAP_PRIVATE  r: (no) no      r: (yes) yes     1840  * MAP_PRIVATE  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
1982  *              w: (no) no      w: (no) no       1841  *              w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
1983  *              x: (no) no      x: (no) yes      1842  *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
1984  *                                               1843  *
1985  * On arm64, PROT_EXEC has the following beha    1844  * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
1986  * MAP_PRIVATE (with Enhanced PAN supported):    1845  * MAP_PRIVATE (with Enhanced PAN supported):
1987  *                                               1846  *                                                              r: (no) no
1988  *                                               1847  *                                                              w: (no) no
1989  *                                               1848  *                                                              x: (yes) yes
1990  */                                              1849  */
1991 #define DECLARE_VM_GET_PAGE_PROT                 1850 #define DECLARE_VM_GET_PAGE_PROT                                        \
1992 pgprot_t vm_get_page_prot(unsigned long vm_fl    1851 pgprot_t vm_get_page_prot(unsigned long vm_flags)                       \
1993 {                                                1852 {                                                                       \
1994                 return protection_map[vm_flag    1853                 return protection_map[vm_flags &                        \
1995                         (VM_READ | VM_WRITE |    1854                         (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];    \
1996 }                                                1855 }                                                                       \
1997 EXPORT_SYMBOL(vm_get_page_prot);                 1856 EXPORT_SYMBOL(vm_get_page_prot);
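
Tying the table above to the macro: an architecture (or mm/mmap.c, when
CONFIG_ARCH_HAS_VM_GET_PAGE_PROT is not set) supplies a 16-entry
protection_map indexed by the low VM_READ/VM_WRITE/VM_EXEC/VM_SHARED bits,
then expands DECLARE_VM_GET_PAGE_PROT next to it. A shortened sketch of that
pattern; the entry values are the architecture's page-protection constants,
and most combinations are elided here:

/* Indexed by vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED). */
static const pgprot_t protection_map[16] = {
	[VM_NONE]		= PAGE_NONE,
	[VM_READ]		= PAGE_READONLY,
	[VM_WRITE]		= PAGE_COPY,	/* private write => copy-on-write */
	[VM_WRITE | VM_READ]	= PAGE_COPY,
	/* ... the VM_EXEC and VM_SHARED combinations follow ... */
};
DECLARE_VM_GET_PAGE_PROT

So a PROT_READ|PROT_WRITE, MAP_PRIVATE mapping (VM_SHARED clear) indexes the
PAGE_COPY slot, matching the "w: (copy) copy" cell in the table, while the
MAP_SHARED variant lands on a genuinely writable entry.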
1998                                                  1857 
1999 #endif /* _LINUX_PGTABLE_H */                    1858 #endif /* _LINUX_PGTABLE_H */
2000                                                  1859 
