~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/mm/page_isolation.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /mm/page_isolation.c (Architecture i386) and /mm/page_isolation.c (Architecture m68k)


  1 // SPDX-License-Identifier: GPL-2.0                 1 // SPDX-License-Identifier: GPL-2.0
  2 /*                                                  2 /*
  3  * linux/mm/page_isolation.c                        3  * linux/mm/page_isolation.c
  4  */                                                 4  */
  5                                                     5 
  6 #include <linux/mm.h>                               6 #include <linux/mm.h>
  7 #include <linux/page-isolation.h>                   7 #include <linux/page-isolation.h>
  8 #include <linux/pageblock-flags.h>                  8 #include <linux/pageblock-flags.h>
  9 #include <linux/memory.h>                           9 #include <linux/memory.h>
 10 #include <linux/hugetlb.h>                         10 #include <linux/hugetlb.h>
 11 #include <linux/page_owner.h>                      11 #include <linux/page_owner.h>
 12 #include <linux/migrate.h>                         12 #include <linux/migrate.h>
 13 #include "internal.h"                              13 #include "internal.h"
 14                                                    14 
 15 #define CREATE_TRACE_POINTS                        15 #define CREATE_TRACE_POINTS
 16 #include <trace/events/page_isolation.h>           16 #include <trace/events/page_isolation.h>
 17                                                    17 
 18 /*                                                 18 /*
 19  * This function checks whether the range [sta     19  * This function checks whether the range [start_pfn, end_pfn) includes
 20  * unmovable pages or not. The range must fall     20  * unmovable pages or not. The range must fall into a single pageblock and
 21  * consequently belong to a single zone.           21  * consequently belong to a single zone.
 22  *                                                 22  *
 23  * PageLRU check without isolation or lru_lock     23  * PageLRU check without isolation or lru_lock could race so that
 24  * MIGRATE_MOVABLE block might include unmovab     24  * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
 25  * check without lock_page also may miss some      25  * check without lock_page also may miss some movable non-lru pages at
 26  * race condition. So you can't expect this fu     26  * race condition. So you can't expect this function to be exact.
 27  *                                                 27  *
 28  * Returns a page without holding a reference.     28  * Returns a page without holding a reference. If the caller wants to
 29  * dereference that page (e.g., dumping), it h     29  * dereference that page (e.g., dumping), it has to make sure that it
 30  * cannot get removed (e.g., via memory unplug     30  * cannot get removed (e.g., via memory unplug) concurrently.
 31  *                                                 31  *
 32  */                                                32  */
 33 static struct page *has_unmovable_pages(unsign     33 static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long end_pfn,
 34                                 int migratetyp     34                                 int migratetype, int flags)
 35 {                                                  35 {
 36         struct page *page = pfn_to_page(start_     36         struct page *page = pfn_to_page(start_pfn);
 37         struct zone *zone = page_zone(page);       37         struct zone *zone = page_zone(page);
 38         unsigned long pfn;                         38         unsigned long pfn;
 39                                                    39 
 40         VM_BUG_ON(pageblock_start_pfn(start_pf     40         VM_BUG_ON(pageblock_start_pfn(start_pfn) !=
 41                   pageblock_start_pfn(end_pfn      41                   pageblock_start_pfn(end_pfn - 1));
 42                                                    42 
 43         if (is_migrate_cma_page(page)) {           43         if (is_migrate_cma_page(page)) {
 44                 /*                                 44                 /*
 45                  * CMA allocations (alloc_cont     45                  * CMA allocations (alloc_contig_range) really need to mark
 46                  * isolate CMA pageblocks even     46                  * isolate CMA pageblocks even when they are not movable in fact
 47                  * so consider them movable he     47                  * so consider them movable here.
 48                  */                                48                  */
 49                 if (is_migrate_cma(migratetype     49                 if (is_migrate_cma(migratetype))
 50                         return NULL;               50                         return NULL;
 51                                                    51 
 52                 return page;                       52                 return page;
 53         }                                          53         }
 54                                                    54 
 55         for (pfn = start_pfn; pfn < end_pfn; p     55         for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 56                 page = pfn_to_page(pfn);           56                 page = pfn_to_page(pfn);
 57                                                    57 
 58                 /*                                 58                 /*
 59                  * Both, bootmem allocations a     59                  * Both, bootmem allocations and memory holes are marked
 60                  * PG_reserved and are unmovab     60                  * PG_reserved and are unmovable. We can even have unmovable
 61                  * allocations inside ZONE_MOV     61                  * allocations inside ZONE_MOVABLE, for example when
 62                  * specifying "movablecore".       62                  * specifying "movablecore".
 63                  */                                63                  */
 64                 if (PageReserved(page))            64                 if (PageReserved(page))
 65                         return page;               65                         return page;
 66                                                    66 
 67                 /*                                 67                 /*
 68                  * If the zone is movable and      68                  * If the zone is movable and we have ruled out all reserved
 69                  * pages then it should be rea     69                  * pages then it should be reasonably safe to assume the rest
 70                  * is movable.                     70                  * is movable.
 71                  */                                71                  */
 72                 if (zone_idx(zone) == ZONE_MOV     72                 if (zone_idx(zone) == ZONE_MOVABLE)
 73                         continue;                  73                         continue;
 74                                                    74 
 75                 /*                                 75                 /*
 76                  * Hugepages are not in LRU li     76                  * Hugepages are not in LRU lists, but they're movable.
 77                  * THPs are on the LRU, but ne     77                  * THPs are on the LRU, but need to be counted as #small pages.
 78                  * We need not scan over tail      78                  * We need not scan over tail pages because we don't
 79                  * handle each tail page indiv     79                  * handle each tail page individually in migration.
 80                  */                                80                  */
 81                 if (PageHuge(page) || PageTran     81                 if (PageHuge(page) || PageTransCompound(page)) {
 82                         struct folio *folio =      82                         struct folio *folio = page_folio(page);
 83                         unsigned int skip_page     83                         unsigned int skip_pages;
 84                                                    84 
 85                         if (PageHuge(page)) {      85                         if (PageHuge(page)) {
 86                                 if (!hugepage_     86                                 if (!hugepage_migration_supported(folio_hstate(folio)))
 87                                         return     87                                         return page;
 88                         } else if (!folio_test     88                         } else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) {
 89                                 return page;       89                                 return page;
 90                         }                          90                         }
 91                                                    91 
 92                         skip_pages = folio_nr_     92                         skip_pages = folio_nr_pages(folio) - folio_page_idx(folio, page);
 93                         pfn += skip_pages - 1;     93                         pfn += skip_pages - 1;
 94                         continue;                  94                         continue;
 95                 }                                  95                 }
 96                                                    96 
 97                 /*                                 97                 /*
 98                  * We can't use page_count wit     98                  * We can't use page_count without pinning a page
 99                  * because another CPU can fre     99                  * because another CPU can free compound page.
100                  * This check already skips co    100                  * This check already skips compound tails of THP
101                  * because their page->_refcou    101                  * because their page->_refcount is zero at all times.
102                  */                               102                  */
103                 if (!page_ref_count(page)) {      103                 if (!page_ref_count(page)) {
104                         if (PageBuddy(page))      104                         if (PageBuddy(page))
105                                 pfn += (1 << b    105                                 pfn += (1 << buddy_order(page)) - 1;
106                         continue;                 106                         continue;
107                 }                                 107                 }
108                                                   108 
109                 /*                                109                 /*
110                  * The HWPoisoned page may be     110                  * The HWPoisoned page may be not in buddy system, and
111                  * page_count() is not 0.         111                  * page_count() is not 0.
112                  */                               112                  */
113                 if ((flags & MEMORY_OFFLINE) &    113                 if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
114                         continue;                 114                         continue;
115                                                   115 
116                 /*                                116                 /*
117                  * We treat all PageOffline()     117                  * We treat all PageOffline() pages as movable when offlining
118                  * to give drivers a chance to    118                  * to give drivers a chance to decrement their reference count
119                  * in MEM_GOING_OFFLINE in ord    119                  * in MEM_GOING_OFFLINE in order to indicate that these pages
120                  * can be offlined as there ar    120                  * can be offlined as there are no direct references anymore.
121                  * For actually unmovable Page    121                  * For actually unmovable PageOffline() where the driver does
122                  * not support this, we will f    122                  * not support this, we will fail later when trying to actually
123                  * move these pages that still    123                  * move these pages that still have a reference count > 0.
124                  * (false negatives in this fu    124                  * (false negatives in this function only)
125                  */                               125                  */
126                 if ((flags & MEMORY_OFFLINE) &    126                 if ((flags & MEMORY_OFFLINE) && PageOffline(page))
127                         continue;                 127                         continue;
128                                                   128 
129                 if (__PageMovable(page) || Pag    129                 if (__PageMovable(page) || PageLRU(page))
130                         continue;                 130                         continue;
131                                                   131 
132                 /*                                132                 /*
133                  * If there are RECLAIMABLE pa    133                  * If there are RECLAIMABLE pages, we need to check
134                  * it.  But now, memory offlin    134                  * it.  But now, memory offline itself doesn't call
135                  * shrink_node_slabs() and it     135                  * shrink_node_slabs() and it still to be fixed.
136                  */                               136                  */
137                 return page;                      137                 return page;
138         }                                         138         }
139         return NULL;                              139         return NULL;
140 }                                                 140 }
141                                                   141 
142 /*                                                142 /*
143  * This function set pageblock migratetype to     143  * This function set pageblock migratetype to isolate if no unmovable page is
144  * present in [start_pfn, end_pfn). The pagebl    144  * present in [start_pfn, end_pfn). The pageblock must intersect with
145  * [start_pfn, end_pfn).                          145  * [start_pfn, end_pfn).
146  */                                               146  */
147 static int set_migratetype_isolate(struct page    147 static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags,
148                         unsigned long start_pf    148                         unsigned long start_pfn, unsigned long end_pfn)
149 {                                                 149 {
150         struct zone *zone = page_zone(page);      150         struct zone *zone = page_zone(page);
151         struct page *unmovable;                   151         struct page *unmovable;
152         unsigned long flags;                      152         unsigned long flags;
153         unsigned long check_unmovable_start, c    153         unsigned long check_unmovable_start, check_unmovable_end;
154                                                   154 
155         if (PageUnaccepted(page))                 155         if (PageUnaccepted(page))
156                 accept_page(page);                156                 accept_page(page);
157                                                   157 
158         spin_lock_irqsave(&zone->lock, flags);    158         spin_lock_irqsave(&zone->lock, flags);
159                                                   159 
160         /*                                        160         /*
161          * We assume the caller intended to SE    161          * We assume the caller intended to SET migrate type to isolate.
162          * If it is already set, then someone     162          * If it is already set, then someone else must have raced and
163          * set it before us.                      163          * set it before us.
164          */                                       164          */
165         if (is_migrate_isolate_page(page)) {      165         if (is_migrate_isolate_page(page)) {
166                 spin_unlock_irqrestore(&zone->    166                 spin_unlock_irqrestore(&zone->lock, flags);
167                 return -EBUSY;                    167                 return -EBUSY;
168         }                                         168         }
169                                                   169 
170         /*                                        170         /*
171          * FIXME: Now, memory hotplug doesn't     171          * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
172          * We just check MOVABLE pages.           172          * We just check MOVABLE pages.
173          *                                        173          *
174          * Pass the intersection of [start_pfn    174          * Pass the intersection of [start_pfn, end_pfn) and the page's pageblock
175          * to avoid redundant checks.             175          * to avoid redundant checks.
176          */                                       176          */
177         check_unmovable_start = max(page_to_pf    177         check_unmovable_start = max(page_to_pfn(page), start_pfn);
178         check_unmovable_end = min(pageblock_en    178         check_unmovable_end = min(pageblock_end_pfn(page_to_pfn(page)),
179                                   end_pfn);       179                                   end_pfn);
180                                                   180 
181         unmovable = has_unmovable_pages(check_    181         unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
182                         migratetype, isol_flag    182                         migratetype, isol_flags);
183         if (!unmovable) {                         183         if (!unmovable) {
184                 if (!move_freepages_block_isol    184                 if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) {
185                         spin_unlock_irqrestore    185                         spin_unlock_irqrestore(&zone->lock, flags);
186                         return -EBUSY;            186                         return -EBUSY;
187                 }                                 187                 }
188                 zone->nr_isolate_pageblock++;     188                 zone->nr_isolate_pageblock++;
189                 spin_unlock_irqrestore(&zone->    189                 spin_unlock_irqrestore(&zone->lock, flags);
190                 return 0;                         190                 return 0;
191         }                                         191         }
192                                                   192 
193         spin_unlock_irqrestore(&zone->lock, fl    193         spin_unlock_irqrestore(&zone->lock, flags);
194         if (isol_flags & REPORT_FAILURE) {        194         if (isol_flags & REPORT_FAILURE) {
195                 /*                                195                 /*
196                  * printk() with zone->lock he    196                  * printk() with zone->lock held will likely trigger a
197                  * lockdep splat, so defer it     197                  * lockdep splat, so defer it here.
198                  */                               198                  */
199                 dump_page(unmovable, "unmovabl    199                 dump_page(unmovable, "unmovable page");
200         }                                         200         }
201                                                   201 
202         return -EBUSY;                            202         return -EBUSY;
203 }                                                 203 }
204                                                   204 
205 static void unset_migratetype_isolate(struct p    205 static void unset_migratetype_isolate(struct page *page, int migratetype)
206 {                                                 206 {
207         struct zone *zone;                        207         struct zone *zone;
208         unsigned long flags;                      208         unsigned long flags;
209         bool isolated_page = false;               209         bool isolated_page = false;
210         unsigned int order;                       210         unsigned int order;
211         struct page *buddy;                       211         struct page *buddy;
212                                                   212 
213         zone = page_zone(page);                   213         zone = page_zone(page);
214         spin_lock_irqsave(&zone->lock, flags);    214         spin_lock_irqsave(&zone->lock, flags);
215         if (!is_migrate_isolate_page(page))       215         if (!is_migrate_isolate_page(page))
216                 goto out;                         216                 goto out;
217                                                   217 
218         /*                                        218         /*
219          * Because freepage with more than pag    219          * Because freepage with more than pageblock_order on isolated
220          * pageblock is restricted to merge du    220          * pageblock is restricted to merge due to freepage counting problem,
221          * it is possible that there is free b    221          * it is possible that there is free buddy page.
222          * move_freepages_block() doesn't care    222          * move_freepages_block() doesn't care of merge so we need other
223          * approach in order to merge them. Is    223          * approach in order to merge them. Isolation and free will make
224          * these pages to be merged.              224          * these pages to be merged.
225          */                                       225          */
226         if (PageBuddy(page)) {                    226         if (PageBuddy(page)) {
227                 order = buddy_order(page);        227                 order = buddy_order(page);
228                 if (order >= pageblock_order &    228                 if (order >= pageblock_order && order < MAX_PAGE_ORDER) {
229                         buddy = find_buddy_pag    229                         buddy = find_buddy_page_pfn(page, page_to_pfn(page),
230                                                   230                                                     order, NULL);
231                         if (buddy && !is_migra    231                         if (buddy && !is_migrate_isolate_page(buddy)) {
232                                 isolated_page     232                                 isolated_page = !!__isolate_free_page(page, order);
233                                 /*                233                                 /*
234                                  * Isolating a    234                                  * Isolating a free page in an isolated pageblock
235                                  * is expected    235                                  * is expected to always work as watermarks don't
236                                  * apply here.    236                                  * apply here.
237                                  */               237                                  */
238                                 VM_WARN_ON(!is    238                                 VM_WARN_ON(!isolated_page);
239                         }                         239                         }
240                 }                                 240                 }
241         }                                         241         }
242                                                   242 
243         /*                                        243         /*
244          * If we isolate freepage with more th    244          * If we isolate freepage with more than pageblock_order, there
245          * should be no freepage in the range,    245          * should be no freepage in the range, so we could avoid costly
246          * pageblock scanning for freepage mov    246          * pageblock scanning for freepage moving.
247          *                                        247          *
248          * We didn't actually touch any of the    248          * We didn't actually touch any of the isolated pages, so place them
249          * to the tail of the freelist. This i    249          * to the tail of the freelist. This is an optimization for memory
250          * onlining - just onlined memory won'    250          * onlining - just onlined memory won't immediately be considered for
251          * allocation.                            251          * allocation.
252          */                                       252          */
253         if (!isolated_page) {                     253         if (!isolated_page) {
254                 /*                                254                 /*
255                  * Isolating this block alread    255                  * Isolating this block already succeeded, so this
256                  * should not fail on zone bou    256                  * should not fail on zone boundaries.
257                  */                               257                  */
258                 WARN_ON_ONCE(!move_freepages_b    258                 WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype));
259         } else {                                  259         } else {
260                 set_pageblock_migratetype(page    260                 set_pageblock_migratetype(page, migratetype);
261                 __putback_isolated_page(page,     261                 __putback_isolated_page(page, order, migratetype);
262         }                                         262         }
263         zone->nr_isolate_pageblock--;             263         zone->nr_isolate_pageblock--;
264 out:                                              264 out:
265         spin_unlock_irqrestore(&zone->lock, fl    265         spin_unlock_irqrestore(&zone->lock, flags);
266 }                                                 266 }
267                                                   267 
268 static inline struct page *                       268 static inline struct page *
269 __first_valid_page(unsigned long pfn, unsigned    269 __first_valid_page(unsigned long pfn, unsigned long nr_pages)
270 {                                                 270 {
271         int i;                                    271         int i;
272                                                   272 
273         for (i = 0; i < nr_pages; i++) {          273         for (i = 0; i < nr_pages; i++) {
274                 struct page *page;                274                 struct page *page;
275                                                   275 
276                 page = pfn_to_online_page(pfn     276                 page = pfn_to_online_page(pfn + i);
277                 if (!page)                        277                 if (!page)
278                         continue;                 278                         continue;
279                 return page;                      279                 return page;
280         }                                         280         }
281         return NULL;                              281         return NULL;
282 }                                                 282 }
283                                                   283 
284 /**                                               284 /**
285  * isolate_single_pageblock() -- tries to isol    285  * isolate_single_pageblock() -- tries to isolate a pageblock that might be
286  * within a free or in-use page.                  286  * within a free or in-use page.
287  * @boundary_pfn:               pageblock-alig    287  * @boundary_pfn:               pageblock-aligned pfn that a page might cross
288  * @flags:                      isolation flag    288  * @flags:                      isolation flags
289  * @gfp_flags:                  GFP flags used    289  * @gfp_flags:                  GFP flags used for migrating pages
290  * @isolate_before:     isolate the pageblock     290  * @isolate_before:     isolate the pageblock before the boundary_pfn
291  * @skip_isolation:     the flag to skip the p    291  * @skip_isolation:     the flag to skip the pageblock isolation in second
292  *                      isolate_single_pageblo    292  *                      isolate_single_pageblock()
293  * @migratetype:        migrate type to set in    293  * @migratetype:        migrate type to set in error recovery.
294  *                                                294  *
295  * Free and in-use pages can be as big as MAX_    295  * Free and in-use pages can be as big as MAX_PAGE_ORDER and contain more than one
296  * pageblock. When not all pageblocks within a    296  * pageblock. When not all pageblocks within a page are isolated at the same
297  * time, free page accounting can go wrong. Fo    297  * time, free page accounting can go wrong. For example, in the case of
298  * MAX_PAGE_ORDER = pageblock_order + 1, a MAX    298  * MAX_PAGE_ORDER = pageblock_order + 1, a MAX_PAGE_ORDER page has two
299  * pageblocks.                                    299  * pageblocks.
300  * [      MAX_PAGE_ORDER         ]                300  * [      MAX_PAGE_ORDER         ]
301  * [  pageblock0  |  pageblock1  ]                301  * [  pageblock0  |  pageblock1  ]
302  * When either pageblock is isolated, if it is    302  * When either pageblock is isolated, if it is a free page, the page is not
303  * split into separate migratetype lists, whic    303  * split into separate migratetype lists, which it is supposed to be; if it is an
304  * in-use page and freed later, __free_one_pag    304  * in-use page and freed later, __free_one_page() does not split the free page
305  * either. The function handles this by splitt    305  * either. The function handles this by splitting the free page or migrating
306  * the in-use page then splitting the free pag    306  * the in-use page then splitting the free page.
307  */                                               307  */
308 static int isolate_single_pageblock(unsigned l    308 static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
309                         gfp_t gfp_flags, bool     309                         gfp_t gfp_flags, bool isolate_before, bool skip_isolation,
310                         int migratetype)          310                         int migratetype)
311 {                                                 311 {
312         unsigned long start_pfn;                  312         unsigned long start_pfn;
313         unsigned long isolate_pageblock;          313         unsigned long isolate_pageblock;
314         unsigned long pfn;                        314         unsigned long pfn;
315         struct zone *zone;                        315         struct zone *zone;
316         int ret;                                  316         int ret;
317                                                   317 
318         VM_BUG_ON(!pageblock_aligned(boundary_    318         VM_BUG_ON(!pageblock_aligned(boundary_pfn));
319                                                   319 
320         if (isolate_before)                       320         if (isolate_before)
321                 isolate_pageblock = boundary_p    321                 isolate_pageblock = boundary_pfn - pageblock_nr_pages;
322         else                                      322         else
323                 isolate_pageblock = boundary_p    323                 isolate_pageblock = boundary_pfn;
324                                                   324 
325         /*                                        325         /*
326          * scan at the beginning of MAX_ORDER_    326          * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid
327          * only isolating a subset of pagebloc    327          * only isolating a subset of pageblocks from a bigger than pageblock
328          * free or in-use page. Also make sure    328          * free or in-use page. Also make sure all to-be-isolated pageblocks
329          * are within the same zone.              329          * are within the same zone.
330          */                                       330          */
331         zone  = page_zone(pfn_to_page(isolate_    331         zone  = page_zone(pfn_to_page(isolate_pageblock));
332         start_pfn  = max(ALIGN_DOWN(isolate_pa    332         start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
333                                       zone->zo    333                                       zone->zone_start_pfn);
334                                                   334 
335         if (skip_isolation) {                     335         if (skip_isolation) {
336                 int mt __maybe_unused = get_pa    336                 int mt __maybe_unused = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
337                                                   337 
338                 VM_BUG_ON(!is_migrate_isolate(    338                 VM_BUG_ON(!is_migrate_isolate(mt));
339         } else {                                  339         } else {
340                 ret = set_migratetype_isolate(    340                 ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype,
341                                 flags, isolate    341                                 flags, isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
342                                                   342 
343                 if (ret)                          343                 if (ret)
344                         return ret;               344                         return ret;
345         }                                         345         }
346                                                   346 
347         /*                                        347         /*
348          * Bail out early when the to-be-isola    348          * Bail out early when the to-be-isolated pageblock does not form
349          * a free or in-use page across bounda    349          * a free or in-use page across boundary_pfn:
350          *                                        350          *
351          * 1. isolate before boundary_pfn: the    351          * 1. isolate before boundary_pfn: the page after is not online
352          * 2. isolate after boundary_pfn: the     352          * 2. isolate after boundary_pfn: the page before is not online
353          *                                        353          *
354          * This also ensures correctness. With    354          * This also ensures correctness. Without it, when isolate after
355          * boundary_pfn and [start_pfn, bounda    355          * boundary_pfn and [start_pfn, boundary_pfn) are not online,
356          * __first_valid_page() will return un    356          * __first_valid_page() will return unexpected NULL in the for loop
357          * below.                                 357          * below.
358          */                                       358          */
359         if (isolate_before) {                     359         if (isolate_before) {
360                 if (!pfn_to_online_page(bounda    360                 if (!pfn_to_online_page(boundary_pfn))
361                         return 0;                 361                         return 0;
362         } else {                                  362         } else {
363                 if (!pfn_to_online_page(bounda    363                 if (!pfn_to_online_page(boundary_pfn - 1))
364                         return 0;                 364                         return 0;
365         }                                         365         }
366                                                   366 
367         for (pfn = start_pfn; pfn < boundary_p    367         for (pfn = start_pfn; pfn < boundary_pfn;) {
368                 struct page *page = __first_va    368                 struct page *page = __first_valid_page(pfn, boundary_pfn - pfn);
369                                                   369 
370                 VM_BUG_ON(!page);                 370                 VM_BUG_ON(!page);
371                 pfn = page_to_pfn(page);          371                 pfn = page_to_pfn(page);
372                                                   372 
373                 if (PageUnaccepted(page)) {       373                 if (PageUnaccepted(page)) {
374                         pfn += MAX_ORDER_NR_PA    374                         pfn += MAX_ORDER_NR_PAGES;
375                         continue;                 375                         continue;
376                 }                                 376                 }
377                                                   377 
378                 if (PageBuddy(page)) {            378                 if (PageBuddy(page)) {
379                         int order = buddy_orde    379                         int order = buddy_order(page);
380                                                   380 
381                         /* move_freepages_bloc    381                         /* move_freepages_block_isolate() handled this */
382                         VM_WARN_ON_ONCE(pfn +     382                         VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn);
383                                                   383 
384                         pfn += 1UL << order;      384                         pfn += 1UL << order;
385                         continue;                 385                         continue;
386                 }                                 386                 }
387                                                   387 
388                 /*                                388                 /*
389                  * If a compound page is strad    389                  * If a compound page is straddling our block, attempt
390                  * to migrate it out of the wa    390                  * to migrate it out of the way.
391                  *                                391                  *
392                  * We don't have to worry abou    392                  * We don't have to worry about this creating a large
393                  * free page that straddles in    393                  * free page that straddles into our block: gigantic
394                  * pages are freed as order-0     394                  * pages are freed as order-0 chunks, and LRU pages
395                  * (currently) do not exceed p    395                  * (currently) do not exceed pageblock_order.
396                  *                                396                  *
397                  * The block of interest has a    397                  * The block of interest has already been marked
398                  * MIGRATE_ISOLATE above, so w    398                  * MIGRATE_ISOLATE above, so when migration is done it
399                  * will free its pages onto th    399                  * will free its pages onto the correct freelists.
400                  */                               400                  */
401                 if (PageCompound(page)) {         401                 if (PageCompound(page)) {
402                         struct page *head = co    402                         struct page *head = compound_head(page);
403                         unsigned long head_pfn    403                         unsigned long head_pfn = page_to_pfn(head);
404                         unsigned long nr_pages    404                         unsigned long nr_pages = compound_nr(head);
405                                                   405 
406                         if (head_pfn + nr_page    406                         if (head_pfn + nr_pages <= boundary_pfn ||
407                             PageHuge(page)) {     407                             PageHuge(page)) {
408                                 pfn = head_pfn    408                                 pfn = head_pfn + nr_pages;
409                                 continue;         409                                 continue;
410                         }                         410                         }
411                                                   411 
412                         /*                        412                         /*
413                          * These pages are mov    413                          * These pages are movable too, but they're
414                          * not expected to exc    414                          * not expected to exceed pageblock_order.
415                          *                        415                          *
416                          * Let us know when th    416                          * Let us know when they do, so we can add
417                          * proper free and spl    417                          * proper free and split handling for them.
418                          */                       418                          */
419                         VM_WARN_ON_ONCE_PAGE(P    419                         VM_WARN_ON_ONCE_PAGE(PageLRU(page), page);
420                         VM_WARN_ON_ONCE_PAGE(_    420                         VM_WARN_ON_ONCE_PAGE(__PageMovable(page), page);
421                                                   421 
422                         goto failed;              422                         goto failed;
423                 }                                 423                 }
424                                                   424 
425                 pfn++;                            425                 pfn++;
426         }                                         426         }
427         return 0;                                 427         return 0;
428 failed:                                           428 failed:
429         /* restore the original migratetype */    429         /* restore the original migratetype */
430         if (!skip_isolation)                      430         if (!skip_isolation)
431                 unset_migratetype_isolate(pfn_    431                 unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
432         return -EBUSY;                            432         return -EBUSY;
433 }                                                 433 }
434                                                   434 
435 /**                                               435 /**
436  * start_isolate_page_range() - mark page rang    436  * start_isolate_page_range() - mark page range MIGRATE_ISOLATE
437  * @start_pfn:          The first PFN of the r    437  * @start_pfn:          The first PFN of the range to be isolated.
438  * @end_pfn:            The last PFN of the ra    438  * @end_pfn:            The last PFN of the range to be isolated.
439  * @migratetype:        Migrate type to set in    439  * @migratetype:        Migrate type to set in error recovery.
440  * @flags:              The following flags ar    440  * @flags:              The following flags are allowed (they can be combined in
441  *                      a bit mask)               441  *                      a bit mask)
442  *                      MEMORY_OFFLINE - isola    442  *                      MEMORY_OFFLINE - isolate to offline (!allocate) memory
443  *                                       e.g.,    443  *                                       e.g., skip over PageHWPoison() pages
444  *                                       and P    444  *                                       and PageOffline() pages.
445  *                      REPORT_FAILURE - repor    445  *                      REPORT_FAILURE - report details about the failure to
446  *                      isolate the range         446  *                      isolate the range
447  * @gfp_flags:          GFP flags used for mig    447  * @gfp_flags:          GFP flags used for migrating pages that sit across the
448  *                      range boundaries.         448  *                      range boundaries.
449  *                                                449  *
450  * Making page-allocation-type to be MIGRATE_I    450  * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
451  * the range will never be allocated. Any free    451  * the range will never be allocated. Any free pages and pages freed in the
452  * future will not be allocated again. If spec    452  * future will not be allocated again. If specified range includes migrate types
453  * other than MOVABLE or CMA, this will fail w    453  * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
454  * pages in the range finally, the caller has     454  * pages in the range finally, the caller has to free all pages in the range.
455  * test_pages_isolated() can be used to test i    455  * test_pages_isolated() can be used to test it.
456  *                                                456  *
457  * The function first tries to isolate the pag    457  * The function first tries to isolate the pageblocks at the beginning and end
458  * of the range, since there might be pages ac    458  * of the range, since there might be pages across the range boundaries.
459  * Afterwards, it isolates the rest of the ran    459  * Afterwards, it isolates the rest of the range.
460  *                                                460  *
461  * There is no high level synchronization mech    461  * There is no high level synchronization mechanism that prevents two threads
462  * from trying to isolate overlapping ranges.     462  * from trying to isolate overlapping ranges. If this happens, one thread
463  * will notice pageblocks in the overlapping r    463  * will notice pageblocks in the overlapping range already set to isolate.
464  * This happens in set_migratetype_isolate, an    464  * This happens in set_migratetype_isolate, and set_migratetype_isolate
465  * returns an error. We then clean up by resto    465  * returns an error. We then clean up by restoring the migration type on
466  * pageblocks we may have modified and return     466  * pageblocks we may have modified and return -EBUSY to caller. This
467  * prevents two threads from simultaneously wo    467  * prevents two threads from simultaneously working on overlapping ranges.
468  *                                                468  *
469  * Please note that there is no strong synchro    469  * Please note that there is no strong synchronization with the page allocator
470  * either. Pages might be freed while their pa    470  * either. Pages might be freed while their page blocks are marked ISOLATED.
471  * A call to drain_all_pages() after isolation    471  * A call to drain_all_pages() after isolation can flush most of them. However
472  * in some cases pages might still end up on p    472  * in some cases pages might still end up on pcp lists and that would allow
473  * for their allocation even when they are in     473  * for their allocation even when they are in fact isolated already. Depending
474  * on how strong of a guarantee the caller nee    474  * on how strong of a guarantee the caller needs, zone_pcp_disable/enable()
475  * might be used to flush and disable pcplist     475  * might be used to flush and disable pcplist before isolation and enable after
476  * unisolation.                                   476  * unisolation.
477  *                                                477  *
478  * Return: 0 on success and -EBUSY if any part    478  * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
479  */                                               479  */
480 int start_isolate_page_range(unsigned long sta    480 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
481                              int migratetype,     481                              int migratetype, int flags, gfp_t gfp_flags)
482 {                                                 482 {
483         unsigned long pfn;                        483         unsigned long pfn;
484         struct page *page;                        484         struct page *page;
485         /* isolation is done at page block gra    485         /* isolation is done at page block granularity */
486         unsigned long isolate_start = pagebloc    486         unsigned long isolate_start = pageblock_start_pfn(start_pfn);
487         unsigned long isolate_end = pageblock_    487         unsigned long isolate_end = pageblock_align(end_pfn);
488         int ret;                                  488         int ret;
489         bool skip_isolation = false;              489         bool skip_isolation = false;
490                                                   490 
491         /* isolate [isolate_start, isolate_sta    491         /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
492         ret = isolate_single_pageblock(isolate    492         ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false,
493                         skip_isolation, migrat    493                         skip_isolation, migratetype);
494         if (ret)                                  494         if (ret)
495                 return ret;                       495                 return ret;
496                                                   496 
497         if (isolate_start == isolate_end - pag    497         if (isolate_start == isolate_end - pageblock_nr_pages)
498                 skip_isolation = true;            498                 skip_isolation = true;
499                                                   499 
500         /* isolate [isolate_end - pageblock_nr    500         /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
501         ret = isolate_single_pageblock(isolate    501         ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true,
502                         skip_isolation, migrat    502                         skip_isolation, migratetype);
503         if (ret) {                                503         if (ret) {
504                 unset_migratetype_isolate(pfn_    504                 unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
505                 return ret;                       505                 return ret;
506         }                                         506         }
507                                                   507 
508         /* skip isolated pageblocks at the beg    508         /* skip isolated pageblocks at the beginning and end */
509         for (pfn = isolate_start + pageblock_n    509         for (pfn = isolate_start + pageblock_nr_pages;
510              pfn < isolate_end - pageblock_nr_    510              pfn < isolate_end - pageblock_nr_pages;
511              pfn += pageblock_nr_pages) {         511              pfn += pageblock_nr_pages) {
512                 page = __first_valid_page(pfn,    512                 page = __first_valid_page(pfn, pageblock_nr_pages);
513                 if (page && set_migratetype_is    513                 if (page && set_migratetype_isolate(page, migratetype, flags,
514                                         start_    514                                         start_pfn, end_pfn)) {
515                         undo_isolate_page_rang    515                         undo_isolate_page_range(isolate_start, pfn, migratetype);
516                         unset_migratetype_isol    516                         unset_migratetype_isolate(
517                                 pfn_to_page(is    517                                 pfn_to_page(isolate_end - pageblock_nr_pages),
518                                 migratetype);     518                                 migratetype);
519                         return -EBUSY;            519                         return -EBUSY;
520                 }                                 520                 }
521         }                                         521         }
522         return 0;                                 522         return 0;
523 }                                                 523 }
524                                                   524 
525 /**                                               525 /**
526  * undo_isolate_page_range - undo effects of s    526  * undo_isolate_page_range - undo effects of start_isolate_page_range()
527  * @start_pfn:          The first PFN of the i    527  * @start_pfn:          The first PFN of the isolated range
528  * @end_pfn:            The last PFN of the is    528  * @end_pfn:            The last PFN of the isolated range
529  * @migratetype:        New migrate type to se    529  * @migratetype:        New migrate type to set on the range
530  *                                                530  *
531  * This finds every MIGRATE_ISOLATE page block    531  * This finds every MIGRATE_ISOLATE page block in the given range
532  * and switches it to @migratetype.               532  * and switches it to @migratetype.
533  */                                               533  */
534 void undo_isolate_page_range(unsigned long sta    534 void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
535                             int migratetype)      535                             int migratetype)
536 {                                                 536 {
537         unsigned long pfn;                        537         unsigned long pfn;
538         struct page *page;                        538         struct page *page;
539         unsigned long isolate_start = pagebloc    539         unsigned long isolate_start = pageblock_start_pfn(start_pfn);
540         unsigned long isolate_end = pageblock_    540         unsigned long isolate_end = pageblock_align(end_pfn);
541                                                   541 
542         for (pfn = isolate_start;                 542         for (pfn = isolate_start;
543              pfn < isolate_end;                   543              pfn < isolate_end;
544              pfn += pageblock_nr_pages) {         544              pfn += pageblock_nr_pages) {
545                 page = __first_valid_page(pfn,    545                 page = __first_valid_page(pfn, pageblock_nr_pages);
546                 if (!page || !is_migrate_isola    546                 if (!page || !is_migrate_isolate_page(page))
547                         continue;                 547                         continue;
548                 unset_migratetype_isolate(page    548                 unset_migratetype_isolate(page, migratetype);
549         }                                         549         }
550 }                                                 550 }
551 /*                                                551 /*
552  * Test all pages in the range is free(means i    552  * Test all pages in the range is free(means isolated) or not.
553  * all pages in [start_pfn...end_pfn) must be     553  * all pages in [start_pfn...end_pfn) must be in the same zone.
554  * zone->lock must be held before call this.      554  * zone->lock must be held before call this.
555  *                                                555  *
556  * Returns the last tested pfn.                   556  * Returns the last tested pfn.
557  */                                               557  */
558 static unsigned long                              558 static unsigned long
559 __test_page_isolated_in_pageblock(unsigned lon    559 __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
560                                   int flags)      560                                   int flags)
561 {                                                 561 {
562         struct page *page;                        562         struct page *page;
563                                                   563 
564         while (pfn < end_pfn) {                   564         while (pfn < end_pfn) {
565                 page = pfn_to_page(pfn);          565                 page = pfn_to_page(pfn);
566                 if (PageBuddy(page))              566                 if (PageBuddy(page))
567                         /*                        567                         /*
568                          * If the page is on a    568                          * If the page is on a free list, it has to be on
569                          * the correct MIGRATE    569                          * the correct MIGRATE_ISOLATE freelist. There is no
570                          * simple way to verif    570                          * simple way to verify that as VM_BUG_ON(), though.
571                          */                       571                          */
572                         pfn += 1 << buddy_orde    572                         pfn += 1 << buddy_order(page);
573                 else if ((flags & MEMORY_OFFLI    573                 else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
574                         /* A HWPoisoned page c    574                         /* A HWPoisoned page cannot be also PageBuddy */
575                         pfn++;                    575                         pfn++;
576                 else if ((flags & MEMORY_OFFLI    576                 else if ((flags & MEMORY_OFFLINE) && PageOffline(page) &&
577                          !page_count(page))       577                          !page_count(page))
578                         /*                        578                         /*
579                          * The responsible dri    579                          * The responsible driver agreed to skip PageOffline()
580                          * pages when offlinin    580                          * pages when offlining memory by dropping its
581                          * reference in MEM_GO    581                          * reference in MEM_GOING_OFFLINE.
582                          */                       582                          */
583                         pfn++;                    583                         pfn++;
584                 else                              584                 else
585                         break;                    585                         break;
586         }                                         586         }
587                                                   587 
588         return pfn;                               588         return pfn;
589 }                                                 589 }
590                                                   590 
591 /**                                               591 /**
592  * test_pages_isolated - check if pageblocks i    592  * test_pages_isolated - check if pageblocks in range are isolated
593  * @start_pfn:          The first PFN of the i    593  * @start_pfn:          The first PFN of the isolated range
594  * @end_pfn:            The first PFN *after*     594  * @end_pfn:            The first PFN *after* the isolated range
595  * @isol_flags:         Testing mode flags        595  * @isol_flags:         Testing mode flags
596  *                                                596  *
597  * This tests if all in the specified range ar    597  * This tests if all in the specified range are free.
598  *                                                598  *
599  * If %MEMORY_OFFLINE is specified in @isol_fl    599  * If %MEMORY_OFFLINE is specified in @isol_flags, it will consider
600  * poisoned and offlined pages free as well.      600  * poisoned and offlined pages free as well.
601  *                                                601  *
602  * Caller must ensure the requested range does    602  * Caller must ensure the requested range doesn't span zones.
603  *                                                603  *
604  * Returns 0 if true, -EBUSY if one or more pa    604  * Returns 0 if true, -EBUSY if one or more pages are in use.
605  */                                               605  */
606 int test_pages_isolated(unsigned long start_pf    606 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
607                         int isol_flags)           607                         int isol_flags)
608 {                                                 608 {
609         unsigned long pfn, flags;                 609         unsigned long pfn, flags;
610         struct page *page;                        610         struct page *page;
611         struct zone *zone;                        611         struct zone *zone;
612         int ret;                                  612         int ret;
613                                                   613 
614         /*                                        614         /*
615          * Note: pageblock_nr_pages != MAX_PAG    615          * Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
616          * pages are not aligned to pageblock_    616          * pages are not aligned to pageblock_nr_pages.
617          * Then we just check migratetype firs    617          * Then we just check migratetype first.
618          */                                       618          */
619         for (pfn = start_pfn; pfn < end_pfn; p    619         for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
620                 page = __first_valid_page(pfn,    620                 page = __first_valid_page(pfn, pageblock_nr_pages);
621                 if (page && !is_migrate_isolat    621                 if (page && !is_migrate_isolate_page(page))
622                         break;                    622                         break;
623         }                                         623         }
624         page = __first_valid_page(start_pfn, e    624         page = __first_valid_page(start_pfn, end_pfn - start_pfn);
625         if ((pfn < end_pfn) || !page) {           625         if ((pfn < end_pfn) || !page) {
626                 ret = -EBUSY;                     626                 ret = -EBUSY;
627                 goto out;                         627                 goto out;
628         }                                         628         }
629                                                   629 
630         /* Check all pages are free or marked     630         /* Check all pages are free or marked as ISOLATED */
631         zone = page_zone(page);                   631         zone = page_zone(page);
632         spin_lock_irqsave(&zone->lock, flags);    632         spin_lock_irqsave(&zone->lock, flags);
633         pfn = __test_page_isolated_in_pagebloc    633         pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags);
634         spin_unlock_irqrestore(&zone->lock, fl    634         spin_unlock_irqrestore(&zone->lock, flags);
635                                                   635 
636         ret = pfn < end_pfn ? -EBUSY : 0;         636         ret = pfn < end_pfn ? -EBUSY : 0;
637                                                   637 
638 out:                                              638 out:
639         trace_test_pages_isolated(start_pfn, e    639         trace_test_pages_isolated(start_pfn, end_pfn, pfn);
640                                                   640 
641         return ret;                               641         return ret;
642 }                                                 642 }
643                                                   643 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php