
TOMOYO Linux Cross Reference
Linux/mm/swapfile.c


Diff markup

Differences between /mm/swapfile.c (Version linux-6.12-rc7) and /mm/swapfile.c (Version linux-5.15.171)


  1 // SPDX-License-Identifier: GPL-2.0-only            1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*                                                  2 /*
  3  *  linux/mm/swapfile.c                             3  *  linux/mm/swapfile.c
  4  *                                                  4  *
  5  *  Copyright (C) 1991, 1992, 1993, 1994  Linu      5  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  6  *  Swap reorganised 29.12.95, Stephen Tweedie      6  *  Swap reorganised 29.12.95, Stephen Tweedie
  7  */                                                 7  */
  8                                                     8 
  9 #include <linux/blkdev.h>                      << 
 10 #include <linux/mm.h>                               9 #include <linux/mm.h>
 11 #include <linux/sched/mm.h>                        10 #include <linux/sched/mm.h>
 12 #include <linux/sched/task.h>                      11 #include <linux/sched/task.h>
 13 #include <linux/hugetlb.h>                         12 #include <linux/hugetlb.h>
 14 #include <linux/mman.h>                            13 #include <linux/mman.h>
 15 #include <linux/slab.h>                            14 #include <linux/slab.h>
 16 #include <linux/kernel_stat.h>                     15 #include <linux/kernel_stat.h>
 17 #include <linux/swap.h>                            16 #include <linux/swap.h>
 18 #include <linux/vmalloc.h>                         17 #include <linux/vmalloc.h>
 19 #include <linux/pagemap.h>                         18 #include <linux/pagemap.h>
 20 #include <linux/namei.h>                           19 #include <linux/namei.h>
 21 #include <linux/shmem_fs.h>                        20 #include <linux/shmem_fs.h>
 22 #include <linux/blk-cgroup.h>                  !!  21 #include <linux/blkdev.h>
 23 #include <linux/random.h>                          22 #include <linux/random.h>
 24 #include <linux/writeback.h>                       23 #include <linux/writeback.h>
 25 #include <linux/proc_fs.h>                         24 #include <linux/proc_fs.h>
 26 #include <linux/seq_file.h>                        25 #include <linux/seq_file.h>
 27 #include <linux/init.h>                            26 #include <linux/init.h>
 28 #include <linux/ksm.h>                             27 #include <linux/ksm.h>
 29 #include <linux/rmap.h>                            28 #include <linux/rmap.h>
 30 #include <linux/security.h>                        29 #include <linux/security.h>
 31 #include <linux/backing-dev.h>                     30 #include <linux/backing-dev.h>
 32 #include <linux/mutex.h>                           31 #include <linux/mutex.h>
 33 #include <linux/capability.h>                      32 #include <linux/capability.h>
 34 #include <linux/syscalls.h>                        33 #include <linux/syscalls.h>
 35 #include <linux/memcontrol.h>                      34 #include <linux/memcontrol.h>
 36 #include <linux/poll.h>                            35 #include <linux/poll.h>
 37 #include <linux/oom.h>                             36 #include <linux/oom.h>
                                                   >>  37 #include <linux/frontswap.h>
 38 #include <linux/swapfile.h>                        38 #include <linux/swapfile.h>
 39 #include <linux/export.h>                          39 #include <linux/export.h>
 40 #include <linux/swap_slots.h>                      40 #include <linux/swap_slots.h>
 41 #include <linux/sort.h>                            41 #include <linux/sort.h>
 42 #include <linux/completion.h>                      42 #include <linux/completion.h>
 43 #include <linux/suspend.h>                     << 
 44 #include <linux/zswap.h>                       << 
 45 #include <linux/plist.h>                       << 
 46                                                    43 
 47 #include <asm/tlbflush.h>                          44 #include <asm/tlbflush.h>
 48 #include <linux/swapops.h>                         45 #include <linux/swapops.h>
 49 #include <linux/swap_cgroup.h>                     46 #include <linux/swap_cgroup.h>
 50 #include "internal.h"                          << 
 51 #include "swap.h"                              << 
 52                                                    47 
 53 static bool swap_count_continued(struct swap_i     48 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 54                                  unsigned char     49                                  unsigned char);
 55 static void free_swap_count_continuations(stru     50 static void free_swap_count_continuations(struct swap_info_struct *);
 56 static void swap_entry_range_free(struct swap_ << 
 57                                   unsigned int << 
 58 static void swap_range_alloc(struct swap_info_ << 
 59                              unsigned int nr_e << 
 60 static bool folio_swapcache_freeable(struct fo << 
 61 static struct swap_cluster_info *lock_cluster_ << 
 62                 struct swap_info_struct *si, u << 
 63 static void unlock_cluster_or_swap_info(struct << 
 64                                         struct << 
 65                                                    51 
 66 static DEFINE_SPINLOCK(swap_lock);             !!  52 DEFINE_SPINLOCK(swap_lock);
 67 static unsigned int nr_swapfiles;                  53 static unsigned int nr_swapfiles;
 68 atomic_long_t nr_swap_pages;                       54 atomic_long_t nr_swap_pages;
 69 /*                                                 55 /*
 70  * Some modules use swappable objects and may      56  * Some modules use swappable objects and may try to swap them out under
 71  * memory pressure (via the shrinker). Before      57  * memory pressure (via the shrinker). Before doing so, they may wish to
 72  * check to see if any swap space is available     58  * check to see if any swap space is available.
 73  */                                                59  */
 74 EXPORT_SYMBOL_GPL(nr_swap_pages);                  60 EXPORT_SYMBOL_GPL(nr_swap_pages);
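
The comment above describes a pattern worth seeing concretely: a would-be swapper reads a shared atomic counter before doing any work. A minimal userspace sketch using plain C11 atomics; the counter name merely echoes the kernel's, and this only approximates what a caller of the kernel's get_nr_swap_pages() helper would do:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* userspace stand-in for the kernel's atomic_long_t nr_swap_pages */
static atomic_long nr_swap_pages_demo = 128;

/* what a shrinker-like caller would check before trying to swap out */
static bool swap_space_available(void)
{
        return atomic_load(&nr_swap_pages_demo) > 0;
}

int main(void)
{
        printf("swap space available: %s\n",
               swap_space_available() ? "yes" : "no");
        return 0;
}
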
 75 /* protected with swap_lock. reading in vm_swa     61 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
 76 long total_swap_pages;                             62 long total_swap_pages;
 77 static int least_priority = -1;                    63 static int least_priority = -1;
 78 unsigned long swapfile_maximum_size;           << 
 79 #ifdef CONFIG_MIGRATION                        << 
 80 bool swap_migration_ad_supported;              << 
 81 #endif  /* CONFIG_MIGRATION */                 << 
 82                                                    64 
 83 static const char Bad_file[] = "Bad swap file      65 static const char Bad_file[] = "Bad swap file entry ";
 84 static const char Unused_file[] = "Unused swap     66 static const char Unused_file[] = "Unused swap file entry ";
 85 static const char Bad_offset[] = "Bad swap off     67 static const char Bad_offset[] = "Bad swap offset entry ";
 86 static const char Unused_offset[] = "Unused sw     68 static const char Unused_offset[] = "Unused swap offset entry ";
 87                                                    69 
 88 /*                                                 70 /*
 89  * all active swap_info_structs                    71  * all active swap_info_structs
 90  * protected with swap_lock, and ordered by pr     72  * protected with swap_lock, and ordered by priority.
 91  */                                                73  */
 92 static PLIST_HEAD(swap_active_head);           !!  74 PLIST_HEAD(swap_active_head);
 93                                                    75 
 94 /*                                                 76 /*
 95  * all available (active, not full) swap_info_     77  * all available (active, not full) swap_info_structs
 96  * protected with swap_avail_lock, ordered by      78  * protected with swap_avail_lock, ordered by priority.
 97  * This is used by folio_alloc_swap() instead  !!  79  * This is used by get_swap_page() instead of swap_active_head
 98  * because swap_active_head includes all swap_     80  * because swap_active_head includes all swap_info_structs,
 99  * but folio_alloc_swap() doesn't need to look !!  81  * but get_swap_page() doesn't need to look at full ones.
100  * This uses its own lock instead of swap_lock     82  * This uses its own lock instead of swap_lock because when a
101  * swap_info_struct changes between not-full/f     83  * swap_info_struct changes between not-full/full, it needs to
102  * add/remove itself to/from this list, but th     84  * add/remove itself to/from this list, but the swap_info_struct->lock
103  * is held and the locking order requires swap     85  * is held and the locking order requires swap_lock to be taken
104  * before any swap_info_struct->lock.              86  * before any swap_info_struct->lock.
105  */                                                87  */
106 static struct plist_head *swap_avail_heads;        88 static struct plist_head *swap_avail_heads;
107 static DEFINE_SPINLOCK(swap_avail_lock);           89 static DEFINE_SPINLOCK(swap_avail_lock);
108                                                    90 
109 static struct swap_info_struct *swap_info[MAX_ !!  91 struct swap_info_struct *swap_info[MAX_SWAPFILES];
110                                                    92 
111 static DEFINE_MUTEX(swapon_mutex);                 93 static DEFINE_MUTEX(swapon_mutex);
112                                                    94 
113 static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait)     95 static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait);
114 /* Activity counter to indicate that a swapon      96 /* Activity counter to indicate that a swapon or swapoff has occurred */
115 static atomic_t proc_poll_event = ATOMIC_INIT(     97 static atomic_t proc_poll_event = ATOMIC_INIT(0);
116                                                    98 
117 atomic_t nr_rotate_swap = ATOMIC_INIT(0);          99 atomic_t nr_rotate_swap = ATOMIC_INIT(0);
118                                                   100 
119 static struct swap_info_struct *swap_type_to_s    101 static struct swap_info_struct *swap_type_to_swap_info(int type)
120 {                                                 102 {
121         if (type >= MAX_SWAPFILES)                103         if (type >= MAX_SWAPFILES)
122                 return NULL;                      104                 return NULL;
123                                                   105 
124         return READ_ONCE(swap_info[type]); /*     106         return READ_ONCE(swap_info[type]); /* rcu_dereference() */
125 }                                                 107 }
126                                                   108 
127 static inline unsigned char swap_count(unsigne    109 static inline unsigned char swap_count(unsigned char ent)
128 {                                                 110 {
129         return ent & ~SWAP_HAS_CACHE;   /* may    111         return ent & ~SWAP_HAS_CACHE;   /* may include COUNT_CONTINUED flag */
130 }                                                 112 }
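
A runnable sketch of the masking swap_count() performs; the SWAP_HAS_CACHE value below is an assumption based on <linux/swap.h>:

#include <assert.h>
#include <stdio.h>

#define SWAP_HAS_CACHE  0x40    /* assumed value, per <linux/swap.h> */

static unsigned char demo_swap_count(unsigned char ent)
{
        return ent & ~SWAP_HAS_CACHE;   /* may include COUNT_CONTINUED */
}

int main(void)
{
        /* an entry with two map references that is also in the swap cache */
        unsigned char ent = 2 | SWAP_HAS_CACHE;

        assert(demo_swap_count(ent) == 2);
        printf("raw=0x%02x count=%u\n", ent, demo_swap_count(ent));
        return 0;
}
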
131                                                   113 
132 /* Reclaim the swap entry anyway if possible *    114 /* Reclaim the swap entry anyway if possible */
133 #define TTRS_ANYWAY             0x1               115 #define TTRS_ANYWAY             0x1
134 /*                                                116 /*
135  * Reclaim the swap entry if there are no more    117  * Reclaim the swap entry if there are no more mappings of the
136  * corresponding page                             118  * corresponding page
137  */                                               119  */
138 #define TTRS_UNMAPPED           0x2               120 #define TTRS_UNMAPPED           0x2
139 /* Reclaim the swap entry if swap is getting f !! 121 /* Reclaim the swap entry if swap is getting full */
140 #define TTRS_FULL               0x4               122 #define TTRS_FULL               0x4
141 /* Reclaim directly, bypass the slot cache and << 
142 #define TTRS_DIRECT             0x8            << 
143                                                << 
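
A hedged sketch of how these TTRS_* bits combine into a reclaim decision, mirroring the need_reclaim test in the 6.12 __try_to_reclaim_swap() below; the mapped/full parameters stand in for page_mapped() and mem_cgroup_swap_full():

#include <stdbool.h>
#include <stdio.h>

#define TTRS_ANYWAY     0x1
#define TTRS_UNMAPPED   0x2
#define TTRS_FULL       0x4

/* stand-ins for page_mapped() / mem_cgroup_swap_full() */
static bool demo_need_reclaim(unsigned long flags, bool mapped, bool full)
{
        return (flags & TTRS_ANYWAY) ||
               ((flags & TTRS_UNMAPPED) && !mapped) ||
               ((flags & TTRS_FULL) && full);
}

int main(void)
{
        printf("%d\n", demo_need_reclaim(TTRS_UNMAPPED, false, false)); /* 1 */
        printf("%d\n", demo_need_reclaim(TTRS_FULL, true, false));      /* 0 */
        return 0;
}
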
144 static bool swap_is_has_cache(struct swap_info << 
145                               unsigned long of << 
146 {                                              << 
147         unsigned char *map = si->swap_map + of << 
148         unsigned char *map_end = map + nr_page << 
149                                                << 
150         do {                                   << 
151                 VM_BUG_ON(!(*map & SWAP_HAS_CA << 
152                 if (*map != SWAP_HAS_CACHE)    << 
153                         return false;          << 
154         } while (++map < map_end);             << 
155                                                << 
156         return true;                           << 
157 }                                              << 
158                                                << 
159 static bool swap_is_last_map(struct swap_info_ << 
160                 unsigned long offset, int nr_p << 
161 {                                              << 
162         unsigned char *map = si->swap_map + of << 
163         unsigned char *map_end = map + nr_page << 
164         unsigned char count = *map;            << 
165                                                   123 
166         if (swap_count(count) != 1)            !! 124 /* returns 1 if swap entry is freed */
167                 return false;                  << 
168                                                << 
169         while (++map < map_end) {              << 
170                 if (*map != count)             << 
171                         return false;          << 
172         }                                      << 
173                                                << 
174         *has_cache = !!(count & SWAP_HAS_CACHE << 
175         return true;                           << 
176 }                                              << 
177                                                << 
178 /*                                             << 
179  * returns number of pages in the folio that b << 
180  * the folio was reclaimed. If negative, the f << 
181  * folio was associated with the swap entry.   << 
182  */                                            << 
183 static int __try_to_reclaim_swap(struct swap_i    125 static int __try_to_reclaim_swap(struct swap_info_struct *si,
184                                  unsigned long    126                                  unsigned long offset, unsigned long flags)
185 {                                                 127 {
186         swp_entry_t entry = swp_entry(si->type    128         swp_entry_t entry = swp_entry(si->type, offset);
187         struct address_space *address_space =  !! 129         struct page *page;
188         struct swap_cluster_info *ci;          !! 130         int ret = 0;
189         struct folio *folio;                   << 
190         int ret, nr_pages;                     << 
191         bool need_reclaim;                     << 
192                                                   131 
193         folio = filemap_get_folio(address_spac !! 132         page = find_get_page(swap_address_space(entry), offset);
194         if (IS_ERR(folio))                     !! 133         if (!page)
195                 return 0;                         134                 return 0;
196                                                << 
197         nr_pages = folio_nr_pages(folio);      << 
198         ret = -nr_pages;                       << 
199                                                << 
200         /*                                        135         /*
201          * When this function is called from s    136          * When this function is called from scan_swap_map_slots(), it is being
202          * called by vmscan.c at reclaiming fo !! 137          * called by vmscan.c while reclaiming pages, so we hold a lock on a page
203          * here. We have to use trylock for av    138          * here. We have to use trylock for avoiding deadlock. This is a special
204          * case and you should use folio_free_ !! 139          * case and you should use try_to_free_swap() with explicit lock_page()
205          * in usual operations.                   140          * in usual operations.
206          */                                       141          */
207         if (!folio_trylock(folio))             !! 142         if (trylock_page(page)) {
208                 goto out;                      !! 143                 if ((flags & TTRS_ANYWAY) ||
209                                                !! 144                     ((flags & TTRS_UNMAPPED) && !page_mapped(page)) ||
210         /* offset could point to the middle of !! 145                     ((flags & TTRS_FULL) && mem_cgroup_swap_full(page)))
211         entry = folio->swap;                   !! 146                         ret = try_to_free_swap(page);
212         offset = swp_offset(entry);            !! 147                 unlock_page(page);
213                                                !! 148         }
214         need_reclaim = ((flags & TTRS_ANYWAY)  !! 149         put_page(page);
215                         ((flags & TTRS_UNMAPPE << 
216                         ((flags & TTRS_FULL) & << 
217         if (!need_reclaim || !folio_swapcache_ << 
218                 goto out_unlock;               << 
219                                                << 
220         /*                                     << 
221          * It's safe to delete the folio from  << 
222          * swap_map is HAS_CACHE only, which m << 
223          * reference or pending writeback, and << 
224          */                                    << 
225         ci = lock_cluster_or_swap_info(si, off << 
226         need_reclaim = swap_is_has_cache(si, o << 
227         unlock_cluster_or_swap_info(si, ci);   << 
228         if (!need_reclaim)                     << 
229                 goto out_unlock;               << 
230                                                << 
231         if (!(flags & TTRS_DIRECT)) {          << 
232                 /* Free through slot cache */  << 
233                 delete_from_swap_cache(folio); << 
234                 folio_set_dirty(folio);        << 
235                 ret = nr_pages;                << 
236                 goto out_unlock;               << 
237         }                                      << 
238                                                << 
239         xa_lock_irq(&address_space->i_pages);  << 
240         __delete_from_swap_cache(folio, entry, << 
241         xa_unlock_irq(&address_space->i_pages) << 
242         folio_ref_sub(folio, nr_pages);        << 
243         folio_set_dirty(folio);                << 
244                                                << 
245         spin_lock(&si->lock);                  << 
246         /* Only single page folio can be backed by zswap */ << 
247         if (nr_pages == 1)                     << 
248                 zswap_invalidate(entry);       << 
249         swap_entry_range_free(si, entry, nr_pa << 
250         spin_unlock(&si->lock);                << 
251         ret = nr_pages;                        << 
252 out_unlock:                                    << 
253         folio_unlock(folio);                   << 
254 out:                                           << 
255         folio_put(folio);                      << 
256         return ret;                               150         return ret;
257 }                                                 151 }
258                                                   152 
259 static inline struct swap_extent *first_se(str    153 static inline struct swap_extent *first_se(struct swap_info_struct *sis)
260 {                                                 154 {
261         struct rb_node *rb = rb_first(&sis->sw    155         struct rb_node *rb = rb_first(&sis->swap_extent_root);
262         return rb_entry(rb, struct swap_extent    156         return rb_entry(rb, struct swap_extent, rb_node);
263 }                                                 157 }
264                                                   158 
265 static inline struct swap_extent *next_se(stru    159 static inline struct swap_extent *next_se(struct swap_extent *se)
266 {                                                 160 {
267         struct rb_node *rb = rb_next(&se->rb_n    161         struct rb_node *rb = rb_next(&se->rb_node);
268         return rb ? rb_entry(rb, struct swap_e    162         return rb ? rb_entry(rb, struct swap_extent, rb_node) : NULL;
269 }                                                 163 }
270                                                   164 
271 /*                                                165 /*
272  * swapon tells the device that all the old s    166  * swapon tells the device that all the old swap contents can be discarded,
273  * to allow the swap device to optimize its we    167  * to allow the swap device to optimize its wear-levelling.
274  */                                               168  */
275 static int discard_swap(struct swap_info_struc    169 static int discard_swap(struct swap_info_struct *si)
276 {                                                 170 {
277         struct swap_extent *se;                   171         struct swap_extent *se;
278         sector_t start_block;                     172         sector_t start_block;
279         sector_t nr_blocks;                       173         sector_t nr_blocks;
280         int err = 0;                              174         int err = 0;
281                                                   175 
282         /* Do not discard the swap header page    176         /* Do not discard the swap header page! */
283         se = first_se(si);                        177         se = first_se(si);
284         start_block = (se->start_block + 1) <<    178         start_block = (se->start_block + 1) << (PAGE_SHIFT - 9);
285         nr_blocks = ((sector_t)se->nr_pages -     179         nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
286         if (nr_blocks) {                          180         if (nr_blocks) {
287                 err = blkdev_issue_discard(si-    181                 err = blkdev_issue_discard(si->bdev, start_block,
288                                 nr_blocks, GFP !! 182                                 nr_blocks, GFP_KERNEL, 0);
289                 if (err)                          183                 if (err)
290                         return err;               184                         return err;
291                 cond_resched();                   185                 cond_resched();
292         }                                         186         }
293                                                   187 
294         for (se = next_se(se); se; se = next_s    188         for (se = next_se(se); se; se = next_se(se)) {
295                 start_block = se->start_block     189                 start_block = se->start_block << (PAGE_SHIFT - 9);
296                 nr_blocks = (sector_t)se->nr_p    190                 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
297                                                   191 
298                 err = blkdev_issue_discard(si-    192                 err = blkdev_issue_discard(si->bdev, start_block,
299                                 nr_blocks, GFP !! 193                                 nr_blocks, GFP_KERNEL, 0);
300                 if (err)                          194                 if (err)
301                         break;                    195                         break;
302                                                   196 
303                 cond_resched();                   197                 cond_resched();
304         }                                         198         }
305         return err;             /* That will o    199         return err;             /* That will often be -EOPNOTSUPP */
306 }                                                 200 }
307                                                   201 
308 static struct swap_extent *                       202 static struct swap_extent *
309 offset_to_swap_extent(struct swap_info_struct     203 offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset)
310 {                                                 204 {
311         struct swap_extent *se;                   205         struct swap_extent *se;
312         struct rb_node *rb;                       206         struct rb_node *rb;
313                                                   207 
314         rb = sis->swap_extent_root.rb_node;       208         rb = sis->swap_extent_root.rb_node;
315         while (rb) {                              209         while (rb) {
316                 se = rb_entry(rb, struct swap_    210                 se = rb_entry(rb, struct swap_extent, rb_node);
317                 if (offset < se->start_page)      211                 if (offset < se->start_page)
318                         rb = rb->rb_left;         212                         rb = rb->rb_left;
319                 else if (offset >= se->start_p    213                 else if (offset >= se->start_page + se->nr_pages)
320                         rb = rb->rb_right;        214                         rb = rb->rb_right;
321                 else                              215                 else
322                         return se;                216                         return se;
323         }                                         217         }
324         /* It *must* be present */                218         /* It *must* be present */
325         BUG();                                    219         BUG();
326 }                                                 220 }
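
offset_to_swap_extent() walks an rbtree of page ranges. A self-contained analogue of the same three-way comparison, using binary search over a sorted array instead of an rbtree (all names here are hypothetical):

#include <stdio.h>

struct extent {
        unsigned long start_page;
        unsigned long nr_pages;
        unsigned long long start_block;
};

/* same three-way comparison as the rbtree walk above */
static const struct extent *find_extent(const struct extent *ext,
                                        int nr, unsigned long offset)
{
        int lo = 0, hi = nr - 1;

        while (lo <= hi) {
                int mid = lo + (hi - lo) / 2;
                const struct extent *e = &ext[mid];

                if (offset < e->start_page)
                        hi = mid - 1;               /* rb = rb->rb_left */
                else if (offset >= e->start_page + e->nr_pages)
                        lo = mid + 1;               /* rb = rb->rb_right */
                else
                        return e;                   /* offset falls in range */
        }
        return NULL;    /* the kernel BUG()s instead: it *must* be present */
}

int main(void)
{
        const struct extent map[] = {
                { 0,   64, 1000 },
                { 64, 192, 5000 },
        };
        const struct extent *e = find_extent(map, 2, 100);

        if (e)
                printf("offset 100 -> extent starting at page %lu\n",
                       e->start_page);
        return 0;
}
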
327                                                   221 
328 sector_t swap_folio_sector(struct folio *folio !! 222 sector_t swap_page_sector(struct page *page)
329 {                                                 223 {
330         struct swap_info_struct *sis = swp_swa !! 224         struct swap_info_struct *sis = page_swap_info(page);
331         struct swap_extent *se;                   225         struct swap_extent *se;
332         sector_t sector;                          226         sector_t sector;
333         pgoff_t offset;                           227         pgoff_t offset;
334                                                   228 
335         offset = swp_offset(folio->swap);      !! 229         offset = __page_file_index(page);
336         se = offset_to_swap_extent(sis, offset    230         se = offset_to_swap_extent(sis, offset);
337         sector = se->start_block + (offset - s    231         sector = se->start_block + (offset - se->start_page);
338         return sector << (PAGE_SHIFT - 9);        232         return sector << (PAGE_SHIFT - 9);
339 }                                                 233 }
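
The << (PAGE_SHIFT - 9) conversions in this file rely on 512-byte sectors: with 4 KiB pages, each page spans 2^(12-9) = 8 sectors. A worked example, assuming PAGE_SHIFT == 12:

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12      /* assumed 4 KiB pages */

int main(void)
{
        unsigned long long start_block = 3;     /* page-sized block on disk */
        unsigned long long sector = start_block << (DEMO_PAGE_SHIFT - 9);

        /* 3 * 4096 bytes / 512 bytes-per-sector == 24 */
        printf("page block %llu starts at sector %llu\n", start_block, sector);
        return 0;
}
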
340                                                   234 
341 /*                                                235 /*
342  * swap allocation tells the device that a cl    236  * swap allocation tells the device that a cluster of swap can now be discarded,
343  * to allow the swap device to optimize its we    237  * to allow the swap device to optimize its wear-levelling.
344  */                                               238  */
345 static void discard_swap_cluster(struct swap_i    239 static void discard_swap_cluster(struct swap_info_struct *si,
346                                  pgoff_t start    240                                  pgoff_t start_page, pgoff_t nr_pages)
347 {                                                 241 {
348         struct swap_extent *se = offset_to_swa    242         struct swap_extent *se = offset_to_swap_extent(si, start_page);
349                                                   243 
350         while (nr_pages) {                        244         while (nr_pages) {
351                 pgoff_t offset = start_page -     245                 pgoff_t offset = start_page - se->start_page;
352                 sector_t start_block = se->sta    246                 sector_t start_block = se->start_block + offset;
353                 sector_t nr_blocks = se->nr_pa    247                 sector_t nr_blocks = se->nr_pages - offset;
354                                                   248 
355                 if (nr_blocks > nr_pages)         249                 if (nr_blocks > nr_pages)
356                         nr_blocks = nr_pages;     250                         nr_blocks = nr_pages;
357                 start_page += nr_blocks;          251                 start_page += nr_blocks;
358                 nr_pages -= nr_blocks;            252                 nr_pages -= nr_blocks;
359                                                   253 
360                 start_block <<= PAGE_SHIFT - 9    254                 start_block <<= PAGE_SHIFT - 9;
361                 nr_blocks <<= PAGE_SHIFT - 9;     255                 nr_blocks <<= PAGE_SHIFT - 9;
362                 if (blkdev_issue_discard(si->b    256                 if (blkdev_issue_discard(si->bdev, start_block,
363                                         nr_blo !! 257                                         nr_blocks, GFP_NOIO, 0))
364                         break;                    258                         break;
365                                                   259 
366                 se = next_se(se);                 260                 se = next_se(se);
367         }                                         261         }
368 }                                                 262 }
369                                                   263 
370 #ifdef CONFIG_THP_SWAP                            264 #ifdef CONFIG_THP_SWAP
371 #define SWAPFILE_CLUSTER        HPAGE_PMD_NR      265 #define SWAPFILE_CLUSTER        HPAGE_PMD_NR
372                                                   266 
373 #define swap_entry_order(order) (order)        !! 267 #define swap_entry_size(size)   (size)
374 #else                                             268 #else
375 #define SWAPFILE_CLUSTER        256               269 #define SWAPFILE_CLUSTER        256
376                                                   270 
377 /*                                                271 /*
378  * Define swap_entry_order() as constant to le !! 272  * Define swap_entry_size() as constant to let compiler to optimize
379  * out some code if !CONFIG_THP_SWAP              273  * out some code if !CONFIG_THP_SWAP
380  */                                               274  */
381 #define swap_entry_order(order) 0              !! 275 #define swap_entry_size(size)   1
382 #endif                                            276 #endif
383 #define LATENCY_LIMIT           256               277 #define LATENCY_LIMIT           256
384                                                   278 
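
Defining swap_entry_order() as the literal constant 0 when THP swap is compiled out lets the compiler delete order-dependent branches outright. A standalone illustration of the same trick (CONFIG_THP_SWAP here is just a demo macro, toggled with -DCONFIG_THP_SWAP):

#include <stdio.h>

#ifdef CONFIG_THP_SWAP
#define demo_swap_entry_order(order)    (order)
#else
#define demo_swap_entry_order(order)    0
#endif

int main(void)
{
        int order = demo_swap_entry_order(4);

        if (order)      /* dead code when the macro is the constant 0 */
                printf("would allocate %d entries\n", 1 << order);
        else
                printf("order-0 allocations only\n");
        return 0;
}
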
                                                   >> 279 static inline void cluster_set_flag(struct swap_cluster_info *info,
                                                   >> 280         unsigned int flag)
                                                   >> 281 {
                                                   >> 282         info->flags = flag;
                                                   >> 283 }
                                                   >> 284 
                                                   >> 285 static inline unsigned int cluster_count(struct swap_cluster_info *info)
                                                   >> 286 {
                                                   >> 287         return info->data;
                                                   >> 288 }
                                                   >> 289 
                                                   >> 290 static inline void cluster_set_count(struct swap_cluster_info *info,
                                                   >> 291                                      unsigned int c)
                                                   >> 292 {
                                                   >> 293         info->data = c;
                                                   >> 294 }
                                                   >> 295 
                                                   >> 296 static inline void cluster_set_count_flag(struct swap_cluster_info *info,
                                                   >> 297                                          unsigned int c, unsigned int f)
                                                   >> 298 {
                                                   >> 299         info->flags = f;
                                                   >> 300         info->data = c;
                                                   >> 301 }
                                                   >> 302 
                                                   >> 303 static inline unsigned int cluster_next(struct swap_cluster_info *info)
                                                   >> 304 {
                                                   >> 305         return info->data;
                                                   >> 306 }
                                                   >> 307 
                                                   >> 308 static inline void cluster_set_next(struct swap_cluster_info *info,
                                                   >> 309                                     unsigned int n)
                                                   >> 310 {
                                                   >> 311         info->data = n;
                                                   >> 312 }
                                                   >> 313 
                                                   >> 314 static inline void cluster_set_next_flag(struct swap_cluster_info *info,
                                                   >> 315                                          unsigned int n, unsigned int f)
                                                   >> 316 {
                                                   >> 317         info->flags = f;
                                                   >> 318         info->data = n;
                                                   >> 319 }
                                                   >> 320 
385 static inline bool cluster_is_free(struct swap    321 static inline bool cluster_is_free(struct swap_cluster_info *info)
386 {                                                 322 {
387         return info->flags & CLUSTER_FLAG_FREE    323         return info->flags & CLUSTER_FLAG_FREE;
388 }                                                 324 }
389                                                   325 
390 static inline unsigned int cluster_index(struc !! 326 static inline bool cluster_is_null(struct swap_cluster_info *info)
391                                          struc !! 327 {
                                                   >> 328         return info->flags & CLUSTER_FLAG_NEXT_NULL;
                                                   >> 329 }
                                                   >> 330 
                                                   >> 331 static inline void cluster_set_null(struct swap_cluster_info *info)
                                                   >> 332 {
                                                   >> 333         info->flags = CLUSTER_FLAG_NEXT_NULL;
                                                   >> 334         info->data = 0;
                                                   >> 335 }
                                                   >> 336 
                                                   >> 337 static inline bool cluster_is_huge(struct swap_cluster_info *info)
392 {                                                 338 {
393         return ci - si->cluster_info;          !! 339         if (IS_ENABLED(CONFIG_THP_SWAP))
                                                   >> 340                 return info->flags & CLUSTER_FLAG_HUGE;
                                                   >> 341         return false;
394 }                                                 342 }
395                                                   343 
396 static inline unsigned int cluster_offset(stru !! 344 static inline void cluster_clear_huge(struct swap_cluster_info *info)
397                                           stru << 
398 {                                                 345 {
399         return cluster_index(si, ci) * SWAPFIL !! 346         info->flags &= ~CLUSTER_FLAG_HUGE;
400 }                                                 347 }
401                                                   348 
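
cluster_index() on the 6.12 side derives a cluster's index purely from pointer arithmetic: subtracting the array base from an element pointer counts elements, not bytes. A small demonstration (256 mirrors the non-THP SWAPFILE_CLUSTER):

#include <stdio.h>

struct cluster { int count; };

int main(void)
{
        struct cluster clusters[8];
        struct cluster *ci = &clusters[5];

        /* pointer difference counts elements, not bytes */
        long idx = ci - clusters;

        printf("cluster index %ld, first offset %ld\n", idx, idx * 256);
        return 0;
}
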
402 static inline struct swap_cluster_info *lock_c    349 static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
403                                                   350                                                      unsigned long offset)
404 {                                                 351 {
405         struct swap_cluster_info *ci;             352         struct swap_cluster_info *ci;
406                                                   353 
407         ci = si->cluster_info;                    354         ci = si->cluster_info;
408         if (ci) {                                 355         if (ci) {
409                 ci += offset / SWAPFILE_CLUSTE    356                 ci += offset / SWAPFILE_CLUSTER;
410                 spin_lock(&ci->lock);             357                 spin_lock(&ci->lock);
411         }                                         358         }
412         return ci;                                359         return ci;
413 }                                                 360 }
414                                                   361 
415 static inline void unlock_cluster(struct swap_    362 static inline void unlock_cluster(struct swap_cluster_info *ci)
416 {                                                 363 {
417         if (ci)                                   364         if (ci)
418                 spin_unlock(&ci->lock);           365                 spin_unlock(&ci->lock);
419 }                                                 366 }
420                                                   367 
421 /*                                                368 /*
422  * Determine the locking method in use for thi    369  * Determine the locking method in use for this device.  Return
423  * swap_cluster_info if SSD-style cluster-base    370  * swap_cluster_info if SSD-style cluster-based locking is in place.
424  */                                               371  */
425 static inline struct swap_cluster_info *lock_c    372 static inline struct swap_cluster_info *lock_cluster_or_swap_info(
426                 struct swap_info_struct *si, u    373                 struct swap_info_struct *si, unsigned long offset)
427 {                                                 374 {
428         struct swap_cluster_info *ci;             375         struct swap_cluster_info *ci;
429                                                   376 
430         /* Try to use fine-grained SSD-style l    377         /* Try to use fine-grained SSD-style locking if available: */
431         ci = lock_cluster(si, offset);            378         ci = lock_cluster(si, offset);
432         /* Otherwise, fall back to traditional    379         /* Otherwise, fall back to traditional, coarse locking: */
433         if (!ci)                                  380         if (!ci)
434                 spin_lock(&si->lock);             381                 spin_lock(&si->lock);
435                                                   382 
436         return ci;                                383         return ci;
437 }                                                 384 }
438                                                   385 
439 static inline void unlock_cluster_or_swap_info    386 static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
440                                                   387                                                struct swap_cluster_info *ci)
441 {                                                 388 {
442         if (ci)                                   389         if (ci)
443                 unlock_cluster(ci);               390                 unlock_cluster(ci);
444         else                                      391         else
445                 spin_unlock(&si->lock);           392                 spin_unlock(&si->lock);
446 }                                                 393 }
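
A userspace rendering of the lock_cluster_or_swap_info() pattern: take a fine-grained per-bucket lock when one exists, otherwise fall back to a single coarse lock, and let the returned pointer tell the unlocker which lock is held. A sketch with pthreads; all names are hypothetical:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct bucket { pthread_mutex_t lock; };

struct table {
        pthread_mutex_t big_lock;       /* coarse fallback lock */
        struct bucket *buckets;         /* NULL if fine-grained mode is off */
        size_t nr_buckets;
};

static struct bucket *lock_bucket_or_table(struct table *t, size_t idx)
{
        struct bucket *b = NULL;

        if (t->buckets) {               /* fine-grained path */
                b = &t->buckets[idx % t->nr_buckets];
                pthread_mutex_lock(&b->lock);
        } else {                        /* coarse path */
                pthread_mutex_lock(&t->big_lock);
        }
        return b;       /* NULL tells the unlocker which lock is held */
}

static void unlock_bucket_or_table(struct table *t, struct bucket *b)
{
        if (b)
                pthread_mutex_unlock(&b->lock);
        else
                pthread_mutex_unlock(&t->big_lock);
}

int main(void)
{
        struct table t = { .big_lock = PTHREAD_MUTEX_INITIALIZER };
        struct bucket *b = lock_bucket_or_table(&t, 7); /* no buckets: coarse */

        unlock_bucket_or_table(&t, b);
        printf("locked and unlocked via the coarse fallback\n");
        return 0;
}
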
447                                                   394 
                                                   >> 395 static inline bool cluster_list_empty(struct swap_cluster_list *list)
                                                   >> 396 {
                                                   >> 397         return cluster_is_null(&list->head);
                                                   >> 398 }
                                                   >> 399 
                                                   >> 400 static inline unsigned int cluster_list_first(struct swap_cluster_list *list)
                                                   >> 401 {
                                                   >> 402         return cluster_next(&list->head);
                                                   >> 403 }
                                                   >> 404 
                                                   >> 405 static void cluster_list_init(struct swap_cluster_list *list)
                                                   >> 406 {
                                                   >> 407         cluster_set_null(&list->head);
                                                   >> 408         cluster_set_null(&list->tail);
                                                   >> 409 }
                                                   >> 410 
                                                   >> 411 static void cluster_list_add_tail(struct swap_cluster_list *list,
                                                   >> 412                                   struct swap_cluster_info *ci,
                                                   >> 413                                   unsigned int idx)
                                                   >> 414 {
                                                   >> 415         if (cluster_list_empty(list)) {
                                                   >> 416                 cluster_set_next_flag(&list->head, idx, 0);
                                                   >> 417                 cluster_set_next_flag(&list->tail, idx, 0);
                                                   >> 418         } else {
                                                   >> 419                 struct swap_cluster_info *ci_tail;
                                                   >> 420                 unsigned int tail = cluster_next(&list->tail);
                                                   >> 421 
                                                   >> 422                 /*
                                                   >> 423                  * Nested cluster lock, but both cluster locks are
                                                   >> 424                  * only acquired when we held swap_info_struct->lock
                                                   >> 425                  */
                                                   >> 426                 ci_tail = ci + tail;
                                                   >> 427                 spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING);
                                                   >> 428                 cluster_set_next(ci_tail, idx);
                                                   >> 429                 spin_unlock(&ci_tail->lock);
                                                   >> 430                 cluster_set_next_flag(&list->tail, idx, 0);
                                                   >> 431         }
                                                   >> 432 }
                                                   >> 433 
                                                   >> 434 static unsigned int cluster_list_del_first(struct swap_cluster_list *list,
                                                   >> 435                                            struct swap_cluster_info *ci)
                                                   >> 436 {
                                                   >> 437         unsigned int idx;
                                                   >> 438 
                                                   >> 439         idx = cluster_next(&list->head);
                                                   >> 440         if (cluster_next(&list->tail) == idx) {
                                                   >> 441                 cluster_set_null(&list->head);
                                                   >> 442                 cluster_set_null(&list->tail);
                                                   >> 443         } else
                                                   >> 444                 cluster_set_next_flag(&list->head,
                                                   >> 445                                       cluster_next(&ci[idx]), 0);
                                                   >> 446 
                                                   >> 447         return idx;
                                                   >> 448 }
                                                   >> 449 
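
The 5.15 cluster lists above chain clusters by array index rather than by pointer: each entry's data field holds the index of the next cluster. A compact sketch of the same add-tail/del-first discipline:

#include <stdio.h>

#define NIL (~0u)

struct node { unsigned int next; };     /* index of next node, or NIL */

int main(void)
{
        struct node nodes[4];
        unsigned int head = NIL, tail = NIL;

        /* append nodes 0..3 at the tail, as cluster_list_add_tail() does */
        for (unsigned int i = 0; i < 4; i++) {
                nodes[i].next = NIL;
                if (head == NIL) {
                        head = tail = i;
                } else {
                        nodes[tail].next = i;
                        tail = i;
                }
        }

        /* walk from the head, as cluster_list_del_first() would pop */
        for (unsigned int i = head; i != NIL; i = nodes[i].next)
                printf("cluster %u\n", i);
        return 0;
}
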
448 /* Add a cluster to discard list and schedule     450 /* Add a cluster to discard list and schedule it to do discard */
449 static void swap_cluster_schedule_discard(stru    451 static void swap_cluster_schedule_discard(struct swap_info_struct *si,
450                 struct swap_cluster_info *ci)  !! 452                 unsigned int idx)
451 {                                                 453 {
452         unsigned int idx = cluster_index(si, c << 
453         /*                                        454         /*
454          * If scan_swap_map_slots() can't find    455          * If scan_swap_map_slots() can't find a free cluster, it will check
455          * si->swap_map directly. To make sure    456          * si->swap_map directly. To make sure the discarding cluster isn't
456          * taken by scan_swap_map_slots(), mar    457          * taken by scan_swap_map_slots(), mark the swap entries bad (occupied).
457          * It will be cleared after discard       458          * It will be cleared after discard
458          */                                       459          */
459         memset(si->swap_map + idx * SWAPFILE_C    460         memset(si->swap_map + idx * SWAPFILE_CLUSTER,
460                         SWAP_MAP_BAD, SWAPFILE    461                         SWAP_MAP_BAD, SWAPFILE_CLUSTER);
461                                                   462 
462         VM_BUG_ON(ci->flags & CLUSTER_FLAG_FRE !! 463         cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx);
463         list_move_tail(&ci->list, &si->discard !! 464 
464         ci->flags = 0;                         << 
465         schedule_work(&si->discard_work);         465         schedule_work(&si->discard_work);
466 }                                                 466 }
467                                                   467 
468 static void __free_cluster(struct swap_info_st !! 468 static void __free_cluster(struct swap_info_struct *si, unsigned long idx)
469 {                                                 469 {
470         lockdep_assert_held(&si->lock);        !! 470         struct swap_cluster_info *ci = si->cluster_info;
471         lockdep_assert_held(&ci->lock);        << 
472                                                   471 
473         if (ci->flags)                         !! 472         cluster_set_flag(ci + idx, CLUSTER_FLAG_FREE);
474                 list_move_tail(&ci->list, &si- !! 473         cluster_list_add_tail(&si->free_clusters, ci, idx);
475         else                                   << 
476                 list_add_tail(&ci->list, &si-> << 
477         ci->flags = CLUSTER_FLAG_FREE;         << 
478         ci->order = 0;                         << 
479 }                                                 474 }
480                                                   475 
481 /*                                                476 /*
482  * Actually do the discards. After a cluster d    477  * Actually do the discards. After a cluster discard is finished, the cluster
483  * will be added to the free cluster list. The     478  * will be added to the free cluster list. The caller should hold si->lock.
484  */                                               479  */
485 static void swap_do_scheduled_discard(struct s    480 static void swap_do_scheduled_discard(struct swap_info_struct *si)
486 {                                                 481 {
487         struct swap_cluster_info *ci;          !! 482         struct swap_cluster_info *info, *ci;
488         unsigned int idx;                         483         unsigned int idx;
489                                                   484 
490         while (!list_empty(&si->discard_cluste !! 485         info = si->cluster_info;
491                 ci = list_first_entry(&si->dis !! 486 
492                 list_del(&ci->list);           !! 487         while (!cluster_list_empty(&si->discard_clusters)) {
493                 idx = cluster_index(si, ci);   !! 488                 idx = cluster_list_del_first(&si->discard_clusters, info);
494                 spin_unlock(&si->lock);           489                 spin_unlock(&si->lock);
495                                                   490 
496                 discard_swap_cluster(si, idx *    491                 discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
497                                 SWAPFILE_CLUST    492                                 SWAPFILE_CLUSTER);
498                                                   493 
499                 spin_lock(&si->lock);             494                 spin_lock(&si->lock);
500                 spin_lock(&ci->lock);          !! 495                 ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
501                 __free_cluster(si, ci);        !! 496                 __free_cluster(si, idx);
502                 memset(si->swap_map + idx * SW    497                 memset(si->swap_map + idx * SWAPFILE_CLUSTER,
503                                 0, SWAPFILE_CL    498                                 0, SWAPFILE_CLUSTER);
504                 spin_unlock(&ci->lock);        !! 499                 unlock_cluster(ci);
505         }                                         500         }
506 }                                                 501 }
507                                                   502 
508 static void swap_discard_work(struct work_stru    503 static void swap_discard_work(struct work_struct *work)
509 {                                                 504 {
510         struct swap_info_struct *si;              505         struct swap_info_struct *si;
511                                                   506 
512         si = container_of(work, struct swap_in    507         si = container_of(work, struct swap_info_struct, discard_work);
513                                                   508 
514         spin_lock(&si->lock);                     509         spin_lock(&si->lock);
515         swap_do_scheduled_discard(si);            510         swap_do_scheduled_discard(si);
516         spin_unlock(&si->lock);                   511         spin_unlock(&si->lock);
517 }                                                 512 }
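
swap_discard_work() recovers its swap_info_struct from the embedded work_struct via container_of(). A self-contained userspace rendering of that idiom:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work { int pending; };

struct device {                 /* stand-in for swap_info_struct */
        const char *name;
        struct work discard_work;
};

static void discard_fn(struct work *w)
{
        /* step back from the member to the enclosing structure */
        struct device *dev = container_of(w, struct device, discard_work);

        printf("running discard for %s\n", dev->name);
}

int main(void)
{
        struct device d = { .name = "swapdev0" };

        discard_fn(&d.discard_work);
        return 0;
}
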
518                                                   513 
519 static void swap_users_ref_free(struct percpu_    514 static void swap_users_ref_free(struct percpu_ref *ref)
520 {                                                 515 {
521         struct swap_info_struct *si;              516         struct swap_info_struct *si;
522                                                   517 
523         si = container_of(ref, struct swap_inf    518         si = container_of(ref, struct swap_info_struct, users);
524         complete(&si->comp);                      519         complete(&si->comp);
525 }                                                 520 }
526                                                   521 
527 static void free_cluster(struct swap_info_stru !! 522 static void alloc_cluster(struct swap_info_struct *si, unsigned long idx)
528 {                                                 523 {
529         VM_BUG_ON(ci->count != 0);             !! 524         struct swap_cluster_info *ci = si->cluster_info;
530         lockdep_assert_held(&si->lock);        << 
531         lockdep_assert_held(&ci->lock);        << 
532                                                   525 
533         if (ci->flags & CLUSTER_FLAG_FRAG)     !! 526         VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx);
534                 si->frag_cluster_nr[ci->order] !! 527         cluster_list_del_first(&si->free_clusters, ci);
                                                   >> 528         cluster_set_count_flag(ci + idx, 0, 0);
                                                   >> 529 }
                                                   >> 530 
                                                   >> 531 static void free_cluster(struct swap_info_struct *si, unsigned long idx)
                                                   >> 532 {
                                                   >> 533         struct swap_cluster_info *ci = si->cluster_info + idx;
535                                                   534 
                                                   >> 535         VM_BUG_ON(cluster_count(ci) != 0);
536         /*                                        536         /*
537          * If the swap is discardable, prepare    537          * If the swap is discardable, prepare to discard the cluster
538          * instead of freeing it immediately.     538          * instead of freeing it immediately. The cluster will be freed
539          * after discard.                         539          * after discard.
540          */                                       540          */
541         if ((si->flags & (SWP_WRITEOK | SWP_PA    541         if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
542             (SWP_WRITEOK | SWP_PAGE_DISCARD))     542             (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
543                 swap_cluster_schedule_discard( !! 543                 swap_cluster_schedule_discard(si, idx);
544                 return;                           544                 return;
545         }                                         545         }
546                                                   546 
547         __free_cluster(si, ci);                !! 547         __free_cluster(si, idx);
548 }                                                 548 }
549                                                   549 
550 /*                                                550 /*
551  * The cluster corresponding to page_nr will b !! 551  * The cluster corresponding to page_nr will be used. The cluster will be
552  * added to free cluster list and its usage co !! 552  * removed from the free cluster list and its usage counter increased.
553  * Only used for initialization.               << 
554  */                                               553  */
555 static void inc_cluster_info_page(struct swap_ !! 554 static void inc_cluster_info_page(struct swap_info_struct *p,
556         struct swap_cluster_info *cluster_info    555         struct swap_cluster_info *cluster_info, unsigned long page_nr)
557 {                                                 556 {
558         unsigned long idx = page_nr / SWAPFILE    557         unsigned long idx = page_nr / SWAPFILE_CLUSTER;
559         struct swap_cluster_info *ci;          << 
560                                                   558 
561         if (!cluster_info)                        559         if (!cluster_info)
562                 return;                           560                 return;
                                                   >> 561         if (cluster_is_free(&cluster_info[idx]))
                                                   >> 562                 alloc_cluster(p, idx);
563                                                   563 
564         ci = cluster_info + idx;               !! 564         VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
565         ci->count++;                           !! 565         cluster_set_count(&cluster_info[idx],
566                                                !! 566                 cluster_count(&cluster_info[idx]) + 1);
567         VM_BUG_ON(ci->count > SWAPFILE_CLUSTER << 
568         VM_BUG_ON(ci->flags);                  << 
569 }                                                 567 }
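inc_cluster_info_page() finds a page's cluster with integer division by SWAPFILE_CLUSTER and bumps that cluster's usage count, with a VM_BUG_ON guarding overflow. The same bookkeeping as a toy userspace model (the cluster size of 8 and the array sizes are arbitrary):

#include <assert.h>
#include <stdio.h>

#define CLUSTER_PAGES 8          /* stand-in for SWAPFILE_CLUSTER */
#define NR_CLUSTERS   4

struct cluster { unsigned int count; };

static void inc_cluster_page(struct cluster *info, unsigned long page_nr)
{
        struct cluster *ci = info + page_nr / CLUSTER_PAGES;

        ci->count++;
        assert(ci->count <= CLUSTER_PAGES);     /* VM_BUG_ON analog */
}

int main(void)
{
        struct cluster info[NR_CLUSTERS] = { 0 };

        inc_cluster_page(info, 0);      /* lands in cluster 0 */
        inc_cluster_page(info, 9);      /* lands in cluster 1 */
        inc_cluster_page(info, 15);     /* also cluster 1 */
        printf("counts: %u %u\n", info[0].count, info[1].count);   /* 1 2 */
        return 0;
}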
570                                                   568 
571 /*                                                569 /*
572  * The cluster ci decreases @nr_pages usage. I !! 570  * The cluster corresponding to page_nr decreases its usage by one. If the
573  * which means no page in the cluster is in us !! 571  * usage counter becomes 0, meaning no page in the cluster is in use, we can
574  * the cluster and add it to free cluster list !! 572  * optionally discard the cluster and add it to the free cluster list.
575  */                                               573  */
576 static void dec_cluster_info_page(struct swap_ !! 574 static void dec_cluster_info_page(struct swap_info_struct *p,
577                                   struct swap_ !! 575         struct swap_cluster_info *cluster_info, unsigned long page_nr)
578 {                                                 576 {
579         if (!si->cluster_info)                 !! 577         unsigned long idx = page_nr / SWAPFILE_CLUSTER;
580                 return;                        << 
581                                                << 
582         VM_BUG_ON(ci->count < nr_pages);       << 
583         VM_BUG_ON(cluster_is_free(ci));        << 
584         lockdep_assert_held(&si->lock);        << 
585         lockdep_assert_held(&ci->lock);        << 
586         ci->count -= nr_pages;                 << 
587                                                   578 
588         if (!ci->count) {                      !! 579         if (!cluster_info)
589                 free_cluster(si, ci);          << 
590                 return;                           580                 return;
591         }                                      << 
592                                                << 
593         if (!(ci->flags & CLUSTER_FLAG_NONFULL << 
594                 VM_BUG_ON(ci->flags & CLUSTER_ << 
595                 if (ci->flags & CLUSTER_FLAG_F << 
596                         si->frag_cluster_nr[ci << 
597                 list_move_tail(&ci->list, &si- << 
598                 ci->flags = CLUSTER_FLAG_NONFU << 
599         }                                      << 
600 }                                              << 
601                                                << 
602 static bool cluster_reclaim_range(struct swap_ << 
603                                   struct swap_ << 
604                                   unsigned lon << 
605 {                                              << 
606         unsigned char *map = si->swap_map;     << 
607         unsigned long offset;                  << 
608                                                   581 
609         spin_unlock(&ci->lock);                !! 582         VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0);
610         spin_unlock(&si->lock);                !! 583         cluster_set_count(&cluster_info[idx],
                                                   >> 584                 cluster_count(&cluster_info[idx]) - 1);
611                                                   585 
612         for (offset = start; offset < end; off !! 586         if (cluster_count(&cluster_info[idx]) == 0)
613                 switch (READ_ONCE(map[offset]) !! 587                 free_cluster(p, idx);
614                 case 0:                        << 
615                         continue;              << 
616                 case SWAP_HAS_CACHE:           << 
617                         if (__try_to_reclaim_s << 
618                                 continue;      << 
619                         goto out;              << 
620                 default:                       << 
621                         goto out;              << 
622                 }                              << 
623         }                                      << 
624 out:                                           << 
625         spin_lock(&si->lock);                  << 
626         spin_lock(&ci->lock);                  << 
627                                                << 
628         /*                                     << 
629          * Recheck the range no matter reclaim << 
630          * could have been freed while we are  << 
631          */                                    << 
632         for (offset = start; offset < end; off << 
633                 if (READ_ONCE(map[offset]))    << 
634                         return false;          << 
635                                                << 
636         return true;                           << 
637 }                                                 588 }
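cluster_reclaim_range() has to drop both si->lock and the cluster lock before calling __try_to_reclaim_swap(), and therefore revalidates the entire range once the locks are retaken, because other CPUs may have reused slots in the window. A pthread sketch of that drop-work-retake-recheck discipline (the slots array and slow_reclaim() are stand-ins):

#include <pthread.h>
#include <stdbool.h>

#define NR 8

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned char slots[NR];         /* protected by 'lock' */

static void slow_reclaim(int i) { slots[i] = 0; /* pretend this does I/O */ }

static bool reclaim_range(int start, int end)
{
        pthread_mutex_unlock(&lock);    /* can't do slow work under the lock */
        for (int i = start; i < end; i++)
                slow_reclaim(i);
        pthread_mutex_lock(&lock);

        /*
         * Recheck under the lock: another thread may have reused a
         * slot while we were not holding it.
         */
        for (int i = start; i < end; i++)
                if (slots[i])
                        return false;
        return true;
}

int main(void)
{
        pthread_mutex_lock(&lock);
        bool ok = reclaim_range(0, NR);
        pthread_mutex_unlock(&lock);
        return ok ? 0 : 1;
}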
638                                                   589 
639 static bool cluster_scan_range(struct swap_inf !! 590 /*
640                                struct swap_clu !! 591  * It's possible scan_swap_map_slots() uses a free cluster in the middle of the
641                                unsigned long s !! 592  * free cluster list. Avoid such abuse to prevent list corruption.
                                                   >> 593  */
                                                   >> 594 static bool
                                                   >> 595 scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
                                                   >> 596         unsigned long offset)
642 {                                                 597 {
643         unsigned long offset, end = start + nr !! 598         struct percpu_cluster *percpu_cluster;
644         unsigned char *map = si->swap_map;     !! 599         bool conflict;
645         bool need_reclaim = false;             << 
646                                                   600 
647         for (offset = start; offset < end; off !! 601         offset /= SWAPFILE_CLUSTER;
648                 switch (READ_ONCE(map[offset]) !! 602         conflict = !cluster_list_empty(&si->free_clusters) &&
649                 case 0:                        !! 603                 offset != cluster_list_first(&si->free_clusters) &&
650                         continue;              !! 604                 cluster_is_free(&si->cluster_info[offset]);
651                 case SWAP_HAS_CACHE:           << 
652                         if (!vm_swap_full())   << 
653                                 return false;  << 
654                         need_reclaim = true;   << 
655                         continue;              << 
656                 default:                       << 
657                         return false;          << 
658                 }                              << 
659         }                                      << 
660                                                   605 
661         if (need_reclaim)                      !! 606         if (!conflict)
662                 return cluster_reclaim_range(s !! 607                 return false;
663                                                   608 
                                                   >> 609         percpu_cluster = this_cpu_ptr(si->percpu_cluster);
                                                   >> 610         cluster_set_null(&percpu_cluster->index);
664         return true;                              611         return true;
665 }                                                 612 }
666                                                   613 
667 static void cluster_alloc_range(struct swap_in << 
668                                 unsigned int s << 
669                                 unsigned int o << 
670 {                                              << 
671         unsigned int nr_pages = 1 << order;    << 
672                                                << 
673         if (cluster_is_free(ci)) {             << 
674                 if (nr_pages < SWAPFILE_CLUSTE << 
675                         list_move_tail(&ci->li << 
676                         ci->flags = CLUSTER_FL << 
677                 }                              << 
678                 ci->order = order;             << 
679         }                                      << 
680                                                << 
681         memset(si->swap_map + start, usage, nr << 
682         swap_range_alloc(si, start, nr_pages); << 
683         ci->count += nr_pages;                 << 
684                                                << 
685         if (ci->count == SWAPFILE_CLUSTER) {   << 
686                 VM_BUG_ON(!(ci->flags &        << 
687                           (CLUSTER_FLAG_FREE | << 
688                 if (ci->flags & CLUSTER_FLAG_F << 
689                         si->frag_cluster_nr[ci << 
690                 list_move_tail(&ci->list, &si- << 
691                 ci->flags = CLUSTER_FLAG_FULL; << 
692         }                                      << 
693 }                                              << 
694                                                << 
695 static unsigned int alloc_swap_scan_cluster(st << 
696                                             un << 
697                                             un << 
698 {                                              << 
699         unsigned long start = offset & ~(SWAPF << 
700         unsigned long end = min(start + SWAPFI << 
701         unsigned int nr_pages = 1 << order;    << 
702         struct swap_cluster_info *ci;          << 
703                                                << 
704         if (end < nr_pages)                    << 
705                 return SWAP_NEXT_INVALID;      << 
706         end -= nr_pages;                       << 
707                                                << 
708         ci = lock_cluster(si, offset);         << 
709         if (ci->count + nr_pages > SWAPFILE_CL << 
710                 offset = SWAP_NEXT_INVALID;    << 
711                 goto done;                     << 
712         }                                      << 
713                                                << 
714         while (offset <= end) {                << 
715                 if (cluster_scan_range(si, ci, << 
716                         cluster_alloc_range(si << 
717                         *foundp = offset;      << 
718                         if (ci->count == SWAPF << 
719                                 offset = SWAP_ << 
720                                 goto done;     << 
721                         }                      << 
722                         offset += nr_pages;    << 
723                         break;                 << 
724                 }                              << 
725                 offset += nr_pages;            << 
726         }                                      << 
727         if (offset > end)                      << 
728                 offset = SWAP_NEXT_INVALID;    << 
729 done:                                          << 
730         unlock_cluster(ci);                    << 
731         return offset;                         << 
732 }                                              << 
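alloc_swap_scan_cluster() first snaps the offset down to its cluster boundary with offset & ~(SWAPFILE_CLUSTER - 1), which works because SWAPFILE_CLUSTER is a power of two, then probes the cluster in 1 << order strides. The arithmetic in isolation (512 and order 2 are arbitrary example values):

#include <stdio.h>

#define CLUSTER 512UL   /* must be a power of two for the mask trick */

int main(void)
{
        unsigned long offset = 1234;
        unsigned long start = offset & ~(CLUSTER - 1);  /* 1024 */
        unsigned int order = 2;
        unsigned int nr_pages = 1u << order;            /* 4 pages */
        unsigned int steps = 0;

        /* Walk the cluster in nr_pages-sized strides, as the scan does. */
        for (unsigned long p = start; p + nr_pages <= start + CLUSTER;
             p += nr_pages)
                steps++;

        printf("cluster start %lu, %u candidate positions\n", start, steps);
        return 0;
}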
733                                                << 
734 /* Scan full clusters and reclaim cache-only swap entries */ << 
735 static void swap_reclaim_full_clusters(struct  << 
736 {                                              << 
737         long to_scan = 1;                      << 
738         unsigned long offset, end;             << 
739         struct swap_cluster_info *ci;          << 
740         unsigned char *map = si->swap_map;     << 
741         int nr_reclaim;                        << 
742                                                << 
743         if (force)                             << 
744                 to_scan = si->inuse_pages / SW << 
745                                                << 
746         while (!list_empty(&si->full_clusters) << 
747                 ci = list_first_entry(&si->ful << 
748                 list_move_tail(&ci->list, &si- << 
749                 offset = cluster_offset(si, ci << 
750                 end = min(si->max, offset + SW << 
751                 to_scan--;                     << 
752                                                << 
753                 spin_unlock(&si->lock);        << 
754                 while (offset < end) {         << 
755                         if (READ_ONCE(map[offs << 
756                                 nr_reclaim = _ << 
757                                                << 
758                                 if (nr_reclaim << 
759                                         offset << 
760                                         contin << 
761                                 }              << 
762                         }                      << 
763                         offset++;              << 
764                 }                              << 
765                 spin_lock(&si->lock);          << 
766                                                << 
767                 if (to_scan <= 0)              << 
768                         break;                 << 
769         }                                      << 
770 }                                              << 
771                                                << 
772 static void swap_reclaim_work(struct work_stru << 
773 {                                              << 
774         struct swap_info_struct *si;           << 
775                                                << 
776         si = container_of(work, struct swap_in << 
777                                                << 
778         spin_lock(&si->lock);                  << 
779         swap_reclaim_full_clusters(si, true);  << 
780         spin_unlock(&si->lock);                << 
781 }                                              << 
782                                                << 
783 /*                                                614 /*
784  * Try to get swap entries with specified orde !! 615  * Try to get a swap entry from the current CPU's swap entry pool (a cluster).
785  * pool (a cluster). This might involve alloca !! 616  * This might involve allocating a new cluster for the current CPU, too.
786  * too.                                        << 
787  */                                               617  */
788 static unsigned long cluster_alloc_swap_entry( !! 618 static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
789                                                !! 619         unsigned long *offset, unsigned long *scan_base)
790 {                                                 620 {
791         struct percpu_cluster *cluster;           621         struct percpu_cluster *cluster;
792         struct swap_cluster_info *ci;             622         struct swap_cluster_info *ci;
793         unsigned int offset, found = 0;        !! 623         unsigned long tmp, max;
794                                                   624 
795 new_cluster:                                      625 new_cluster:
796         lockdep_assert_held(&si->lock);        << 
797         cluster = this_cpu_ptr(si->percpu_clus    626         cluster = this_cpu_ptr(si->percpu_cluster);
798         offset = cluster->next[order];         !! 627         if (cluster_is_null(&cluster->index)) {
799         if (offset) {                          !! 628                 if (!cluster_list_empty(&si->free_clusters)) {
800                 offset = alloc_swap_scan_clust !! 629                         cluster->index = si->free_clusters.head;
801                 if (found)                     !! 630                         cluster->next = cluster_next(&cluster->index) *
802                         goto done;             !! 631                                         SWAPFILE_CLUSTER;
803         }                                      !! 632                 } else if (!cluster_list_empty(&si->discard_clusters)) {
804                                                !! 633                         /*
805         if (!list_empty(&si->free_clusters)) { !! 634                          * we have no free clusters, but some are being
806                 ci = list_first_entry(&si->fre !! 635                          * discarded; do the discard now and reclaim them,
807                 offset = alloc_swap_scan_clust !! 636                          * then reread cluster_next_cpu since we dropped si->lock
808                 VM_BUG_ON(!found);             !! 637                          */
809                 goto done;                     !! 638                         swap_do_scheduled_discard(si);
                                                   >> 639                         *scan_base = this_cpu_read(*si->cluster_next_cpu);
                                                   >> 640                         *offset = *scan_base;
                                                   >> 641                         goto new_cluster;
                                                   >> 642                 } else
                                                   >> 643                         return false;
810         }                                         644         }
811                                                   645 
812         /* Try reclaim from full clusters if f !! 646         /*
813         if (vm_swap_full())                    !! 647          * Other CPUs can use our cluster if they can't find a free cluster,
814                 swap_reclaim_full_clusters(si, !! 648          * check if there is still a free entry in the cluster
815                                                !! 649          */
816         if (order < PMD_ORDER) {               !! 650         tmp = cluster->next;
817                 unsigned int frags = 0;        !! 651         max = min_t(unsigned long, si->max,
818                                                !! 652                     (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
819                 while (!list_empty(&si->nonful !! 653         if (tmp < max) {
820                         ci = list_first_entry( !! 654                 ci = lock_cluster(si, tmp);
821                                                !! 655                 while (tmp < max) {
822                         list_move_tail(&ci->li !! 656                         if (!si->swap_map[tmp])
823                         ci->flags = CLUSTER_FL << 
824                         si->frag_cluster_nr[or << 
825                         offset = alloc_swap_sc << 
826                                                << 
827                         frags++;               << 
828                         if (found)             << 
829                                 break;            657                                 break;
                                                   >> 658                         tmp++;
830                 }                                 659                 }
831                                                !! 660                 unlock_cluster(ci);
832                 if (!found) {                  << 
833                         /*                     << 
834                          * Nonfull clusters ar << 
835                          * here, count them to << 
836                          */                    << 
837                         while (frags < si->fra << 
838                                 ci = list_firs << 
839                                                << 
840                                 /*             << 
841                                  * Rotate the  << 
842                                  * high order  << 
843                                  * this help k << 
844                                  */            << 
845                                 list_move_tail << 
846                                 offset = alloc << 
847                                                << 
848                                 frags++;       << 
849                                 if (found)     << 
850                                         break; << 
851                         }                      << 
852                 }                              << 
853         }                                         661         }
854                                                !! 662         if (tmp >= max) {
855         if (found)                             !! 663                 cluster_set_null(&cluster->index);
856                 goto done;                     << 
857                                                << 
858         if (!list_empty(&si->discard_clusters) << 
859                 /*                             << 
860                  * we don't have free cluster  << 
861                  * discarding, do discard now  << 
862                  * reread cluster_next_cpu sin << 
863                  */                            << 
864                 swap_do_scheduled_discard(si); << 
865                 goto new_cluster;                 664                 goto new_cluster;
866         }                                         665         }
867                                                !! 666         cluster->next = tmp + 1;
868         if (order)                             !! 667         *offset = tmp;
869                 goto done;                     !! 668         *scan_base = tmp;
870                                                !! 669         return true;
871         /* Order 0 stealing from higher order  << 
872         for (int o = 1; o < SWAP_NR_ORDERS; o+ << 
873                 /*                             << 
874                  * Clusters here have at least << 
875                  * allocation, but reclaim may << 
876                  */                            << 
877                 while (!list_empty(&si->frag_c << 
878                         ci = list_first_entry( << 
879                                                << 
880                         offset = alloc_swap_sc << 
881                                                << 
882                         if (found)             << 
883                                 goto done;     << 
884                 }                              << 
885                                                << 
886                 while (!list_empty(&si->nonful << 
887                         ci = list_first_entry( << 
888                                                << 
889                         offset = alloc_swap_sc << 
890                                                << 
891                         if (found)             << 
892                                 goto done;     << 
893                 }                              << 
894         }                                      << 
895                                                << 
896 done:                                          << 
897         cluster->next[order] = offset;         << 
898         return found;                          << 
899 }                                                 670 }
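cluster_alloc_swap_entry() works through its sources in a fixed order: the per-CPU next hint, the free cluster list, then (below PMD order) nonfull and fragment clusters, then pending discards, and finally order-0 stealing from higher-order lists. A heavily simplified skeleton of that cascade (every function below is a stub for illustration, not kernel API):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in sources; each sets *off and returns true on success. */
static bool try_percpu_hint(unsigned long *off) { (void)off; return false; }
static bool try_free_list(unsigned long *off)   { *off = 42; return true; }
static bool try_nonfull(unsigned long *off)     { (void)off; return false; }
static bool try_frag(unsigned long *off)        { (void)off; return false; }
static bool flush_discards(void)                { return false; }

/* Same fallback order the allocator uses, heavily simplified. */
static unsigned long alloc_entry(int order)
{
        unsigned long off;
again:
        if (try_percpu_hint(&off) || try_free_list(&off))
                return off;
        if (order == 0 && (try_nonfull(&off) || try_frag(&off)))
                return off;
        if (flush_discards())           /* discarded clusters become free */
                goto again;
        return 0;                       /* allocation failed */
}

int main(void)
{
        printf("got offset %lu\n", alloc_entry(0));     /* 42 */
        return 0;
}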
900                                                   671 
901 static void __del_from_avail_list(struct swap_ !! 672 static void __del_from_avail_list(struct swap_info_struct *p)
902 {                                                 673 {
903         int nid;                                  674         int nid;
904                                                   675 
905         assert_spin_locked(&si->lock);         !! 676         assert_spin_locked(&p->lock);
906         for_each_node(nid)                        677         for_each_node(nid)
907                 plist_del(&si->avail_lists[nid !! 678                 plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]);
908 }                                                 679 }
909                                                   680 
910 static void del_from_avail_list(struct swap_in !! 681 static void del_from_avail_list(struct swap_info_struct *p)
911 {                                                 682 {
912         spin_lock(&swap_avail_lock);              683         spin_lock(&swap_avail_lock);
913         __del_from_avail_list(si);             !! 684         __del_from_avail_list(p);
914         spin_unlock(&swap_avail_lock);            685         spin_unlock(&swap_avail_lock);
915 }                                                 686 }
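del_from_avail_list() removes the device's plist node from every NUMA node's availability list while holding the single swap_avail_lock, so the device disappears from all nodes atomically. A miniature of per-node membership guarded by one lock (plain booleans stand in for the plist nodes):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_NODES 2

static pthread_mutex_t avail_lock = PTHREAD_MUTEX_INITIALIZER;
static bool on_list[NR_NODES];          /* plist membership per node */

static void del_from_avail(void)
{
        pthread_mutex_lock(&avail_lock);
        for (int nid = 0; nid < NR_NODES; nid++)
                on_list[nid] = false;   /* plist_del() analog */
        pthread_mutex_unlock(&avail_lock);
}

int main(void)
{
        for (int nid = 0; nid < NR_NODES; nid++)
                on_list[nid] = true;
        del_from_avail();
        printf("node0=%d node1=%d\n", on_list[0], on_list[1]);  /* 0 0 */
        return 0;
}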
916                                                   687 
917 static void swap_range_alloc(struct swap_info_    688 static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
918                              unsigned int nr_e    689                              unsigned int nr_entries)
919 {                                                 690 {
920         unsigned int end = offset + nr_entries    691         unsigned int end = offset + nr_entries - 1;
921                                                   692 
922         if (offset == si->lowest_bit)             693         if (offset == si->lowest_bit)
923                 si->lowest_bit += nr_entries;     694                 si->lowest_bit += nr_entries;
924         if (end == si->highest_bit)               695         if (end == si->highest_bit)
925                 WRITE_ONCE(si->highest_bit, si    696                 WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries);
926         WRITE_ONCE(si->inuse_pages, si->inuse_ !! 697         si->inuse_pages += nr_entries;
927         if (si->inuse_pages == si->pages) {       698         if (si->inuse_pages == si->pages) {
928                 si->lowest_bit = si->max;         699                 si->lowest_bit = si->max;
929                 si->highest_bit = 0;              700                 si->highest_bit = 0;
930                 del_from_avail_list(si);          701                 del_from_avail_list(si);
931                                                << 
932                 if (vm_swap_full())            << 
933                         schedule_work(&si->rec << 
934         }                                         702         }
935 }                                                 703 }
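swap_range_alloc() maintains lowest_bit/highest_bit as a cheap window bounding where free slots can still be found: an allocation touching a window edge shrinks it, and once inuse_pages reaches pages the device is pulled off the avail lists (and, in this version, reclaim work may be kicked). The watermark update in miniature:

#include <stdio.h>

struct dev {
        unsigned int lowest, highest;   /* free-slot search window */
        unsigned int inuse, pages;
};

static void range_alloc(struct dev *d, unsigned int off, unsigned int nr)
{
        unsigned int end = off + nr - 1;

        if (off == d->lowest)
                d->lowest += nr;        /* window shrinks from below */
        if (end == d->highest)
                d->highest -= nr;       /* ...or from above */
        d->inuse += nr;
        if (d->inuse == d->pages)
                printf("device full, remove from avail lists\n");
}

int main(void)
{
        struct dev d = { .lowest = 1, .highest = 100, .pages = 100 };

        range_alloc(&d, 1, 4);
        printf("window: [%u, %u]\n", d.lowest, d.highest);      /* [5, 100] */
        return 0;
}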
936                                                   704 
937 static void add_to_avail_list(struct swap_info !! 705 static void add_to_avail_list(struct swap_info_struct *p)
938 {                                                 706 {
939         int nid;                                  707         int nid;
940                                                   708 
941         spin_lock(&swap_avail_lock);              709         spin_lock(&swap_avail_lock);
942         for_each_node(nid)                     !! 710         for_each_node(nid) {
943                 plist_add(&si->avail_lists[nid !! 711                 WARN_ON(!plist_node_empty(&p->avail_lists[nid]));
                                                   >> 712                 plist_add(&p->avail_lists[nid], &swap_avail_heads[nid]);
                                                   >> 713         }
944         spin_unlock(&swap_avail_lock);            714         spin_unlock(&swap_avail_lock);
945 }                                                 715 }
946                                                   716 
947 static void swap_range_free(struct swap_info_s    717 static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
948                             unsigned int nr_en    718                             unsigned int nr_entries)
949 {                                                 719 {
950         unsigned long begin = offset;             720         unsigned long begin = offset;
951         unsigned long end = offset + nr_entrie    721         unsigned long end = offset + nr_entries - 1;
952         void (*swap_slot_free_notify)(struct b    722         void (*swap_slot_free_notify)(struct block_device *, unsigned long);
953         unsigned int i;                        << 
954                                                << 
955         /*                                     << 
956          * Use atomic clear_bit operations onl << 
957          * bitmap_clear to prevent adjacent bi << 
958          */                                    << 
959         for (i = 0; i < nr_entries; i++)       << 
960                 clear_bit(offset + i, si->zero << 
961                                                   723 
962         if (offset < si->lowest_bit)              724         if (offset < si->lowest_bit)
963                 si->lowest_bit = offset;          725                 si->lowest_bit = offset;
964         if (end > si->highest_bit) {              726         if (end > si->highest_bit) {
965                 bool was_full = !si->highest_b    727                 bool was_full = !si->highest_bit;
966                                                   728 
967                 WRITE_ONCE(si->highest_bit, en    729                 WRITE_ONCE(si->highest_bit, end);
968                 if (was_full && (si->flags & S    730                 if (was_full && (si->flags & SWP_WRITEOK))
969                         add_to_avail_list(si);    731                         add_to_avail_list(si);
970         }                                         732         }
                                                   >> 733         atomic_long_add(nr_entries, &nr_swap_pages);
                                                   >> 734         si->inuse_pages -= nr_entries;
971         if (si->flags & SWP_BLKDEV)               735         if (si->flags & SWP_BLKDEV)
972                 swap_slot_free_notify =           736                 swap_slot_free_notify =
973                         si->bdev->bd_disk->fop    737                         si->bdev->bd_disk->fops->swap_slot_free_notify;
974         else                                      738         else
975                 swap_slot_free_notify = NULL;     739                 swap_slot_free_notify = NULL;
976         while (offset <= end) {                   740         while (offset <= end) {
977                 arch_swap_invalidate_page(si->    741                 arch_swap_invalidate_page(si->type, offset);
                                                   >> 742                 frontswap_invalidate_page(si->type, offset);
978                 if (swap_slot_free_notify)        743                 if (swap_slot_free_notify)
979                         swap_slot_free_notify(    744                         swap_slot_free_notify(si->bdev, offset);
980                 offset++;                         745                 offset++;
981         }                                         746         }
982         clear_shadow_from_swap_cache(si->type,    747         clear_shadow_from_swap_cache(si->type, begin, end);
983                                                << 
984         /*                                     << 
985          * Make sure that try_to_unuse() obser << 
986          * only after the above cleanups are d << 
987          */                                    << 
988         smp_wmb();                             << 
989         atomic_long_add(nr_entries, &nr_swap_p << 
990         WRITE_ONCE(si->inuse_pages, si->inuse_ << 
991 }                                                 748 }
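swap_range_free() finishes every per-slot cleanup (zeromap bits, swap cache shadows, notifications) and only then, past an smp_wmb(), publishes the freed capacity by updating nr_swap_pages and inuse_pages, so that a try_to_unuse() observing the new counts also observes the cleanups. A C11-atomics sketch of that publish-after-prepare ordering (all names here are illustrative):

#include <stdatomic.h>
#include <stdio.h>

static unsigned char map[64];           /* per-slot state */
static atomic_long inuse = 64;

static void range_free(unsigned int off, unsigned int nr)
{
        for (unsigned int i = 0; i < nr; i++)
                map[off + i] = 0;       /* all cleanup happens first */

        /* smp_wmb() analog: order the cleanup before the counter update. */
        atomic_thread_fence(memory_order_release);

        atomic_fetch_sub_explicit(&inuse, nr, memory_order_relaxed);
}

int main(void)
{
        range_free(0, 8);
        printf("inuse now %ld\n", atomic_load(&inuse));         /* 56 */
        return 0;
}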
992                                                   749 
993 static void set_cluster_next(struct swap_info_    750 static void set_cluster_next(struct swap_info_struct *si, unsigned long next)
994 {                                                 751 {
995         unsigned long prev;                       752         unsigned long prev;
996                                                   753 
997         if (!(si->flags & SWP_SOLIDSTATE)) {      754         if (!(si->flags & SWP_SOLIDSTATE)) {
998                 si->cluster_next = next;          755                 si->cluster_next = next;
999                 return;                           756                 return;
1000         }                                        757         }
1001                                                  758 
1002         prev = this_cpu_read(*si->cluster_nex    759         prev = this_cpu_read(*si->cluster_next_cpu);
1003         /*                                       760         /*
1004          * Cross the swap address space size     761          * Cross the swap address space size aligned trunk, choose
1005          * another trunk randomly to avoid lo    762          * another trunk randomly to avoid lock contention on swap
1006          * address space if possible.            763          * address space if possible.
1007          */                                      764          */
1008         if ((prev >> SWAP_ADDRESS_SPACE_SHIFT    765         if ((prev >> SWAP_ADDRESS_SPACE_SHIFT) !=
1009             (next >> SWAP_ADDRESS_SPACE_SHIFT    766             (next >> SWAP_ADDRESS_SPACE_SHIFT)) {
1010                 /* No free swap slots availab    767                 /* No free swap slots available */
1011                 if (si->highest_bit <= si->lo    768                 if (si->highest_bit <= si->lowest_bit)
1012                         return;                  769                         return;
1013                 next = get_random_u32_inclusi !! 770                 next = si->lowest_bit +
                                                   >> 771                         prandom_u32_max(si->highest_bit - si->lowest_bit + 1);
1014                 next = ALIGN_DOWN(next, SWAP_    772                 next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES);
1015                 next = max_t(unsigned int, ne    773                 next = max_t(unsigned int, next, si->lowest_bit);
1016         }                                        774         }
1017         this_cpu_write(*si->cluster_next_cpu,    775         this_cpu_write(*si->cluster_next_cpu, next);
1018 }                                                776 }
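When the updated position crosses into a different SWAP_ADDRESS_SPACE-sized region, set_cluster_next() re-seeds the per-CPU hint at a random, region-aligned spot inside [lowest_bit, highest_bit], spreading CPUs across the address-space locks. The selection math with a stand-in RNG (the real code uses get_random_u32_inclusive(), or prandom_u32_max() in 5.15):

#include <stdio.h>
#include <stdlib.h>

#define CHUNK 4096UL    /* stand-in for SWAP_ADDRESS_SPACE_PAGES */
#define ALIGN_DOWN(x, a) ((x) & ~((unsigned long)(a) - 1))

int main(void)
{
        unsigned long lowest = 10, highest = 100000;

        /* Random offset in [lowest, highest]. */
        unsigned long next = lowest +
                (unsigned long)rand() % (highest - lowest + 1);

        next = ALIGN_DOWN(next, CHUNK); /* snap to a chunk boundary */
        if (next < lowest)              /* max_t(..., lowest) analog */
                next = lowest;

        printf("new per-cpu next: %lu\n", next);
        return 0;
}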
1019                                                  777 
1020 static bool swap_offset_available_and_locked( << 
1021                                               << 
1022 {                                             << 
1023         if (data_race(!si->swap_map[offset])) << 
1024                 spin_lock(&si->lock);         << 
1025                 return true;                  << 
1026         }                                     << 
1027                                               << 
1028         if (vm_swap_full() && READ_ONCE(si->s << 
1029                 spin_lock(&si->lock);         << 
1030                 return true;                  << 
1031         }                                     << 
1032                                               << 
1033         return false;                         << 
1034 }                                             << 
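swap_offset_available_and_locked() deliberately peeks at the slot without si->lock — the data_race()/READ_ONCE annotations mark the racy read as intentional — and only takes the lock when the slot looks usable; the caller then revalidates under the lock at checks:. A sketch with a relaxed C11 atomic standing in for the annotated read:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned char slot;      /* 0 means free */

/* Returns with 'lock' held iff the slot looked free. */
static bool available_and_locked(void)
{
        /* Intentionally racy peek, like data_race(!si->swap_map[offset]). */
        if (atomic_load_explicit(&slot, memory_order_relaxed) == 0) {
                pthread_mutex_lock(&lock);
                return true;    /* caller must recheck under the lock */
        }
        return false;
}

int main(void)
{
        if (available_and_locked())
                pthread_mutex_unlock(&lock);
        return 0;
}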
1035                                               << 
1036 static int cluster_alloc_swap(struct swap_inf << 
1037                              unsigned char us << 
1038                              swp_entry_t slot << 
1039 {                                             << 
1040         int n_ret = 0;                        << 
1041                                               << 
1042         VM_BUG_ON(!si->cluster_info);         << 
1043                                               << 
1044         while (n_ret < nr) {                  << 
1045                 unsigned long offset = cluste << 
1046                                               << 
1047                 if (!offset)                  << 
1048                         break;                << 
1049                 slots[n_ret++] = swp_entry(si << 
1050         }                                     << 
1051                                               << 
1052         return n_ret;                         << 
1053 }                                             << 
1054                                               << 
1055 static int scan_swap_map_slots(struct swap_in    778 static int scan_swap_map_slots(struct swap_info_struct *si,
1056                                unsigned char     779                                unsigned char usage, int nr,
1057                                swp_entry_t sl !! 780                                swp_entry_t slots[])
1058 {                                                781 {
                                                   >> 782         struct swap_cluster_info *ci;
1059         unsigned long offset;                    783         unsigned long offset;
1060         unsigned long scan_base;                 784         unsigned long scan_base;
1061         unsigned long last_in_cluster = 0;       785         unsigned long last_in_cluster = 0;
1062         int latency_ration = LATENCY_LIMIT;      786         int latency_ration = LATENCY_LIMIT;
1063         unsigned int nr_pages = 1 << order;   << 
1064         int n_ret = 0;                           787         int n_ret = 0;
1065         bool scanned_many = false;               788         bool scanned_many = false;
1066                                                  789 
1067         /*                                       790         /*
1068          * We try to cluster swap pages by al    791          * We try to cluster swap pages by allocating them sequentially
1069          * in swap.  Once we've allocated SWA    792          * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
1070          * way, however, we resort to first-f    793          * way, however, we resort to first-free allocation, starting
1071          * a new cluster.  This prevents us f    794          * a new cluster.  This prevents us from scattering swap pages
1072          * all over the entire swap partition    795          * all over the entire swap partition, so that we reduce
1073          * overall disk seek times between sw    796          * overall disk seek times between swap pages.  -- sct
1074          * But we do now try to find an empty    797          * But we do now try to find an empty cluster.  -Andrea
1075          * And we let swap pages go all over     798          * And we let swap pages go all over an SSD partition.  Hugh
1076          */                                      799          */
1077                                                  800 
1078         if (order > 0) {                      << 
1079                 /*                            << 
1080                  * Should not even be attempt << 
1081                  * page swap is disabled.  Wa << 
1082                  */                           << 
1083                 if (!IS_ENABLED(CONFIG_THP_SW << 
1084                     nr_pages > SWAPFILE_CLUST << 
1085                         VM_WARN_ON_ONCE(1);   << 
1086                         return 0;             << 
1087                 }                             << 
1088                                               << 
1089                 /*                            << 
1090                  * Swapfile is not block devi << 
1091                  * to allocate large entries. << 
1092                  */                           << 
1093                 if (!(si->flags & SWP_BLKDEV) << 
1094                         return 0;             << 
1095         }                                     << 
1096                                               << 
1097         if (si->cluster_info)                 << 
1098                 return cluster_alloc_swap(si, << 
1099                                               << 
1100         si->flags += SWP_SCANNING;               801         si->flags += SWP_SCANNING;
1101                                               !! 802         /*
1102         /* For HDD, sequential access is more !! 803          * Use percpu scan base for SSD to reduce lock contention on
1103         scan_base = si->cluster_next;         !! 804          * cluster and swap cache.  For HDD, sequential access is more
                                                   >> 805          * important.
                                                   >> 806          */
                                                   >> 807         if (si->flags & SWP_SOLIDSTATE)
                                                   >> 808                 scan_base = this_cpu_read(*si->cluster_next_cpu);
                                                   >> 809         else
                                                   >> 810                 scan_base = si->cluster_next;
1104         offset = scan_base;                      811         offset = scan_base;
1105                                                  812 
1106         if (unlikely(!si->cluster_nr--)) {    !! 813         /* SSD algorithm */
                                                   >> 814         if (si->cluster_info) {
                                                   >> 815                 if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
                                                   >> 816                         goto scan;
                                                   >> 817         } else if (unlikely(!si->cluster_nr--)) {
1107                 if (si->pages - si->inuse_pag    818                 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
1108                         si->cluster_nr = SWAP    819                         si->cluster_nr = SWAPFILE_CLUSTER - 1;
1109                         goto checks;             820                         goto checks;
1110                 }                                821                 }
1111                                                  822 
1112                 spin_unlock(&si->lock);          823                 spin_unlock(&si->lock);
1113                                                  824 
1114                 /*                               825                 /*
1115                  * If seek is expensive, star    826                  * If seek is expensive, start searching for a new cluster from
1116                  * start of partition, to min    827                  * the start of the partition, to minimize the span of allocated swap.
                                                   >> 828                  * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info
                                                   >> 829                  * case, just handled by scan_swap_map_try_ssd_cluster() above.
1117                  */                              830                  */
1118                 scan_base = offset = si->lowe    831                 scan_base = offset = si->lowest_bit;
1119                 last_in_cluster = offset + SW    832                 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
1120                                                  833 
1121                 /* Locate the first empty (un    834                 /* Locate the first empty (unaligned) cluster */
1122                 for (; last_in_cluster <= REA !! 835                 for (; last_in_cluster <= si->highest_bit; offset++) {
1123                         if (si->swap_map[offs    836                         if (si->swap_map[offset])
1124                                 last_in_clust    837                                 last_in_cluster = offset + SWAPFILE_CLUSTER;
1125                         else if (offset == la    838                         else if (offset == last_in_cluster) {
1126                                 spin_lock(&si    839                                 spin_lock(&si->lock);
1127                                 offset -= SWA    840                                 offset -= SWAPFILE_CLUSTER - 1;
1128                                 si->cluster_n    841                                 si->cluster_next = offset;
1129                                 si->cluster_n    842                                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
1130                                 goto checks;     843                                 goto checks;
1131                         }                        844                         }
1132                         if (unlikely(--latenc    845                         if (unlikely(--latency_ration < 0)) {
1133                                 cond_resched(    846                                 cond_resched();
1134                                 latency_ratio    847                                 latency_ration = LATENCY_LIMIT;
1135                         }                        848                         }
1136                 }                                849                 }
1137                                                  850 
1138                 offset = scan_base;              851                 offset = scan_base;
1139                 spin_lock(&si->lock);            852                 spin_lock(&si->lock);
1140                 si->cluster_nr = SWAPFILE_CLU    853                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
1141         }                                        854         }
1142                                                  855 
1143 checks:                                          856 checks:
                                                   >> 857         if (si->cluster_info) {
                                                   >> 858                 while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
                                                   >> 859                         /* take a break if we already got some slots */
                                                   >> 860                         if (n_ret)
                                                   >> 861                                 goto done;
                                                   >> 862                         if (!scan_swap_map_try_ssd_cluster(si, &offset,
                                                   >> 863                                                         &scan_base))
                                                   >> 864                                 goto scan;
                                                   >> 865                 }
                                                   >> 866         }
1144         if (!(si->flags & SWP_WRITEOK))          867         if (!(si->flags & SWP_WRITEOK))
1145                 goto no_page;                    868                 goto no_page;
1146         if (!si->highest_bit)                    869         if (!si->highest_bit)
1147                 goto no_page;                    870                 goto no_page;
1148         if (offset > si->highest_bit)            871         if (offset > si->highest_bit)
1149                 scan_base = offset = si->lowe    872                 scan_base = offset = si->lowest_bit;
1150                                                  873 
                                                   >> 874         ci = lock_cluster(si, offset);
1151         /* reuse swap entry of cache-only swa    875         /* reuse swap entry of cache-only swap if not busy. */
1152         if (vm_swap_full() && si->swap_map[of    876         if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
1153                 int swap_was_freed;              877                 int swap_was_freed;
                                                   >> 878                 unlock_cluster(ci);
1154                 spin_unlock(&si->lock);          879                 spin_unlock(&si->lock);
1155                 swap_was_freed = __try_to_rec !! 880                 swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
1156                 spin_lock(&si->lock);            881                 spin_lock(&si->lock);
1157                 /* entry was freed successful    882                 /* entry was freed successfully, try to use this again */
1158                 if (swap_was_freed > 0)       !! 883                 if (swap_was_freed)
1159                         goto checks;             884                         goto checks;
1160                 goto scan; /* check next one     885                 goto scan; /* check next one */
1161         }                                        886         }
1162                                                  887 
1163         if (si->swap_map[offset]) {              888         if (si->swap_map[offset]) {
                                                   >> 889                 unlock_cluster(ci);
1164                 if (!n_ret)                      890                 if (!n_ret)
1165                         goto scan;               891                         goto scan;
1166                 else                             892                 else
1167                         goto done;               893                         goto done;
1168         }                                        894         }
1169         memset(si->swap_map + offset, usage,  !! 895         WRITE_ONCE(si->swap_map[offset], usage);
                                                   >> 896         inc_cluster_info_page(si, si->cluster_info, offset);
                                                   >> 897         unlock_cluster(ci);
1170                                                  898 
1171         swap_range_alloc(si, offset, nr_pages !! 899         swap_range_alloc(si, offset, 1);
1172         slots[n_ret++] = swp_entry(si->type,     900         slots[n_ret++] = swp_entry(si->type, offset);
1173                                                  901 
1174         /* got enough slots or reach max slot    902         /* got enough slots or reach max slots? */
1175         if ((n_ret == nr) || (offset >= si->h    903         if ((n_ret == nr) || (offset >= si->highest_bit))
1176                 goto done;                       904                 goto done;
1177                                                  905 
1178         /* search for next available slot */     906         /* search for next available slot */
1179                                                  907 
1180         /* time to take a break? */              908         /* time to take a break? */
1181         if (unlikely(--latency_ration < 0)) {    909         if (unlikely(--latency_ration < 0)) {
1182                 if (n_ret)                       910                 if (n_ret)
1183                         goto done;               911                         goto done;
1184                 spin_unlock(&si->lock);          912                 spin_unlock(&si->lock);
1185                 cond_resched();                  913                 cond_resched();
1186                 spin_lock(&si->lock);            914                 spin_lock(&si->lock);
1187                 latency_ration = LATENCY_LIMI    915                 latency_ration = LATENCY_LIMIT;
1188         }                                        916         }
1189                                                  917 
1190         if (si->cluster_nr && !si->swap_map[+ !! 918         /* try to get more slots in cluster */
                                                   >> 919         if (si->cluster_info) {
                                                   >> 920                 if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
                                                   >> 921                         goto checks;
                                                   >> 922         } else if (si->cluster_nr && !si->swap_map[++offset]) {
1191                 /* non-ssd case, still more s    923                 /* non-ssd case, still more slots in cluster? */
1192                 --si->cluster_nr;                924                 --si->cluster_nr;
1193                 goto checks;                     925                 goto checks;
1194         }                                        926         }
1195                                                  927 
1196         /*                                       928         /*
1197          * Even if there's no free clusters a    929          * Even if there's no free clusters available (fragmented),
1198          * try to scan a little more quickly     930          * try to scan a little more quickly with lock held unless we
1199          * have scanned too many slots alread    931          * have scanned too many slots already.
1200          */                                      932          */
1201         if (!scanned_many) {                     933         if (!scanned_many) {
1202                 unsigned long scan_limit;        934                 unsigned long scan_limit;
1203                                                  935 
1204                 if (offset < scan_base)          936                 if (offset < scan_base)
1205                         scan_limit = scan_bas    937                         scan_limit = scan_base;
1206                 else                             938                 else
1207                         scan_limit = si->high    939                         scan_limit = si->highest_bit;
1208                 for (; offset <= scan_limit &    940                 for (; offset <= scan_limit && --latency_ration > 0;
1209                      offset++) {                 941                      offset++) {
1210                         if (!si->swap_map[off    942                         if (!si->swap_map[offset])
1211                                 goto checks;     943                                 goto checks;
1212                 }                                944                 }
1213         }                                        945         }
1214                                                  946 
1215 done:                                            947 done:
1216         if (order == 0)                       !! 948         set_cluster_next(si, offset + 1);
1217                 set_cluster_next(si, offset + << 
1218         si->flags -= SWP_SCANNING;               949         si->flags -= SWP_SCANNING;
1219         return n_ret;                            950         return n_ret;
1220                                                  951 
1221 scan:                                            952 scan:
1222         VM_WARN_ON(order > 0);                << 
1223         spin_unlock(&si->lock);                  953         spin_unlock(&si->lock);
1224         while (++offset <= READ_ONCE(si->high    954         while (++offset <= READ_ONCE(si->highest_bit)) {
                                                   >> 955                 if (data_race(!si->swap_map[offset])) {
                                                   >> 956                         spin_lock(&si->lock);
                                                   >> 957                         goto checks;
                                                   >> 958                 }
                                                   >> 959                 if (vm_swap_full() &&
                                                   >> 960                     READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
                                                   >> 961                         spin_lock(&si->lock);
                                                   >> 962                         goto checks;
                                                   >> 963                 }
1225                 if (unlikely(--latency_ration    964                 if (unlikely(--latency_ration < 0)) {
1226                         cond_resched();          965                         cond_resched();
1227                         latency_ration = LATE    966                         latency_ration = LATENCY_LIMIT;
1228                         scanned_many = true;     967                         scanned_many = true;
1229                 }                                968                 }
1230                 if (swap_offset_available_and << 
1231                         goto checks;          << 
1232         }                                        969         }
1233         offset = si->lowest_bit;                 970         offset = si->lowest_bit;
1234         while (offset < scan_base) {             971         while (offset < scan_base) {
                                                   >> 972                 if (data_race(!si->swap_map[offset])) {
                                                   >> 973                         spin_lock(&si->lock);
                                                   >> 974                         goto checks;
                                                   >> 975                 }
                                                   >> 976                 if (vm_swap_full() &&
                                                   >> 977                     READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
                                                   >> 978                         spin_lock(&si->lock);
                                                   >> 979                         goto checks;
                                                   >> 980                 }
1235                 if (unlikely(--latency_ration    981                 if (unlikely(--latency_ration < 0)) {
1236                         cond_resched();          982                         cond_resched();
1237                         latency_ration = LATE    983                         latency_ration = LATENCY_LIMIT;
1238                         scanned_many = true;     984                         scanned_many = true;
1239                 }                                985                 }
1240                 if (swap_offset_available_and << 
1241                         goto checks;          << 
1242                 offset++;                        986                 offset++;
1243         }                                        987         }
1244         spin_lock(&si->lock);                    988         spin_lock(&si->lock);
1245                                                  989 
1246 no_page:                                         990 no_page:
1247         si->flags -= SWP_SCANNING;               991         si->flags -= SWP_SCANNING;
1248         return n_ret;                            992         return n_ret;
1249 }                                                993 }
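
On the 6.12 side, the unlocked rescan above calls swap_offset_available_and_locked() where 5.15 open-coded the two checks still visible in the right-hand column. A sketch of that helper, reconstructed from the inlined 5.15 logic rather than quoted from the 6.12 tree:

	/*
	 * Peek at a slot without si->lock held (hence data_race() and
	 * READ_ONCE()), and take the lock only when the slot looks
	 * usable, so the caller can jump to "checks:" and re-verify
	 * under the lock.
	 */
	static bool swap_offset_available_and_locked(struct swap_info_struct *si,
						     unsigned long offset)
	{
		if (data_race(!si->swap_map[offset])) {
			spin_lock(&si->lock);
			return true;
		}

		if (vm_swap_full() &&
		    READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
			spin_lock(&si->lock);
			return true;
		}

		return false;
	}

The unlocked reads are only hints; a slot is confirmed free under si->lock at "checks:" before it is handed out.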
1250                                                  994 
1251 int get_swap_pages(int n_goal, swp_entry_t sw !! 995 static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
1252 {                                                996 {
1253         int order = swap_entry_order(entry_or !! 997         unsigned long idx;
1254         unsigned long size = 1 << order;      !! 998         struct swap_cluster_info *ci;
                                                   >> 999         unsigned long offset;
                                                   >> 1000 
                                                   >> 1001         /*
                                                   >> 1002          * Should not even be attempting cluster allocations when huge
                                                   >> 1003          * page swap is disabled.  Warn and fail the allocation.
                                                   >> 1004          */
                                                   >> 1005         if (!IS_ENABLED(CONFIG_THP_SWAP)) {
                                                   >> 1006                 VM_WARN_ON_ONCE(1);
                                                   >> 1007                 return 0;
                                                   >> 1008         }
                                                   >> 1009 
                                                   >> 1010         if (cluster_list_empty(&si->free_clusters))
                                                   >> 1011                 return 0;
                                                   >> 1012 
                                                   >> 1013         idx = cluster_list_first(&si->free_clusters);
                                                   >> 1014         offset = idx * SWAPFILE_CLUSTER;
                                                   >> 1015         ci = lock_cluster(si, offset);
                                                   >> 1016         alloc_cluster(si, idx);
                                                   >> 1017         cluster_set_count_flag(ci, SWAPFILE_CLUSTER, CLUSTER_FLAG_HUGE);
                                                   >> 1018 
                                                   >> 1019         memset(si->swap_map + offset, SWAP_HAS_CACHE, SWAPFILE_CLUSTER);
                                                   >> 1020         unlock_cluster(ci);
                                                   >> 1021         swap_range_alloc(si, offset, SWAPFILE_CLUSTER);
                                                   >> 1022         *slot = swp_entry(si->type, offset);
                                                   >> 1023 
                                                   >> 1024         return 1;
                                                   >> 1025 }
                                                   >> 1026 
                                                   >> 1027 static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
                                                   >> 1028 {
                                                   >> 1029         unsigned long offset = idx * SWAPFILE_CLUSTER;
                                                   >> 1030         struct swap_cluster_info *ci;
                                                   >> 1031 
                                                   >> 1032         ci = lock_cluster(si, offset);
                                                   >> 1033         memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER);
                                                   >> 1034         cluster_set_count_flag(ci, 0, 0);
                                                   >> 1035         free_cluster(si, idx);
                                                   >> 1036         unlock_cluster(ci);
                                                   >> 1037         swap_range_free(si, offset, SWAPFILE_CLUSTER);
                                                   >> 1038 }
                                                   >> 1039 
                                                   >> 1040 int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
                                                   >> 1041 {
                                                   >> 1042         unsigned long size = swap_entry_size(entry_size);
1255         struct swap_info_struct *si, *next;      1043         struct swap_info_struct *si, *next;
1256         long avail_pgs;                          1044         long avail_pgs;
1257         int n_ret = 0;                           1045         int n_ret = 0;
1258         int node;                                1046         int node;
1259                                                  1047 
                                                   >> 1048         /* Only single cluster request supported */
                                                   >> 1049         WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER);
                                                   >> 1050 
1260         spin_lock(&swap_avail_lock);             1051         spin_lock(&swap_avail_lock);
1261                                                  1052 
1262         avail_pgs = atomic_long_read(&nr_swap    1053         avail_pgs = atomic_long_read(&nr_swap_pages) / size;
1263         if (avail_pgs <= 0) {                    1054         if (avail_pgs <= 0) {
1264                 spin_unlock(&swap_avail_lock)    1055                 spin_unlock(&swap_avail_lock);
1265                 goto noswap;                     1056                 goto noswap;
1266         }                                        1057         }
1267                                                  1058 
1268         n_goal = min3((long)n_goal, (long)SWA    1059         n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs);
1269                                                  1060 
1270         atomic_long_sub(n_goal * size, &nr_sw    1061         atomic_long_sub(n_goal * size, &nr_swap_pages);
1271                                                  1062 
1272 start_over:                                      1063 start_over:
1273         node = numa_node_id();                   1064         node = numa_node_id();
1274         plist_for_each_entry_safe(si, next, &    1065         plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) {
1275                 /* requeue si to after same-p    1066                 /* requeue si to after same-priority siblings */
1276                 plist_requeue(&si->avail_list    1067                 plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
1277                 spin_unlock(&swap_avail_lock)    1068                 spin_unlock(&swap_avail_lock);
1278                 spin_lock(&si->lock);            1069                 spin_lock(&si->lock);
1279                 if (!si->highest_bit || !(si-    1070                 if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
1280                         spin_lock(&swap_avail    1071                         spin_lock(&swap_avail_lock);
1281                         if (plist_node_empty(    1072                         if (plist_node_empty(&si->avail_lists[node])) {
1282                                 spin_unlock(&    1073                                 spin_unlock(&si->lock);
1283                                 goto nextsi;     1074                                 goto nextsi;
1284                         }                        1075                         }
1285                         WARN(!si->highest_bit    1076                         WARN(!si->highest_bit,
1286                              "swap_info %d in    1077                              "swap_info %d in list but !highest_bit\n",
1287                              si->type);          1078                              si->type);
1288                         WARN(!(si->flags & SW    1079                         WARN(!(si->flags & SWP_WRITEOK),
1289                              "swap_info %d in    1080                              "swap_info %d in list but !SWP_WRITEOK\n",
1290                              si->type);          1081                              si->type);
1291                         __del_from_avail_list    1082                         __del_from_avail_list(si);
1292                         spin_unlock(&si->lock    1083                         spin_unlock(&si->lock);
1293                         goto nextsi;             1084                         goto nextsi;
1294                 }                                1085                 }
1295                 n_ret = scan_swap_map_slots(s !! 1086                 if (size == SWAPFILE_CLUSTER) {
1296                                             n !! 1087                         if (si->flags & SWP_BLKDEV)
                                                   >> 1088                                 n_ret = swap_alloc_cluster(si, swp_entries);
                                                   >> 1089                 } else
                                                   >> 1090                         n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
                                                   >> 1091                                                     n_goal, swp_entries);
1297                 spin_unlock(&si->lock);          1092                 spin_unlock(&si->lock);
1298                 if (n_ret || size > 1)        !! 1093                 if (n_ret || size == SWAPFILE_CLUSTER)
1299                         goto check_out;          1094                         goto check_out;
                                                   >> 1095                 pr_debug("scan_swap_map of si %d failed to find offset\n",
                                                   >> 1096                         si->type);
1300                 cond_resched();                  1097                 cond_resched();
1301                                                  1098 
1302                 spin_lock(&swap_avail_lock);     1099                 spin_lock(&swap_avail_lock);
1303 nextsi:                                          1100 nextsi:
1304                 /*                               1101                 /*
1305                  * if we got here, it's likel    1102                  * if we got here, it's likely that si was almost full before,
1306                  * and since scan_swap_map_sl    1103                  * and since scan_swap_map_slots() can drop the si->lock,
1307                  * multiple callers probably     1104                  * multiple callers probably all tried to get a page from the
1308                  * same si and it filled up b    1105                  * same si and it filled up before we could get one; or, the si
1309                  * filled up between us dropp    1106                  * filled up between us dropping swap_avail_lock and taking
1310                  * si->lock. Since we dropped    1107                  * si->lock. Since we dropped the swap_avail_lock, the
1311                  * swap_avail_head list may h    1108                  * swap_avail_head list may have been modified; so if next is
1312                  * still in the swap_avail_he    1109                  * still in the swap_avail_head list then try it, otherwise
1313                  * start over if we have not     1110                  * start over if we have not gotten any slots.
1314                  */                              1111                  */
1315                 if (plist_node_empty(&next->a    1112                 if (plist_node_empty(&next->avail_lists[node]))
1316                         goto start_over;         1113                         goto start_over;
1317         }                                        1114         }
1318                                                  1115 
1319         spin_unlock(&swap_avail_lock);           1116         spin_unlock(&swap_avail_lock);
1320                                                  1117 
1321 check_out:                                       1118 check_out:
1322         if (n_ret < n_goal)                      1119         if (n_ret < n_goal)
1323                 atomic_long_add((long)(n_goal    1120                 atomic_long_add((long)(n_goal - n_ret) * size,
1324                                 &nr_swap_page    1121                                 &nr_swap_pages);
1325 noswap:                                          1122 noswap:
1326         return n_ret;                            1123         return n_ret;
1327 }                                                1124 }
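
get_swap_pages() reserves pages from nr_swap_pages optimistically before any device is scanned, then refunds the shortfall at check_out. A minimal userspace model of just that accounting (reserve() and refund() are illustrative stand-ins, not kernel symbols; assumes C11 atomics):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_long nr_swap_pages = 100;	/* free swap pages */

	/* Reserve up to n_goal entries of size pages each. */
	static long reserve(long n_goal, long size)
	{
		long avail = atomic_load(&nr_swap_pages) / size;

		if (avail <= 0)
			return 0;
		if (n_goal > avail)
			n_goal = avail;
		atomic_fetch_sub(&nr_swap_pages, n_goal * size);
		return n_goal;
	}

	/* Refund what the scan failed to deliver (the check_out: path). */
	static void refund(long n_goal, long n_ret, long size)
	{
		if (n_ret < n_goal)
			atomic_fetch_add(&nr_swap_pages, (n_goal - n_ret) * size);
	}

	int main(void)
	{
		long goal = reserve(4, 4);	/* 4 order-2 entries, 16 pages */
		long got = 2;			/* pretend the scan found only 2 */

		refund(goal, got, 4);
		printf("pages left: %ld\n", atomic_load(&nr_swap_pages));
		return 0;
	}

Reserving first keeps concurrent allocators from collectively overcommitting nr_swap_pages while si->lock is dropped between devices.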
1328                                                  1125 
1329 static struct swap_info_struct *_swap_info_ge !! 1126 static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
1330 {                                                1127 {
1331         struct swap_info_struct *si;          !! 1128         struct swap_info_struct *p;
1332         unsigned long offset;                    1129         unsigned long offset;
1333                                                  1130 
1334         if (!entry.val)                          1131         if (!entry.val)
1335                 goto out;                        1132                 goto out;
1336         si = swp_swap_info(entry);            !! 1133         p = swp_swap_info(entry);
1337         if (!si)                              !! 1134         if (!p)
1338                 goto bad_nofile;                 1135                 goto bad_nofile;
1339         if (data_race(!(si->flags & SWP_USED) !! 1136         if (data_race(!(p->flags & SWP_USED)))
1340                 goto bad_device;                 1137                 goto bad_device;
1341         offset = swp_offset(entry);              1138         offset = swp_offset(entry);
1342         if (offset >= si->max)                !! 1139         if (offset >= p->max)
1343                 goto bad_offset;                 1140                 goto bad_offset;
1344         if (data_race(!si->swap_map[swp_offse !! 1141         return p;
1345                 goto bad_free;                << 
1346         return si;                            << 
1347                                                  1142 
1348 bad_free:                                     << 
1349         pr_err("%s: %s%08lx\n", __func__, Unu << 
1350         goto out;                             << 
1351 bad_offset:                                      1143 bad_offset:
1352         pr_err("%s: %s%08lx\n", __func__, Bad    1144         pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
1353         goto out;                                1145         goto out;
1354 bad_device:                                      1146 bad_device:
1355         pr_err("%s: %s%08lx\n", __func__, Unu    1147         pr_err("%s: %s%08lx\n", __func__, Unused_file, entry.val);
1356         goto out;                                1148         goto out;
1357 bad_nofile:                                      1149 bad_nofile:
1358         pr_err("%s: %s%08lx\n", __func__, Bad    1150         pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);
1359 out:                                             1151 out:
1360         return NULL;                             1152         return NULL;
1361 }                                                1153 }
1362                                                  1154 
                                                   >> 1155 static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
                                                   >> 1156 {
                                                   >> 1157         struct swap_info_struct *p;
                                                   >> 1158 
                                                   >> 1159         p = __swap_info_get(entry);
                                                   >> 1160         if (!p)
                                                   >> 1161                 goto out;
                                                   >> 1162         if (data_race(!p->swap_map[swp_offset(entry)]))
                                                   >> 1163                 goto bad_free;
                                                   >> 1164         return p;
                                                   >> 1165 
                                                   >> 1166 bad_free:
                                                   >> 1167         pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);
                                                   >> 1168 out:
                                                   >> 1169         return NULL;
                                                   >> 1170 }
                                                   >> 1171 
                                                   >> 1172 static struct swap_info_struct *swap_info_get(swp_entry_t entry)
                                                   >> 1173 {
                                                   >> 1174         struct swap_info_struct *p;
                                                   >> 1175 
                                                   >> 1176         p = _swap_info_get(entry);
                                                   >> 1177         if (p)
                                                   >> 1178                 spin_lock(&p->lock);
                                                   >> 1179         return p;
                                                   >> 1180 }
                                                   >> 1181 
1363 static struct swap_info_struct *swap_info_get    1182 static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
1364                                         struc    1183                                         struct swap_info_struct *q)
1365 {                                                1184 {
1366         struct swap_info_struct *p;              1185         struct swap_info_struct *p;
1367                                                  1186 
1368         p = _swap_info_get(entry);               1187         p = _swap_info_get(entry);
1369                                                  1188 
1370         if (p != q) {                            1189         if (p != q) {
1371                 if (q != NULL)                   1190                 if (q != NULL)
1372                         spin_unlock(&q->lock)    1191                         spin_unlock(&q->lock);
1373                 if (p != NULL)                   1192                 if (p != NULL)
1374                         spin_lock(&p->lock);     1193                         spin_lock(&p->lock);
1375         }                                        1194         }
1376         return p;                                1195         return p;
1377 }                                                1196 }
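
All of these lookup helpers decode the same packed handle: a swp_entry_t carries the swap device index ("type") and the page-slot offset in a single word, split apart again by swp_type()/swp_offset(), and swp_swap_info() indexes the per-type swap_info[] array with the former. A self-contained userspace illustration (the 58-bit split and 64-bit unsigned long are assumptions; the kernel's actual layout is arch-dependent):

	#include <assert.h>
	#include <stdio.h>

	typedef struct { unsigned long val; } swp_entry_t;

	#define SWP_TYPE_SHIFT	58	/* illustrative split of the word */

	static swp_entry_t swp_entry(unsigned long type, unsigned long offset)
	{
		return (swp_entry_t){ (type << SWP_TYPE_SHIFT) | offset };
	}

	static unsigned long swp_type(swp_entry_t e)
	{
		return e.val >> SWP_TYPE_SHIFT;
	}

	static unsigned long swp_offset(swp_entry_t e)
	{
		return e.val & ((1UL << SWP_TYPE_SHIFT) - 1);
	}

	int main(void)
	{
		swp_entry_t e = swp_entry(2, 12345);

		assert(swp_type(e) == 2 && swp_offset(e) == 12345);
		printf("type=%lu offset=%lu\n", swp_type(e), swp_offset(e));
		return 0;
	}

The bad_nofile/bad_device/bad_offset ladder above rejects, in order: a type with no registered device, a device not in use, and an offset past si->max.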
1378                                                  1197 
1379 static unsigned char __swap_entry_free_locked !! 1198 static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
1380                                                  1199                                               unsigned long offset,
1381                                                  1200                                               unsigned char usage)
1382 {                                                1201 {
1383         unsigned char count;                     1202         unsigned char count;
1384         unsigned char has_cache;                 1203         unsigned char has_cache;
1385                                                  1204 
1386         count = si->swap_map[offset];         !! 1205         count = p->swap_map[offset];
1387                                                  1206 
1388         has_cache = count & SWAP_HAS_CACHE;      1207         has_cache = count & SWAP_HAS_CACHE;
1389         count &= ~SWAP_HAS_CACHE;                1208         count &= ~SWAP_HAS_CACHE;
1390                                                  1209 
1391         if (usage == SWAP_HAS_CACHE) {           1210         if (usage == SWAP_HAS_CACHE) {
1392                 VM_BUG_ON(!has_cache);           1211                 VM_BUG_ON(!has_cache);
1393                 has_cache = 0;                   1212                 has_cache = 0;
1394         } else if (count == SWAP_MAP_SHMEM) {    1213         } else if (count == SWAP_MAP_SHMEM) {
1395                 /*                               1214                 /*
1396                  * Or we could insist on shme    1215                  * Or we could insist on shmem.c using a special
1397                  * swap_shmem_free() and free    1216                  * swap_shmem_free() and free_shmem_swap_and_cache()...
1398                  */                              1217                  */
1399                 count = 0;                       1218                 count = 0;
1400         } else if ((count & ~COUNT_CONTINUED)    1219         } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
1401                 if (count == COUNT_CONTINUED)    1220                 if (count == COUNT_CONTINUED) {
1402                         if (swap_count_contin !! 1221                         if (swap_count_continued(p, offset, count))
1403                                 count = SWAP_    1222                                 count = SWAP_MAP_MAX | COUNT_CONTINUED;
1404                         else                     1223                         else
1405                                 count = SWAP_    1224                                 count = SWAP_MAP_MAX;
1406                 } else                           1225                 } else
1407                         count--;                 1226                         count--;
1408         }                                        1227         }
1409                                                  1228 
1410         usage = count | has_cache;               1229         usage = count | has_cache;
1411         if (usage)                               1230         if (usage)
1412                 WRITE_ONCE(si->swap_map[offse !! 1231                 WRITE_ONCE(p->swap_map[offset], usage);
1413         else                                     1232         else
1414                 WRITE_ONCE(si->swap_map[offse !! 1233                 WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE);
1415                                                  1234 
1416         return usage;                            1235         return usage;
1417 }                                                1236 }
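
Each swap_map byte that __swap_entry_free_locked() rewrites packs a small reference count plus two flag bits. The constants below match the kernel's definitions in include/linux/swap.h; the decoder is a simplified userspace illustration, not kernel code:

	#include <stdio.h>

	#define SWAP_HAS_CACHE	0x40	/* page lives in the swap cache  */
	#define COUNT_CONTINUED	0x80	/* count continued elsewhere     */
	#define SWAP_MAP_MAX	0x3e	/* max in-place reference count  */
	#define SWAP_MAP_SHMEM	0xbf	/* entry owned by shmem          */

	static void decode(unsigned char v)
	{
		if (v == SWAP_MAP_SHMEM) {
			printf("%#04x: shmem-owned entry\n", v);
			return;
		}
		printf("%#04x: count=%u%s%s\n", v,
		       v & ~(SWAP_HAS_CACHE | COUNT_CONTINUED),
		       (v & SWAP_HAS_CACHE) ? " +cache" : "",
		       (v & COUNT_CONTINUED) ? " +continued" : "");
	}

	int main(void)
	{
		decode(0x01);			/* one pte reference         */
		decode(0x01 | SWAP_HAS_CACHE);	/* pte ref + swapcache page  */
		decode(SWAP_MAP_MAX | COUNT_CONTINUED);	/* overflowed count  */
		return 0;
	}

Note the function parks a fully freed slot at SWAP_HAS_CACHE instead of 0, so the slot cannot be reused until the deferred free (free_swap_slot() or swap_entry_range_free()) completes.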
1418                                                  1237 
1419 /*                                               1238 /*
1420  * When we get a swap entry, if there are no     1239  * When we get a swap entry, if there are no other ways to
1421  * prevent swapoff, such as the folio in swap !! 1240  * prevent swapoff, such as the folio in swap cache is locked, page
1422  * reader side is locked, etc., the swap entr !! 1241  * table lock is held, etc., the swap entry may become invalid because
1423  * because of swapoff.  Then, we need to encl !! 1242  * of swapoff.  Then, we need to enclose all swap related functions
1424  * functions with get_swap_device() and put_s !! 1243  * with get_swap_device() and put_swap_device(), unless the swap
1425  * swap functions call get/put_swap_device()  !! 1244  * functions call get/put_swap_device() by themselves.
1426  *                                            !! 1245  *
1427  * RCU reader side lock (including any spinlo !! 1246  * Note that when only holding the PTL, swapoff might succeed immediately
1428  * prevent swapoff, because synchronize_rcu() !! 1247  * after freeing a swap entry. Therefore, immediately after
1429  * before freeing data structures.            !! 1248  * __swap_entry_free(), the swap info might become stale and should not
                                                   >> 1249  * be touched without a prior get_swap_device().
1430  *                                               1250  *
1431  * Check whether swap entry is valid in the s    1251  * Check whether swap entry is valid in the swap device.  If so,
1432  * return pointer to swap_info_struct, and ke    1252  * return pointer to swap_info_struct, and keep the swap entry valid
1433  * via preventing the swap device from being     1253  * via preventing the swap device from being swapoff, until
1434  * put_swap_device() is called.  Otherwise re    1254  * put_swap_device() is called.  Otherwise return NULL.
1435  *                                               1255  *
1436  * Notice that swapoff or swapoff+swapon can     1256  * Notice that swapoff or swapoff+swapon can still happen before the
1437  * percpu_ref_tryget_live() in get_swap_devic    1257  * percpu_ref_tryget_live() in get_swap_device() or after the
1438  * percpu_ref_put() in put_swap_device() if t    1258  * percpu_ref_put() in put_swap_device() if there isn't any other way
1439  * to prevent swapoff.  The caller must be pr    1259  * to prevent swapoff.  The caller must be prepared for that.  For
1440  * example, the following situation is possib    1260  * example, the following situation is possible.
1441  *                                               1261  *
1442  *   CPU1                               CPU2     1262  *   CPU1                               CPU2
1443  *   do_swap_page()                              1263  *   do_swap_page()
1444  *     ...                              swapo    1264  *     ...                              swapoff+swapon
1445  *     __read_swap_cache_async()                 1265  *     __read_swap_cache_async()
1446  *       swapcache_prepare()                     1266  *       swapcache_prepare()
1447  *         __swap_duplicate()                    1267  *         __swap_duplicate()
1448  *           // check swap_map                   1268  *           // check swap_map
1449  *     // verify PTE not changed                 1269  *     // verify PTE not changed
1450  *                                               1270  *
1451  * In __swap_duplicate(), the swap_map needs     1271  * In __swap_duplicate(), the swap_map needs to be checked before
1452  * changing partly because the specified swap    1272  * changing partly because the specified swap entry may be for another
1453  * swap device which has been swapoff.  And i    1273  * swap device which has been swapoff.  And in do_swap_page(), after
1454  * the page is read from the swap device, the    1274  * the page is read from the swap device, the PTE is verified not
1455  * changed with the page table locked to chec    1275  * changed with the page table locked to check whether the swap device
1456  * has been swapoff or swapoff+swapon.           1276  * has been swapoff or swapoff+swapon.
1457  */                                              1277  */
1458 struct swap_info_struct *get_swap_device(swp_    1278 struct swap_info_struct *get_swap_device(swp_entry_t entry)
1459 {                                                1279 {
1460         struct swap_info_struct *si;             1280         struct swap_info_struct *si;
1461         unsigned long offset;                    1281         unsigned long offset;
1462                                                  1282 
1463         if (!entry.val)                          1283         if (!entry.val)
1464                 goto out;                        1284                 goto out;
1465         si = swp_swap_info(entry);               1285         si = swp_swap_info(entry);
1466         if (!si)                                 1286         if (!si)
1467                 goto bad_nofile;                 1287                 goto bad_nofile;
1468         if (!percpu_ref_tryget_live(&si->user    1288         if (!percpu_ref_tryget_live(&si->users))
1469                 goto out;                        1289                 goto out;
1470         /*                                       1290         /*
1471          * Guarantee the si->users are checke    1291          * Guarantee the si->users are checked before accessing other
1472          * fields of swap_info_struct.           1292          * fields of swap_info_struct.
1473          *                                       1293          *
1474          * Paired with the spin_unlock() afte    1294          * Paired with the spin_unlock() after setup_swap_info() in
1475          * enable_swap_info().                   1295          * enable_swap_info().
1476          */                                      1296          */
1477         smp_rmb();                               1297         smp_rmb();
1478         offset = swp_offset(entry);              1298         offset = swp_offset(entry);
1479         if (offset >= si->max)                   1299         if (offset >= si->max)
1480                 goto put_out;                    1300                 goto put_out;
1481                                                  1301 
1482         return si;                               1302         return si;
1483 bad_nofile:                                      1303 bad_nofile:
1484         pr_err("%s: %s%08lx\n", __func__, Bad    1304         pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);
1485 out:                                             1305 out:
1486         return NULL;                             1306         return NULL;
1487 put_out:                                         1307 put_out:
1488         pr_err("%s: %s%08lx\n", __func__, Bad << 
1489         percpu_ref_put(&si->users);              1308         percpu_ref_put(&si->users);
1490         return NULL;                             1309         return NULL;
1491 }                                                1310 }
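
The protocol the comment block above prescribes, as a caller would follow it. peek_swap_count() is a hypothetical helper for illustration; 5.15's __swap_count() in the right-hand column further down has exactly this shape:

	/*
	 * Pin the device before touching swap_map; a NULL return means
	 * swapoff already ran (or is running) and the entry is stale.
	 */
	static int peek_swap_count(swp_entry_t entry)
	{
		struct swap_info_struct *si;
		int count;

		si = get_swap_device(entry);
		if (!si)
			return -ENODEV;
		count = swap_count(READ_ONCE(si->swap_map[swp_offset(entry)]));
		put_swap_device(si);
		return count;
	}

swapoff kills si->users and waits for outstanding references to drain before tearing the device down, which is what makes this pin sufficient.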
1492                                                  1311 
1493 static unsigned char __swap_entry_free(struct !! 1312 static unsigned char __swap_entry_free(struct swap_info_struct *p,
1494                                        swp_en    1313                                        swp_entry_t entry)
1495 {                                                1314 {
1496         struct swap_cluster_info *ci;            1315         struct swap_cluster_info *ci;
1497         unsigned long offset = swp_offset(ent    1316         unsigned long offset = swp_offset(entry);
1498         unsigned char usage;                     1317         unsigned char usage;
1499                                                  1318 
1500         ci = lock_cluster_or_swap_info(si, of !! 1319         ci = lock_cluster_or_swap_info(p, offset);
1501         usage = __swap_entry_free_locked(si,  !! 1320         usage = __swap_entry_free_locked(p, offset, 1);
1502         unlock_cluster_or_swap_info(si, ci);  !! 1321         unlock_cluster_or_swap_info(p, ci);
1503         if (!usage)                              1322         if (!usage)
1504                 free_swap_slot(entry);           1323                 free_swap_slot(entry);
1505                                                  1324 
1506         return usage;                            1325         return usage;
1507 }                                                1326 }
1508                                                  1327 
1509 static bool __swap_entries_free(struct swap_i !! 1328 static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
1510                 swp_entry_t entry, int nr)    << 
1511 {                                                1329 {
1512         unsigned long offset = swp_offset(ent << 
1513         unsigned int type = swp_type(entry);  << 
1514         struct swap_cluster_info *ci;            1330         struct swap_cluster_info *ci;
1515         bool has_cache = false;               << 
1516         unsigned char count;                  << 
1517         int i;                                << 
1518                                               << 
1519         if (nr <= 1 || swap_count(data_race(s << 
1520                 goto fallback;                << 
1521         /* cross into another cluster */      << 
1522         if (nr > SWAPFILE_CLUSTER - offset %  << 
1523                 goto fallback;                << 
1524                                               << 
1525         ci = lock_cluster_or_swap_info(si, of << 
1526         if (!swap_is_last_map(si, offset, nr, << 
1527                 unlock_cluster_or_swap_info(s << 
1528                 goto fallback;                << 
1529         }                                     << 
1530         for (i = 0; i < nr; i++)              << 
1531                 WRITE_ONCE(si->swap_map[offse << 
1532         unlock_cluster_or_swap_info(si, ci);  << 
1533                                               << 
1534         if (!has_cache) {                     << 
1535                 for (i = 0; i < nr; i++)      << 
1536                         zswap_invalidate(swp_ << 
1537                 spin_lock(&si->lock);         << 
1538                 swap_entry_range_free(si, ent << 
1539                 spin_unlock(&si->lock);       << 
1540         }                                     << 
1541         return has_cache;                     << 
1542                                               << 
1543 fallback:                                     << 
1544         for (i = 0; i < nr; i++) {            << 
1545                 if (data_race(si->swap_map[of << 
1546                         count = __swap_entry_ << 
1547                         if (count == SWAP_HAS << 
1548                                 has_cache = t << 
1549                 } else {                      << 
1550                         WARN_ON_ONCE(1);      << 
1551                 }                             << 
1552         }                                     << 
1553         return has_cache;                     << 
1554 }                                             << 
1555                                               << 
1556 /*                                            << 
1557  * Drop the last HAS_CACHE flag of swap entri << 
1558  * ensure all entries belong to the same cgro << 
1559  */                                           << 
1560 static void swap_entry_range_free(struct swap << 
1561                                   unsigned in << 
1562 {                                             << 
1563         unsigned long offset = swp_offset(ent    1331         unsigned long offset = swp_offset(entry);
1564         unsigned char *map = si->swap_map + o !! 1332         unsigned char count;
1565         unsigned char *map_end = map + nr_pag << 
1566         struct swap_cluster_info *ci;         << 
1567                                                  1333 
1568         ci = lock_cluster(si, offset);        !! 1334         ci = lock_cluster(p, offset);
1569         do {                                  !! 1335         count = p->swap_map[offset];
1570                 VM_BUG_ON(*map != SWAP_HAS_CA !! 1336         VM_BUG_ON(count != SWAP_HAS_CACHE);
1571                 *map = 0;                     !! 1337         p->swap_map[offset] = 0;
1572         } while (++map < map_end);            !! 1338         dec_cluster_info_page(p, p->cluster_info, offset);
1573         dec_cluster_info_page(si, ci, nr_page << 
1574         unlock_cluster(ci);                      1339         unlock_cluster(ci);
1575                                                  1340 
1576         mem_cgroup_uncharge_swap(entry, nr_pa !! 1341         mem_cgroup_uncharge_swap(entry, 1);
1577         swap_range_free(si, offset, nr_pages) !! 1342         swap_range_free(p, offset, 1);
1578 }                                             << 
1579                                               << 
1580 static void cluster_swap_free_nr(struct swap_ << 
1581                 unsigned long offset, int nr_ << 
1582                 unsigned char usage)          << 
1583 {                                             << 
1584         struct swap_cluster_info *ci;         << 
1585         DECLARE_BITMAP(to_free, BITS_PER_LONG << 
1586         int i, nr;                            << 
1587                                               << 
1588         ci = lock_cluster_or_swap_info(si, of << 
1589         while (nr_pages) {                    << 
1590                 nr = min(BITS_PER_LONG, nr_pa << 
1591                 for (i = 0; i < nr; i++) {    << 
1592                         if (!__swap_entry_fre << 
1593                                 bitmap_set(to << 
1594                 }                             << 
1595                 if (!bitmap_empty(to_free, BI << 
1596                         unlock_cluster_or_swa << 
1597                         for_each_set_bit(i, t << 
1598                                 free_swap_slo << 
1599                         if (nr == nr_pages)   << 
1600                                 return;       << 
1601                         bitmap_clear(to_free, << 
1602                         ci = lock_cluster_or_ << 
1603                 }                             << 
1604                 offset += nr;                 << 
1605                 nr_pages -= nr;               << 
1606         }                                     << 
1607         unlock_cluster_or_swap_info(si, ci);  << 
1608 }                                                1343 }
1609                                                  1344 
1610 /*                                               1345 /*
1611  * Caller has made sure that the swap device     1346  * Caller has made sure that the swap device corresponding to entry
1612  * is still around or has not been recycled.     1347  * is still around or has not been recycled.
1613  */                                              1348  */
1614 void swap_free_nr(swp_entry_t entry, int nr_p !! 1349 void swap_free(swp_entry_t entry)
1615 {                                                1350 {
1616         int nr;                               !! 1351         struct swap_info_struct *p;
1617         struct swap_info_struct *sis;         << 
1618         unsigned long offset = swp_offset(ent << 
1619                                               << 
1620         sis = _swap_info_get(entry);          << 
1621         if (!sis)                             << 
1622                 return;                       << 
1623                                                  1352 
1624         while (nr_pages) {                    !! 1353         p = _swap_info_get(entry);
1625                 nr = min_t(int, nr_pages, SWA !! 1354         if (p)
1626                 cluster_swap_free_nr(sis, off !! 1355                 __swap_entry_free(p, entry);
1627                 offset += nr;                 << 
1628                 nr_pages -= nr;               << 
1629         }                                     << 
1630 }                                                1356 }
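
swap_free_nr() splits its range so that each cluster_swap_free_nr() call never crosses a cluster boundary and therefore stays under a single cluster lock. A userspace trace of the chunking arithmetic (256 is SWAPFILE_CLUSTER's value without CONFIG_THP_SWAP; with THP swap it is HPAGE_PMD_NR):

	#include <stdio.h>

	#define SWAPFILE_CLUSTER 256UL

	int main(void)
	{
		unsigned long offset = 1000, nr_pages = 600, nr;

		while (nr_pages) {
			/* distance to the end of the current cluster */
			nr = SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER;
			if (nr > nr_pages)
				nr = nr_pages;
			printf("free [%lu, %lu) within one cluster\n",
			       offset, offset + nr);
			offset += nr;
			nr_pages -= nr;
		}
		return 0;
	}

Within each chunk, cluster_swap_free_nr() above additionally collects the slots whose count dropped to zero in a BITS_PER_LONG-wide bitmap, so free_swap_slot() runs with the cluster lock dropped.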
1631                                                  1357 
1632 /*                                               1358 /*
1633  * Called after dropping swapcache to decreas    1359  * Called after dropping swapcache to decrease refcnt to swap entries.
1634  */                                              1360  */
1635 void put_swap_folio(struct folio *folio, swp_ !! 1361 void put_swap_page(struct page *page, swp_entry_t entry)
1636 {                                                1362 {
1637         unsigned long offset = swp_offset(ent    1363         unsigned long offset = swp_offset(entry);
                                                   >> 1364         unsigned long idx = offset / SWAPFILE_CLUSTER;
1638         struct swap_cluster_info *ci;            1365         struct swap_cluster_info *ci;
1639         struct swap_info_struct *si;             1366         struct swap_info_struct *si;
1640         int size = 1 << swap_entry_order(foli !! 1367         unsigned char *map;
                                                   >> 1368         unsigned int i, free_entries = 0;
                                                   >> 1369         unsigned char val;
                                                   >> 1370         int size = swap_entry_size(thp_nr_pages(page));
1641                                                  1371 
1642         si = _swap_info_get(entry);              1372         si = _swap_info_get(entry);
1643         if (!si)                                 1373         if (!si)
1644                 return;                          1374                 return;
1645                                                  1375 
1646         ci = lock_cluster_or_swap_info(si, of    1376         ci = lock_cluster_or_swap_info(si, offset);
1647         if (size > 1 && swap_is_has_cache(si, !! 1377         if (size == SWAPFILE_CLUSTER) {
1648                 unlock_cluster_or_swap_info(s !! 1378                 VM_BUG_ON(!cluster_is_huge(ci));
1649                 spin_lock(&si->lock);         !! 1379                 map = si->swap_map + offset;
1650                 swap_entry_range_free(si, ent !! 1380                 for (i = 0; i < SWAPFILE_CLUSTER; i++) {
1651                 spin_unlock(&si->lock);       !! 1381                         val = map[i];
1652                 return;                       !! 1382                         VM_BUG_ON(!(val & SWAP_HAS_CACHE));
                                                   >> 1383                         if (val == SWAP_HAS_CACHE)
                                                   >> 1384                                 free_entries++;
                                                   >> 1385                 }
                                                   >> 1386                 cluster_clear_huge(ci);
                                                   >> 1387                 if (free_entries == SWAPFILE_CLUSTER) {
                                                   >> 1388                         unlock_cluster_or_swap_info(si, ci);
                                                   >> 1389                         spin_lock(&si->lock);
                                                   >> 1390                         mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
                                                   >> 1391                         swap_free_cluster(si, idx);
                                                   >> 1392                         spin_unlock(&si->lock);
                                                   >> 1393                         return;
                                                   >> 1394                 }
1653         }                                        1395         }
1654         for (int i = 0; i < size; i++, entry. !! 1396         for (i = 0; i < size; i++, entry.val++) {
1655                 if (!__swap_entry_free_locked    1397                 if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
1656                         unlock_cluster_or_swa    1398                         unlock_cluster_or_swap_info(si, ci);
1657                         free_swap_slot(entry)    1399                         free_swap_slot(entry);
1658                         if (i == size - 1)       1400                         if (i == size - 1)
1659                                 return;          1401                                 return;
1660                         lock_cluster_or_swap_    1402                         lock_cluster_or_swap_info(si, offset);
1661                 }                                1403                 }
1662         }                                        1404         }
1663         unlock_cluster_or_swap_info(si, ci);     1405         unlock_cluster_or_swap_info(si, ci);
1664 }                                                1406 }
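
The size > 1 fast path in put_swap_folio() frees a whole large-folio range with one swap_entry_range_free() call, but only when every slot holds exactly SWAP_HAS_CACHE (a cache reference and nothing else). A plausible shape for that swap_is_has_cache() test, reconstructed for illustration rather than quoted from the 6.12 tree:

	static bool swap_is_has_cache(struct swap_info_struct *si,
				      unsigned long offset, int nr_pages)
	{
		unsigned char *map = si->swap_map + offset;
		unsigned char *map_end = map + nr_pages;

		do {
			/* every slot must at least carry the cache bit */
			VM_BUG_ON(!(*map & SWAP_HAS_CACHE));
			if (*map != SWAP_HAS_CACHE)
				return false;
		} while (++map < map_end);

		return true;
	}

If any slot still has pte references, the code falls back to dropping SWAP_HAS_CACHE one slot at a time, exactly as the order-0 path does.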
1665                                                  1407 
                                                   >> 1408 #ifdef CONFIG_THP_SWAP
                                                   >> 1409 int split_swap_cluster(swp_entry_t entry)
                                                   >> 1410 {
                                                   >> 1411         struct swap_info_struct *si;
                                                   >> 1412         struct swap_cluster_info *ci;
                                                   >> 1413         unsigned long offset = swp_offset(entry);
                                                   >> 1414 
                                                   >> 1415         si = _swap_info_get(entry);
                                                   >> 1416         if (!si)
                                                   >> 1417                 return -EBUSY;
                                                   >> 1418         ci = lock_cluster(si, offset);
                                                   >> 1419         cluster_clear_huge(ci);
                                                   >> 1420         unlock_cluster(ci);
                                                   >> 1421         return 0;
                                                   >> 1422 }
                                                   >> 1423 #endif
                                                   >> 1424 
1666 static int swp_entry_cmp(const void *ent1, co    1425 static int swp_entry_cmp(const void *ent1, const void *ent2)
1667 {                                                1426 {
1668         const swp_entry_t *e1 = ent1, *e2 = e    1427         const swp_entry_t *e1 = ent1, *e2 = ent2;
1669                                                  1428 
1670         return (int)swp_type(*e1) - (int)swp_    1429         return (int)swp_type(*e1) - (int)swp_type(*e2);
1671 }                                                1430 }
1672                                                  1431 
1673 void swapcache_free_entries(swp_entry_t *entr    1432 void swapcache_free_entries(swp_entry_t *entries, int n)
1674 {                                                1433 {
1675         struct swap_info_struct *p, *prev;       1434         struct swap_info_struct *p, *prev;
1676         int i;                                   1435         int i;
1677                                                  1436 
1678         if (n <= 0)                              1437         if (n <= 0)
1679                 return;                          1438                 return;
1680                                                  1439 
1681         prev = NULL;                             1440         prev = NULL;
1682         p = NULL;                                1441         p = NULL;
1683                                                  1442 
1684         /*                                       1443         /*
1685          * Sort swap entries by swap device,     1444          * Sort swap entries by swap device, so each lock is only taken once.
1686          * nr_swapfiles isn't absolutely corr    1445          * nr_swapfiles isn't absolutely correct, but the overhead of sort() is
1687          * so low that it isn't necessary to     1446          * so low that it isn't necessary to optimize further.
1688          */                                      1447          */
1689         if (nr_swapfiles > 1)                    1448         if (nr_swapfiles > 1)
1690                 sort(entries, n, sizeof(entri    1449                 sort(entries, n, sizeof(entries[0]), swp_entry_cmp, NULL);
1691         for (i = 0; i < n; ++i) {                1450         for (i = 0; i < n; ++i) {
1692                 p = swap_info_get_cont(entrie    1451                 p = swap_info_get_cont(entries[i], prev);
1693                 if (p)                           1452                 if (p)
1694                         swap_entry_range_free !! 1453                         swap_entry_free(p, entries[i]);
1695                 prev = p;                        1454                 prev = p;
1696         }                                        1455         }
1697         if (p)                                   1456         if (p)
1698                 spin_unlock(&p->lock);           1457                 spin_unlock(&p->lock);
1699 }                                                1458 }
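
The sort makes same-device entries adjacent, so swap_info_get_cont() hands p->lock from one entry to the next instead of unlocking and relocking per entry. A userspace rendering of the comparator's effect (the 58-bit type shift is an illustrative packing, as before):

	#include <stdio.h>
	#include <stdlib.h>

	#define TYPE_SHIFT 58	/* illustrative type/offset split */

	typedef struct { unsigned long val; } swp_entry_t;

	/* Order by device index only, mirroring swp_entry_cmp() above. */
	static int swp_entry_cmp(const void *a, const void *b)
	{
		const swp_entry_t *e1 = a, *e2 = b;

		return (int)(e1->val >> TYPE_SHIFT) - (int)(e2->val >> TYPE_SHIFT);
	}

	int main(void)
	{
		swp_entry_t e[] = { { (2UL << TYPE_SHIFT) | 7 },
				    { (0UL << TYPE_SHIFT) | 3 },
				    { (2UL << TYPE_SHIFT) | 1 } };

		/* devices [2,0,2] -> [0,2,2]: device 2 is locked only once */
		qsort(e, 3, sizeof(e[0]), swp_entry_cmp);
		for (int i = 0; i < 3; i++)
			printf("type %lu offset %lu\n",
			       e[i].val >> TYPE_SHIFT,
			       e[i].val & ((1UL << TYPE_SHIFT) - 1));
		return 0;
	}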
1700                                                  1459 
                                                   >> 1460 /*
                                                   >> 1461  * How many references to page are currently swapped out?
                                                   >> 1462  * This does not give an exact answer when swap count is continued,
                                                   >> 1463  * but does include the high COUNT_CONTINUED flag to allow for that.
                                                   >> 1464  */
                                                   >> 1465 int page_swapcount(struct page *page)
                                                   >> 1466 {
                                                   >> 1467         int count = 0;
                                                   >> 1468         struct swap_info_struct *p;
                                                   >> 1469         struct swap_cluster_info *ci;
                                                   >> 1470         swp_entry_t entry;
                                                   >> 1471         unsigned long offset;
                                                   >> 1472 
                                                   >> 1473         entry.val = page_private(page);
                                                   >> 1474         p = _swap_info_get(entry);
                                                   >> 1475         if (p) {
                                                   >> 1476                 offset = swp_offset(entry);
                                                   >> 1477                 ci = lock_cluster_or_swap_info(p, offset);
                                                   >> 1478                 count = swap_count(p->swap_map[offset]);
                                                   >> 1479                 unlock_cluster_or_swap_info(p, ci);
                                                   >> 1480         }
                                                   >> 1481         return count;
                                                   >> 1482 }
                                                   >> 1483 
1701 int __swap_count(swp_entry_t entry)              1484 int __swap_count(swp_entry_t entry)
1702 {                                                1485 {
1703         struct swap_info_struct *si = swp_swa !! 1486         struct swap_info_struct *si;
1704         pgoff_t offset = swp_offset(entry);      1487         pgoff_t offset = swp_offset(entry);
                                                   >> 1488         int count = 0;
1705                                                  1489 
1706         return swap_count(si->swap_map[offset !! 1490         si = get_swap_device(entry);
                                                   >> 1491         if (si) {
                                                   >> 1492                 count = swap_count(si->swap_map[offset]);
                                                   >> 1493                 put_swap_device(si);
                                                   >> 1494         }
                                                   >> 1495         return count;
1707 }                                                1496 }
1708                                                  1497 
1709 /*                                            !! 1498 static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
1710  * How many references to @entry are currentl << 
1711  * This does not give an exact answer when sw << 
1712  * but does include the high COUNT_CONTINUED  << 
1713  */                                           << 
1714 int swap_swapcount(struct swap_info_struct *s << 
1715 {                                                1499 {
                                                   >> 1500         int count = 0;
1716         pgoff_t offset = swp_offset(entry);      1501         pgoff_t offset = swp_offset(entry);
1717         struct swap_cluster_info *ci;            1502         struct swap_cluster_info *ci;
1718         int count;                            << 
1719                                                  1503 
1720         ci = lock_cluster_or_swap_info(si, of    1504         ci = lock_cluster_or_swap_info(si, offset);
1721         count = swap_count(si->swap_map[offse    1505         count = swap_count(si->swap_map[offset]);
1722         unlock_cluster_or_swap_info(si, ci);     1506         unlock_cluster_or_swap_info(si, ci);
1723         return count;                            1507         return count;
1724 }                                                1508 }
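
A count larger than SWAP_MAP_MAX spills into continuation pages, and swp_swapcount() below reassembles the exact value as a mixed-radix number: the in-place byte is the low digit (radix SWAP_MAP_MAX + 1) and each continuation byte is one digit higher (radix SWAP_CONT_MAX + 1). A userspace model of that arithmetic (the constants match include/linux/swap.h; the digit values 5 and 2 are made up):

	#include <stdio.h>

	#define SWAP_MAP_MAX	0x3e
	#define SWAP_CONT_MAX	0x7f

	int main(void)
	{
		/* in-place byte saturated, plus two continuation digits */
		unsigned long count = SWAP_MAP_MAX;
		unsigned long n = SWAP_MAP_MAX + 1;

		count += 5 * n;		/* digit in 1st continuation page */
		n *= SWAP_CONT_MAX + 1;
		count += 2 * n;		/* digit in 2nd continuation page */

		printf("exact swap count: %lu\n", count);	/* 16505 */
		return 0;
	}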
1725                                                  1509 
1726 /*                                               1510 /*
1727  * How many references to @entry are currentl    1511  * How many references to @entry are currently swapped out?
                                                   >> 1512  * This does not give an exact answer when swap count is continued,
                                                   >> 1513  * but does include the high COUNT_CONTINUED flag to allow for that.
                                                   >> 1514  */
                                                   >> 1515 int __swp_swapcount(swp_entry_t entry)
                                                   >> 1516 {
                                                   >> 1517         int count = 0;
                                                   >> 1518         struct swap_info_struct *si;
                                                   >> 1519 
                                                   >> 1520         si = get_swap_device(entry);
                                                   >> 1521         if (si) {
                                                   >> 1522                 count = swap_swapcount(si, entry);
                                                   >> 1523                 put_swap_device(si);
                                                   >> 1524         }
                                                   >> 1525         return count;
                                                   >> 1526 }
                                                   >> 1527 
                                                   >> 1528 /*
                                                   >> 1529  * How many references to @entry are currently swapped out?
1728  * This considers COUNT_CONTINUED so it retur    1530  * This considers COUNT_CONTINUED so it returns exact answer.
1729  */                                              1531  */
1730 int swp_swapcount(swp_entry_t entry)             1532 int swp_swapcount(swp_entry_t entry)
1731 {                                                1533 {
1732         int count, tmp_count, n;                 1534         int count, tmp_count, n;
1733         struct swap_info_struct *si;          !! 1535         struct swap_info_struct *p;
1734         struct swap_cluster_info *ci;            1536         struct swap_cluster_info *ci;
1735         struct page *page;                       1537         struct page *page;
1736         pgoff_t offset;                          1538         pgoff_t offset;
1737         unsigned char *map;                      1539         unsigned char *map;
1738                                                  1540 
1739         si = _swap_info_get(entry);           !! 1541         p = _swap_info_get(entry);
1740         if (!si)                              !! 1542         if (!p)
1741                 return 0;                        1543                 return 0;
1742                                                  1544 
1743         offset = swp_offset(entry);              1545         offset = swp_offset(entry);
1744                                                  1546 
1745         ci = lock_cluster_or_swap_info(si, offset); !! 1547         ci = lock_cluster_or_swap_info(p, offset);
1746                                                  1548 
1747         count = swap_count(si->swap_map[offset]); !! 1549         count = swap_count(p->swap_map[offset]);
1748         if (!(count & COUNT_CONTINUED))          1550         if (!(count & COUNT_CONTINUED))
1749                 goto out;                        1551                 goto out;
1750                                                  1552 
1751         count &= ~COUNT_CONTINUED;               1553         count &= ~COUNT_CONTINUED;
1752         n = SWAP_MAP_MAX + 1;                    1554         n = SWAP_MAP_MAX + 1;
1753                                                  1555 
1754         page = vmalloc_to_page(si->swap_map + offset); !! 1556         page = vmalloc_to_page(p->swap_map + offset);
1755         offset &= ~PAGE_MASK;                    1557         offset &= ~PAGE_MASK;
1756         VM_BUG_ON(page_private(page) != SWP_C    1558         VM_BUG_ON(page_private(page) != SWP_CONTINUED);
1757                                                  1559 
1758         do {                                     1560         do {
1759                 page = list_next_entry(page,     1561                 page = list_next_entry(page, lru);
1760                 map = kmap_local_page(page);  !! 1562                 map = kmap_atomic(page);
1761                 tmp_count = map[offset];         1563                 tmp_count = map[offset];
1762                 kunmap_local(map);            !! 1564                 kunmap_atomic(map);
1763                                                  1565 
1764                 count += (tmp_count & ~COUNT_    1566                 count += (tmp_count & ~COUNT_CONTINUED) * n;
1765                 n *= (SWAP_CONT_MAX + 1);        1567                 n *= (SWAP_CONT_MAX + 1);
1766         } while (tmp_count & COUNT_CONTINUED)    1568         } while (tmp_count & COUNT_CONTINUED);
1767 out:                                             1569 out:
1768         unlock_cluster_or_swap_info(si, ci);  !! 1570         unlock_cluster_or_swap_info(p, ci);
1769         return count;                            1571         return count;
1770 }                                                1572 }
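
The loop above treats the swap count as a multi-digit number: the main swap_map byte is the low-order digit, and each continuation byte reached via list_next_entry() is the next digit, with the weight starting at SWAP_MAP_MAX + 1 and multiplied by SWAP_CONT_MAX + 1 per level. A self-contained sketch of the same arithmetic, using the constant values from include/linux/swap.h (illustrative, not kernel code):

#include <stdio.h>

/* Values from linux/swap.h; userspace model for illustration. */
#define SWAP_MAP_MAX    0x3e    /* max count in the main swap_map byte */
#define SWAP_CONT_MAX   0x7f    /* max count in one continuation byte */
#define COUNT_CONTINUED 0x80    /* more digits follow on the next level */

/* Mirror of the loop in swp_swapcount(): the main byte is the low-order
 * digit; each continuation byte is a higher-order digit with radix
 * SWAP_CONT_MAX + 1 and initial weight SWAP_MAP_MAX + 1. */
static long total_count(unsigned char base, const unsigned char *cont, int levels)
{
        long count = base & ~COUNT_CONTINUED;
        long n = SWAP_MAP_MAX + 1;
        int i;

        if (!(base & COUNT_CONTINUED))
                return count;
        for (i = 0; i < levels; i++) {
                count += (cont[i] & ~COUNT_CONTINUED) * n;
                n *= SWAP_CONT_MAX + 1;
                if (!(cont[i] & COUNT_CONTINUED))
                        break;
        }
        return count;
}

int main(void)
{
        /* base digit 5 with one continuation digit of 2:
         * total = 5 + 2 * (SWAP_MAP_MAX + 1) = 5 + 2 * 63 = 131 */
        unsigned char cont[] = { 2 };
        printf("%ld\n", total_count(5 | COUNT_CONTINUED, cont, 1));
        return 0;
}
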
1771                                                  1573 
1772 static bool swap_page_trans_huge_swapped(stru    1574 static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
1773                                          swp_entry_t entry, int order) !! 1575                                          swp_entry_t entry)
1774 {                                                1576 {
1775         struct swap_cluster_info *ci;            1577         struct swap_cluster_info *ci;
1776         unsigned char *map = si->swap_map;       1578         unsigned char *map = si->swap_map;
1777         unsigned int nr_pages = 1 << order;   << 
1778         unsigned long roffset = swp_offset(en    1579         unsigned long roffset = swp_offset(entry);
1779         unsigned long offset = round_down(roffset, nr_pages); !! 1580         unsigned long offset = round_down(roffset, SWAPFILE_CLUSTER);
1780         int i;                                   1581         int i;
1781         bool ret = false;                        1582         bool ret = false;
1782                                                  1583 
1783         ci = lock_cluster_or_swap_info(si, of    1584         ci = lock_cluster_or_swap_info(si, offset);
1784         if (!ci || nr_pages == 1) {           !! 1585         if (!ci || !cluster_is_huge(ci)) {
1785                 if (swap_count(map[roffset]))    1586                 if (swap_count(map[roffset]))
1786                         ret = true;              1587                         ret = true;
1787                 goto unlock_out;                 1588                 goto unlock_out;
1788         }                                        1589         }
1789         for (i = 0; i < nr_pages; i++) {      !! 1590         for (i = 0; i < SWAPFILE_CLUSTER; i++) {
1790                 if (swap_count(map[offset + i    1591                 if (swap_count(map[offset + i])) {
1791                         ret = true;              1592                         ret = true;
1792                         break;                   1593                         break;
1793                 }                                1594                 }
1794         }                                        1595         }
1795 unlock_out:                                      1596 unlock_out:
1796         unlock_cluster_or_swap_info(si, ci);     1597         unlock_cluster_or_swap_info(si, ci);
1797         return ret;                              1598         return ret;
1798 }                                                1599 }
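
swap_page_trans_huge_swapped() relies on large folios being naturally aligned in swap: round_down(roffset, nr_pages) lands on the first entry of the order-`order` folio, and the folio occupies the next nr_pages slots. A tiny standalone illustration of that alignment arithmetic (round_down here is the usual power-of-two form; the offset and order are made-up inputs):

#include <stdio.h>

/* Power-of-two round-down, as the kernel macro behaves for these inputs. */
#define round_down(x, y) ((x) & ~((unsigned long)(y) - 1))

int main(void)
{
        unsigned long roffset = 0x12345;        /* arbitrary swap offset */
        unsigned int order = 4;                 /* e.g. a 16-page mTHP */
        unsigned int nr_pages = 1u << order;
        unsigned long offset = round_down(roffset, nr_pages);

        /* The folio's entries occupy [offset, offset + nr_pages). */
        printf("folio spans offsets 0x%lx..0x%lx\n",
               offset, offset + nr_pages - 1);
        return 0;
}
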
1799                                                  1600 
1800 static bool folio_swapped(struct folio *folio) !! 1601 static bool page_swapped(struct page *page)
1801 {                                                1602 {
1802         swp_entry_t entry = folio->swap;      !! 1603         swp_entry_t entry;
1803         struct swap_info_struct *si = _swap_info_get(entry); !! 1604         struct swap_info_struct *si;
1804                                                  1605 
1805         if (!si)                              !! 1606         if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page)))
1806                 return false;                 !! 1607                 return page_swapcount(page) != 0;
                                                   >> 1608 
                                                   >> 1609         page = compound_head(page);
                                                   >> 1610         entry.val = page_private(page);
                                                   >> 1611         si = _swap_info_get(entry);
                                                   >> 1612         if (si)
                                                   >> 1613                 return swap_page_trans_huge_swapped(si, entry);
                                                   >> 1614         return false;
                                                   >> 1615 }
1807                                                  1616 
1808         if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio))) !! 1617 static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
1809                 return swap_swapcount(si, entry) != 0; !! 1618                                          int *total_swapcount)
                                                   >> 1619 {
                                                   >> 1620         int i, map_swapcount, _total_mapcount, _total_swapcount;
                                                   >> 1621         unsigned long offset = 0;
                                                   >> 1622         struct swap_info_struct *si;
                                                   >> 1623         struct swap_cluster_info *ci = NULL;
                                                   >> 1624         unsigned char *map = NULL;
                                                   >> 1625         int mapcount, swapcount = 0;
                                                   >> 1626 
                                                   >> 1627         /* hugetlbfs shouldn't call it */
                                                   >> 1628         VM_BUG_ON_PAGE(PageHuge(page), page);
                                                   >> 1629 
                                                   >> 1630         if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
                                                   >> 1631                 mapcount = page_trans_huge_mapcount(page, total_mapcount);
                                                   >> 1632                 if (PageSwapCache(page))
                                                   >> 1633                         swapcount = page_swapcount(page);
                                                   >> 1634                 if (total_swapcount)
                                                   >> 1635                         *total_swapcount = swapcount;
                                                   >> 1636                 return mapcount + swapcount;
                                                   >> 1637         }
                                                   >> 1638 
                                                   >> 1639         page = compound_head(page);
                                                   >> 1640 
                                                   >> 1641         _total_mapcount = _total_swapcount = map_swapcount = 0;
                                                   >> 1642         if (PageSwapCache(page)) {
                                                   >> 1643                 swp_entry_t entry;
1810                                                  1644 
1811         return swap_page_trans_huge_swapped(si, entry, folio_order(folio)); !! 1645                 entry.val = page_private(page);
                                                   >> 1646                 si = _swap_info_get(entry);
                                                   >> 1647                 if (si) {
                                                   >> 1648                         map = si->swap_map;
                                                   >> 1649                         offset = swp_offset(entry);
                                                   >> 1650                 }
                                                   >> 1651         }
                                                   >> 1652         if (map)
                                                   >> 1653                 ci = lock_cluster(si, offset);
                                                   >> 1654         for (i = 0; i < HPAGE_PMD_NR; i++) {
                                                   >> 1655                 mapcount = atomic_read(&page[i]._mapcount) + 1;
                                                   >> 1656                 _total_mapcount += mapcount;
                                                   >> 1657                 if (map) {
                                                   >> 1658                         swapcount = swap_count(map[offset + i]);
                                                   >> 1659                         _total_swapcount += swapcount;
                                                   >> 1660                 }
                                                   >> 1661                 map_swapcount = max(map_swapcount, mapcount + swapcount);
                                                   >> 1662         }
                                                   >> 1663         unlock_cluster(ci);
                                                   >> 1664         if (PageDoubleMap(page)) {
                                                   >> 1665                 map_swapcount -= 1;
                                                   >> 1666                 _total_mapcount -= HPAGE_PMD_NR;
                                                   >> 1667         }
                                                   >> 1668         mapcount = compound_mapcount(page);
                                                   >> 1669         map_swapcount += mapcount;
                                                   >> 1670         _total_mapcount += mapcount;
                                                   >> 1671         if (total_mapcount)
                                                   >> 1672                 *total_mapcount = _total_mapcount;
                                                   >> 1673         if (total_swapcount)
                                                   >> 1674                 *total_swapcount = _total_swapcount;
                                                   >> 1675 
                                                   >> 1676         return map_swapcount;
1812 }                                                1677 }
1813                                                  1678 
1814 static bool folio_swapcache_freeable(struct folio *folio) !! 1679 /*
                                                   >> 1680  * We can write to an anon page without COW if there are no other references
                                                   >> 1681  * to it.  And as a side-effect, free up its swap: because the old content
                                                   >> 1682  * on disk will never be read, and seeking back there to write new content
                                                   >> 1683  * later would only waste time away from clustering.
                                                   >> 1684  *
                                                   >> 1685  * NOTE: total_map_swapcount should not be relied upon by the caller if
                                                   >> 1686  * reuse_swap_page() returns false, but it may be always overwritten
                                                   >> 1687  * (see the other implementation for CONFIG_SWAP=n).
                                                   >> 1688  */
                                                   >> 1689 bool reuse_swap_page(struct page *page, int *total_map_swapcount)
1815 {                                                1690 {
1816         VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); !! 1691         int count, total_mapcount, total_swapcount;
1817                                                  1692 
1818         if (!folio_test_swapcache(folio))     !! 1693         VM_BUG_ON_PAGE(!PageLocked(page), page);
1819                 return false;                 !! 1694         if (unlikely(PageKsm(page)))
1820         if (folio_test_writeback(folio))      << 
1821                 return false;                    1695                 return false;
                                                   >> 1696         count = page_trans_huge_map_swapcount(page, &total_mapcount,
                                                   >> 1697                                               &total_swapcount);
                                                   >> 1698         if (total_map_swapcount)
                                                   >> 1699                 *total_map_swapcount = total_mapcount + total_swapcount;
                                                   >> 1700         if (count == 1 && PageSwapCache(page) &&
                                                   >> 1701             (likely(!PageTransCompound(page)) ||
                                                   >> 1702              /* The remaining swap count will be freed soon */
                                                   >> 1703              total_swapcount == page_swapcount(page))) {
                                                   >> 1704                 if (!PageWriteback(page)) {
                                                   >> 1705                         page = compound_head(page);
                                                   >> 1706                         delete_from_swap_cache(page);
                                                   >> 1707                         SetPageDirty(page);
                                                   >> 1708                 } else {
                                                   >> 1709                         swp_entry_t entry;
                                                   >> 1710                         struct swap_info_struct *p;
                                                   >> 1711 
                                                   >> 1712                         entry.val = page_private(page);
                                                   >> 1713                         p = swap_info_get(entry);
                                                   >> 1714                         if (p->flags & SWP_STABLE_WRITES) {
                                                   >> 1715                                 spin_unlock(&p->lock);
                                                   >> 1716                                 return false;
                                                   >> 1717                         }
                                                   >> 1718                         spin_unlock(&p->lock);
                                                   >> 1719                 }
                                                   >> 1720         }
                                                   >> 1721 
                                                   >> 1722         return count <= 1;
                                                   >> 1723 }
                                                   >> 1724 
                                                   >> 1725 /*
                                                   >> 1726  * If swap is getting full, or if there are no more mappings of this page,
                                                   >> 1727  * then try_to_free_swap is called to free its swap space.
                                                   >> 1728  */
                                                   >> 1729 int try_to_free_swap(struct page *page)
                                                   >> 1730 {
                                                   >> 1731         VM_BUG_ON_PAGE(!PageLocked(page), page);
                                                   >> 1732 
                                                   >> 1733         if (!PageSwapCache(page))
                                                   >> 1734                 return 0;
                                                   >> 1735         if (PageWriteback(page))
                                                   >> 1736                 return 0;
                                                   >> 1737         if (page_swapped(page))
                                                   >> 1738                 return 0;
1822                                                  1739 
1823         /*                                       1740         /*
1824          * Once hibernation has begun to crea    1741          * Once hibernation has begun to create its image of memory,
1825          * there's a danger that one of the calls to folio_free_swap() !! 1742          * there's a danger that one of the calls to try_to_free_swap()
1826          * - most probably a call from __try_    1743          * - most probably a call from __try_to_reclaim_swap() while
1827          * hibernation is allocating its own     1744          * hibernation is allocating its own swap pages for the image,
1828          * but conceivably even a call from m    1745          * but conceivably even a call from memory reclaim - will free
1829          * the swap from a folio which has already been recorded in the !! 1746          * the swap from a page which has already been recorded in the
1830          * image as a clean swapcache folio, and then reuse its swap for !! 1747          * image as a clean swapcache page, and then reuse its swap for
1831          * another page of the image.  On waking from hibernation, the      1748          * another page of the image.  On waking from hibernation, the
1832          * original folio might be freed under memory pressure, then !! 1749          * original page might be freed under memory pressure, then
1833          * later read back in from swap, now     1750          * later read back in from swap, now with the wrong data.
1834          *                                       1751          *
1835          * Hibernation suspends storage while    1752          * Hibernation suspends storage while it is writing the image
1836          * to disk so check that here.           1753          * to disk so check that here.
1837          */                                      1754          */
1838         if (pm_suspended_storage())              1755         if (pm_suspended_storage())
1839                 return false;                 !! 1756                 return 0;
1840                                               << 
1841         return true;                          << 
1842 }                                             << 
1843                                               << 
1844 /**                                           <<
1845  * folio_free_swap() - Free the swap space used for this folio. <<
1846  * @folio: The folio to remove.               <<
1847  *                                            <<
1848  * If swap is getting full, or if there are no more mappings of this folio, <<
1849  * then call folio_free_swap to free its swap space. <<
1850  *                                            <<
1851  * Return: true if we were able to release the swap space. <<
1852  */                                           <<
1853 bool folio_free_swap(struct folio *folio)     << 
1854 {                                             << 
1855         if (!folio_swapcache_freeable(folio)) << 
1856                 return false;                 << 
1857         if (folio_swapped(folio))             << 
1858                 return false;                 << 
1859                                                  1757 
1860         delete_from_swap_cache(folio);        !! 1758         page = compound_head(page);
1861         folio_set_dirty(folio);               !! 1759         delete_from_swap_cache(page);
1862         return true;                          !! 1760         SetPageDirty(page);
                                                   >> 1761         return 1;
1863 }                                                1762 }
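
As a rough model of the control flow, the sketch below restates folio_free_swap()'s decision ladder with stand-in predicates; struct folio and the helpers here are toy stand-ins, not the kernel API. It also shows why folio_set_dirty() must follow delete_from_swap_cache(): once the swap slot is released, the old on-disk copy can never be read back, so the page has to be treated as dirty from now on.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the folio_free_swap() decision ladder. */
struct folio { bool swapcache, writeback, swapped, dirty; };

static bool folio_free_swap_model(struct folio *f, bool storage_suspended)
{
        if (!f->swapcache || f->writeback || storage_suspended)
                return false;   /* folio_swapcache_freeable() fails */
        if (f->swapped)
                return false;   /* some swap-map count is still nonzero */
        f->swapcache = false;   /* delete_from_swap_cache() */
        f->dirty = true;        /* folio_set_dirty(): content now lives
                                 * only in memory and must be rewritten */
        return true;
}

int main(void)
{
        struct folio f = { .swapcache = true };
        printf("freed=%d dirty=%d\n", folio_free_swap_model(&f, false), f.dirty);
        return 0;
}
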
1864                                                  1763 
1865 /**                                           !! 1764 /*
1866  * free_swap_and_cache_nr() - Release reference on range of swap entries and !! 1765  * Free the swap entry like above, but also try to
1867  *                            reclaim their cache if no more references remain. !! 1766  * free the page cache entry if it is the last user.
1868  * @entry: First entry of range.              <<
1869  * @nr: Number of entries in range.           <<
1870  *                                            <<
1871  * For each swap entry in the contiguous range, release a reference. If any swap <<
1872  * entries become free, try to reclaim their underlying folios, if present. The <<
1873  * offset range is defined by [entry.offset, entry.offset + nr). <<
1874  */                                              1767  */
1875 void free_swap_and_cache_nr(swp_entry_t entry, int nr) !! 1768 int free_swap_and_cache(swp_entry_t entry)
1876 {                                                1769 {
1877         const unsigned long start_offset = swp_offset(entry); !! 1770         struct swap_info_struct *p;
1878         const unsigned long end_offset = start_offset + nr; !! 1771         unsigned char count;
1879         struct swap_info_struct *si;          << 
1880         bool any_only_cache = false;          << 
1881         unsigned long offset;                 << 
1882                                                  1772 
1883         if (non_swap_entry(entry))               1773         if (non_swap_entry(entry))
1884                 return;                       !! 1774                 return 1;
1885                                               << 
1886         si = get_swap_device(entry);          << 
1887         if (!si)                              << 
1888                 return;                       << 
1889                                                  1775 
1890         if (WARN_ON(end_offset > si->max))    !! 1776         p = get_swap_device(entry);
1891                 goto out;                     !! 1777         if (p) {
1892                                               !! 1778                 if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
1893         /*                                    !! 1779                         put_swap_device(p);
1894          * First free all entries in the range. <<
1895          */                                   << 
1896         any_only_cache = __swap_entries_free(si, entry, nr); <<
1897                                               << 
1898         /*                                    << 
1899          * Short-circuit the below loop if none of the entries had their <<
1900          * reference drop to zero.            << 
1901          */                                   << 
1902         if (!any_only_cache)                  << 
1903                 goto out;                     << 
1904                                               << 
1905         /*                                    << 
1906          * Now go back over the range trying to reclaim the swap cache. This is <<
1907          * more efficient for large folios because we will only try to reclaim <<
1908          * the swap once per folio in the common case. If we do <<
1909          * __swap_entry_free() and __try_to_reclaim_swap() in the same loop, the <<
1910          * latter will get a reference and lock the folio for every individual <<
1911          * page but will only succeed once the swap slot count has reached <<
1912          * zero.                              << 
1913          */                                   << 
1914         for (offset = start_offset; offset < end_offset; offset += nr) { <<
1915                 nr = 1;                       <<
1916                 if (READ_ONCE(si->swap_map[offset])) { <<
1917                         /*                    <<
1918                          * Folios are always naturally aligned in swap so <<
1919                          * advance forward to the next boundary. Zero means no <<
1920                          * folio was found for the swap entry, so advance by one <<
1921                          * in this case. Negative value means folio was found <<
1922                          * but could not be reclaimed. Here we can still advance <<
1923                          * to the next boundary. <<
1924                          */                   <<
1925                         nr = __try_to_reclaim_swap(si, offset, <<
1926                                                    TTRS_UNMAPPED | TTRS_FULL); <<
1927                         if (nr == 0)          <<
1928                                 nr = 1;       <<
1929                         else if (nr < 0)      <<
1930                                 nr = -nr;     <<
1931                         nr = ALIGN(offset + 1, nr) - offset; <<
1932                 }                                1781                 }
1933         }                                     << 
1934                                                  1782 
1935 out:                                          !! 1783                 count = __swap_entry_free(p, entry);
1936         put_swap_device(si);                  !! 1784                 if (count == SWAP_HAS_CACHE &&
                                                   >> 1785                     !swap_page_trans_huge_swapped(p, entry))
                                                   >> 1786                         __try_to_reclaim_swap(p, swp_offset(entry),
                                                   >> 1787                                               TTRS_UNMAPPED | TTRS_FULL);
                                                   >> 1788                 put_swap_device(p);
                                                   >> 1789         }
                                                   >> 1790         return p != NULL;
1937 }                                                1791 }
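
The advance step `nr = ALIGN(offset + 1, nr) - offset` deserves a worked example: if a probe at an unaligned offset hits an order-2 folio (nr = 4), the loop must resume at that folio's end boundary, not at offset + 4. A standalone check (ALIGN is the usual power-of-two round-up; the numbers are made up for the demo):

#include <stdio.h>

/* ALIGN() as in the kernel: round x up to power-of-two boundary a. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
        /* Probe at offset 10 finds (or fails to reclaim) a 4-page folio.
         * Folios are naturally aligned, so that folio covers [8, 12);
         * the next iteration should resume at offset 12. */
        unsigned long offset = 10, nr = 4;
        unsigned long advance = ALIGN(offset + 1, nr) - offset;

        printf("advance by %lu -> next offset %lu\n", advance, offset + advance);
        return 0;
}
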
1938                                                  1792 
1939 #ifdef CONFIG_HIBERNATION                        1793 #ifdef CONFIG_HIBERNATION
1940                                                  1794 
1941 swp_entry_t get_swap_page_of_type(int type)      1795 swp_entry_t get_swap_page_of_type(int type)
1942 {                                                1796 {
1943         struct swap_info_struct *si = swap_ty    1797         struct swap_info_struct *si = swap_type_to_swap_info(type);
1944         swp_entry_t entry = {0};                 1798         swp_entry_t entry = {0};
1945                                                  1799 
1946         if (!si)                                 1800         if (!si)
1947                 goto fail;                       1801                 goto fail;
1948                                                  1802 
1949         /* This is called for allocating swap    1803         /* This is called for allocating swap entry, not cache */
1950         spin_lock(&si->lock);                    1804         spin_lock(&si->lock);
1951         if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0)) !! 1805         if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry))
1952                 atomic_long_dec(&nr_swap_page    1806                 atomic_long_dec(&nr_swap_pages);
1953         spin_unlock(&si->lock);                  1807         spin_unlock(&si->lock);
1954 fail:                                            1808 fail:
1955         return entry;                            1809         return entry;
1956 }                                                1810 }
1957                                                  1811 
1958 /*                                               1812 /*
1959  * Find the swap type that corresponds to giv    1813  * Find the swap type that corresponds to given device (if any).
1960  *                                               1814  *
1961  * @offset - number of the PAGE_SIZE-sized bl    1815  * @offset - number of the PAGE_SIZE-sized block of the device, starting
1962  * from 0, in which the swap header is expect    1816  * from 0, in which the swap header is expected to be located.
1963  *                                               1817  *
1964  * This is needed for the suspend to disk (ak    1818  * This is needed for the suspend to disk (aka swsusp).
1965  */                                              1819  */
1966 int swap_type_of(dev_t device, sector_t offse    1820 int swap_type_of(dev_t device, sector_t offset)
1967 {                                                1821 {
1968         int type;                                1822         int type;
1969                                                  1823 
1970         if (!device)                             1824         if (!device)
1971                 return -1;                       1825                 return -1;
1972                                                  1826 
1973         spin_lock(&swap_lock);                   1827         spin_lock(&swap_lock);
1974         for (type = 0; type < nr_swapfiles; t    1828         for (type = 0; type < nr_swapfiles; type++) {
1975                 struct swap_info_struct *sis     1829                 struct swap_info_struct *sis = swap_info[type];
1976                                                  1830 
1977                 if (!(sis->flags & SWP_WRITEO    1831                 if (!(sis->flags & SWP_WRITEOK))
1978                         continue;                1832                         continue;
1979                                                  1833 
1980                 if (device == sis->bdev->bd_d    1834                 if (device == sis->bdev->bd_dev) {
1981                         struct swap_extent *s    1835                         struct swap_extent *se = first_se(sis);
1982                                                  1836 
1983                         if (se->start_block =    1837                         if (se->start_block == offset) {
1984                                 spin_unlock(&    1838                                 spin_unlock(&swap_lock);
1985                                 return type;     1839                                 return type;
1986                         }                        1840                         }
1987                 }                                1841                 }
1988         }                                        1842         }
1989         spin_unlock(&swap_lock);                 1843         spin_unlock(&swap_lock);
1990         return -ENODEV;                          1844         return -ENODEV;
1991 }                                                1845 }
1992                                                  1846 
1993 int find_first_swap(dev_t *device)               1847 int find_first_swap(dev_t *device)
1994 {                                                1848 {
1995         int type;                                1849         int type;
1996                                                  1850 
1997         spin_lock(&swap_lock);                   1851         spin_lock(&swap_lock);
1998         for (type = 0; type < nr_swapfiles; t    1852         for (type = 0; type < nr_swapfiles; type++) {
1999                 struct swap_info_struct *sis     1853                 struct swap_info_struct *sis = swap_info[type];
2000                                                  1854 
2001                 if (!(sis->flags & SWP_WRITEO    1855                 if (!(sis->flags & SWP_WRITEOK))
2002                         continue;                1856                         continue;
2003                 *device = sis->bdev->bd_dev;     1857                 *device = sis->bdev->bd_dev;
2004                 spin_unlock(&swap_lock);         1858                 spin_unlock(&swap_lock);
2005                 return type;                     1859                 return type;
2006         }                                        1860         }
2007         spin_unlock(&swap_lock);                 1861         spin_unlock(&swap_lock);
2008         return -ENODEV;                          1862         return -ENODEV;
2009 }                                                1863 }
2010                                                  1864 
2011 /*                                               1865 /*
2012  * Get the (PAGE_SIZE) block corresponding to    1866  * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
2013  * corresponding to given index in swap_info     1867  * corresponding to given index in swap_info (swap type).
2014  */                                              1868  */
2015 sector_t swapdev_block(int type, pgoff_t offs    1869 sector_t swapdev_block(int type, pgoff_t offset)
2016 {                                                1870 {
2017         struct swap_info_struct *si = swap_ty    1871         struct swap_info_struct *si = swap_type_to_swap_info(type);
2018         struct swap_extent *se;                  1872         struct swap_extent *se;
2019                                                  1873 
2020         if (!si || !(si->flags & SWP_WRITEOK)    1874         if (!si || !(si->flags & SWP_WRITEOK))
2021                 return 0;                        1875                 return 0;
2022         se = offset_to_swap_extent(si, offset    1876         se = offset_to_swap_extent(si, offset);
2023         return se->start_block + (offset - se    1877         return se->start_block + (offset - se->start_page);
2024 }                                                1878 }
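
swapdev_block() is pure extent arithmetic: offset_to_swap_extent() finds the extent whose [start_page, start_page + nr_pages) range contains the offset, and the block is linear within that run. A minimal model with a simplified extent struct (the real struct swap_extent carries the same three roles in its fields):

#include <stdio.h>

/* Simplified model of a swap extent: a contiguous run of swap offsets
 * mapped onto a contiguous run of on-disk PAGE_SIZE blocks. */
struct extent {
        unsigned long start_page;       /* first swap offset covered */
        unsigned long nr_pages;         /* run length */
        unsigned long start_block;      /* first disk block of the run */
};

static unsigned long block_of(const struct extent *se, unsigned long offset)
{
        /* Same arithmetic as swapdev_block(): linear within the extent. */
        return se->start_block + (offset - se->start_page);
}

int main(void)
{
        struct extent se = { .start_page = 100, .nr_pages = 50, .start_block = 9000 };
        printf("offset 123 -> block %lu\n", block_of(&se, 123));  /* 9023 */
        return 0;
}
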
2025                                                  1879 
2026 /*                                               1880 /*
2027  * Return either the total number of swap pag    1881  * Return either the total number of swap pages of given type, or the number
2028  * of free pages of that type (depending on @    1882  * of free pages of that type (depending on @free)
2029  *                                               1883  *
2030  * This is needed for software suspend           1884  * This is needed for software suspend
2031  */                                              1885  */
2032 unsigned int count_swap_pages(int type, int f    1886 unsigned int count_swap_pages(int type, int free)
2033 {                                                1887 {
2034         unsigned int n = 0;                      1888         unsigned int n = 0;
2035                                                  1889 
2036         spin_lock(&swap_lock);                   1890         spin_lock(&swap_lock);
2037         if ((unsigned int)type < nr_swapfiles    1891         if ((unsigned int)type < nr_swapfiles) {
2038                 struct swap_info_struct *sis     1892                 struct swap_info_struct *sis = swap_info[type];
2039                                                  1893 
2040                 spin_lock(&sis->lock);           1894                 spin_lock(&sis->lock);
2041                 if (sis->flags & SWP_WRITEOK)    1895                 if (sis->flags & SWP_WRITEOK) {
2042                         n = sis->pages;          1896                         n = sis->pages;
2043                         if (free)                1897                         if (free)
2044                                 n -= sis->inu    1898                                 n -= sis->inuse_pages;
2045                 }                                1899                 }
2046                 spin_unlock(&sis->lock);         1900                 spin_unlock(&sis->lock);
2047         }                                        1901         }
2048         spin_unlock(&swap_lock);                 1902         spin_unlock(&swap_lock);
2049         return n;                                1903         return n;
2050 }                                                1904 }
2051 #endif /* CONFIG_HIBERNATION */                  1905 #endif /* CONFIG_HIBERNATION */
2052                                                  1906 
2053 static inline int pte_same_as_swp(pte_t pte,     1907 static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
2054 {                                                1908 {
2055         return pte_same(pte_swp_clear_flags(p    1909         return pte_same(pte_swp_clear_flags(pte), swp_pte);
2056 }                                                1910 }
2057                                                  1911 
2058 /*                                               1912 /*
2059  * No need to decide whether this PTE shares     1913  * No need to decide whether this PTE shares the swap entry with others,
2060  * just let do_wp_page work it out if a write    1914  * just let do_wp_page work it out if a write is requested later - to
2061  * force COW, vm_page_prot omits write permis    1915  * force COW, vm_page_prot omits write permission from any private vma.
2062  */                                              1916  */
2063 static int unuse_pte(struct vm_area_struct *v    1917 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
2064                 unsigned long addr, swp_entry_t entry, struct folio *folio) !! 1918                 unsigned long addr, swp_entry_t entry, struct page *page)
2065 {                                                1919 {
2066         struct page *page;                    !! 1920         struct page *swapcache;
2067         struct folio *swapcache;              << 
2068         spinlock_t *ptl;                         1921         spinlock_t *ptl;
2069         pte_t *pte, new_pte, old_pte;         !! 1922         pte_t *pte;
2070         bool hwpoisoned = false;              << 
2071         int ret = 1;                             1923         int ret = 1;
2072                                                  1924 
2073         swapcache = folio;                    !! 1925         swapcache = page;
2074         folio = ksm_might_need_to_copy(folio, vma, addr); !! 1926         page = ksm_might_need_to_copy(page, vma, addr);
2075         if (unlikely(!folio))                 !! 1927         if (unlikely(!page))
2076                 return -ENOMEM;                  1928                 return -ENOMEM;
2077         else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { <<
2078                 hwpoisoned = true;            << 
2079                 folio = swapcache;            << 
2080         }                                     << 
2081                                               << 
2082         page = folio_file_page(folio, swp_offset(entry)); <<
2083         if (PageHWPoison(page))               << 
2084                 hwpoisoned = true;            << 
2085                                                  1929 
2086         pte = pte_offset_map_lock(vma->vm_mm,    1930         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
2087         if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte), !! 1931         if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
2088                                               swp_entry_to_pte(entry)))) { <<
2089                 ret = 0;                         1932                 ret = 0;
2090                 goto out;                        1933                 goto out;
2091         }                                        1934         }
2092                                                  1935 
2093         old_pte = ptep_get(pte);              << 
2094                                               << 
2095         if (unlikely(hwpoisoned || !folio_test_uptodate(folio))) { <<
2096                 swp_entry_t swp_entry;        << 
2097                                               << 
2098                 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); <<
2099                 if (hwpoisoned) {             << 
2100                         swp_entry = make_hwpoison_entry(page); <<
2101                 } else {                      <<
2102                         swp_entry = make_poisoned_swp_entry(); <<
2103                 }                             <<
2104                 new_pte = swp_entry_to_pte(swp_entry); <<
2105                 ret = 0;                      << 
2106                 goto setpte;                  << 
2107         }                                     << 
2108                                               << 
2109         /*                                    << 
2110          * Some architectures may have to restore extra metadata to the page <<
2111          * when reading from swap. This metadata may be indexed by swap entry <<
2112          * so this must be called before swap_free(). <<
2113          */                                   <<
2114         arch_swap_restore(folio_swap(entry, folio), folio); <<
2115                                               << 
2116         dec_mm_counter(vma->vm_mm, MM_SWAPENT    1936         dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
2117         inc_mm_counter(vma->vm_mm, MM_ANONPAG    1937         inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
2118         folio_get(folio);                     !! 1938         get_page(page);
2119         if (folio == swapcache) {             !! 1939         set_pte_at(vma->vm_mm, addr, pte,
2120                 rmap_t rmap_flags = RMAP_NONE; <<
2121                                               !! 1941         if (page == swapcache) {
2122                 /*                            !! 1942                 page_add_anon_rmap(page, vma, addr, false);
2123                  * See do_swap_page(): writeback would be problematic. <<
2124                  * However, we do a folio_wait_writeback() just before this <<
2125                  * call and have the folio locked. <<
2126                  */                           <<
2127                 VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); <<
2128                 if (pte_swp_exclusive(old_pte)) <<
2129                         rmap_flags |= RMAP_EXCLUSIVE; <<
2130                 /*                            << 
2131                  * We currently only expect small !folio_test_anon() folios to be <<
2132                  * fully exclusive or fully shared. If we ever get large folios <<
2133                  * here, we have to be careful. <<
2134                  */                           << 
2135                 if (!folio_test_anon(folio)) { <<
2136                         VM_WARN_ON_ONCE(folio_test_large(folio)); <<
2137                         VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); <<
2138                         folio_add_new_anon_rmap(folio, vma, addr, rmap_flags); <<
2139                 } else {                      <<
2140                         folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags); <<
2141                 }                             << 
2142         } else { /* ksm created a completely     1943         } else { /* ksm created a completely new copy */
2143                 folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE); !! 1944                 page_add_new_anon_rmap(page, vma, addr, false);
2144                 folio_add_lru_vma(folio, vma); !! 1945                 lru_cache_add_inactive_or_unevictable(page, vma);
2145         }                                        1946         }
2146         new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); <<
2147         if (pte_swp_soft_dirty(old_pte))      << 
2148                 new_pte = pte_mksoft_dirty(new_pte); <<
2149         if (pte_swp_uffd_wp(old_pte))         << 
2150                 new_pte = pte_mkuffd_wp(new_pte); <<
2151 setpte:                                       << 
2152         set_pte_at(vma->vm_mm, addr, pte, new_pte); <<
2153         swap_free(entry);                        1947         swap_free(entry);
2154 out:                                             1948 out:
2155         if (pte)                              !! 1949         pte_unmap_unlock(pte, ptl);
2156                 pte_unmap_unlock(pte, ptl);   !! 1950         if (page != swapcache) {
2157         if (folio != swapcache) {             !! 1951                 unlock_page(page);
2158                 folio_unlock(folio);          !! 1952                 put_page(page);
2159                 folio_put(folio);             << 
2160         }                                        1953         }
2161         return ret;                              1954         return ret;
2162 }                                                1955 }
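
One detail worth isolating from unuse_pte(): software PTE bits that were parked in the swap PTE (soft-dirty, uffd-wp) must be re-applied to the new present PTE before set_pte_at(). The toy bitmask model below mirrors that carry-over; the bit values and the helper name are invented for the sketch, since the real pte_t accessors are per-architecture.

#include <stdio.h>

/* Invented bit layout for illustration; real layouts are per-arch. */
#define SWP_SOFT_DIRTY  0x1     /* soft-dirty recorded in the swap PTE */
#define SWP_UFFD_WP     0x2     /* uffd-wp recorded in the swap PTE */
#define PTE_SOFT_DIRTY  0x10    /* soft-dirty on a present PTE */
#define PTE_UFFD_WP     0x20    /* uffd-wp on a present PTE */

/* Mirrors the tail of unuse_pte(): each software bit set on the old swap
 * PTE is re-expressed on the new present PTE before it is installed. */
static unsigned int make_present_pte(unsigned int old_swp_pte, unsigned int new_pte)
{
        if (old_swp_pte & SWP_SOFT_DIRTY)
                new_pte |= PTE_SOFT_DIRTY;
        if (old_swp_pte & SWP_UFFD_WP)
                new_pte |= PTE_UFFD_WP;
        return new_pte;
}

int main(void)
{
        unsigned int pte = make_present_pte(SWP_SOFT_DIRTY, 0x100);
        printf("new pte flags: %#x\n", pte);
        return 0;
}
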
2163                                                  1956 
2164 static int unuse_pte_range(struct vm_area_str    1957 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
2165                         unsigned long addr, u    1958                         unsigned long addr, unsigned long end,
2166                         unsigned int type)    !! 1959                         unsigned int type, bool frontswap,
                                                   >> 1960                         unsigned long *fs_pages_to_unuse)
2167 {                                                1961 {
2168         pte_t *pte = NULL;                    !! 1962         struct page *page;
                                                   >> 1963         swp_entry_t entry;
                                                   >> 1964         pte_t *pte;
2169         struct swap_info_struct *si;             1965         struct swap_info_struct *si;
                                                   >> 1966         unsigned long offset;
                                                   >> 1967         int ret = 0;
                                                   >> 1968         volatile unsigned char *swap_map;
2170                                                  1969 
2171         si = swap_info[type];                    1970         si = swap_info[type];
                                                   >> 1971         pte = pte_offset_map(pmd, addr);
2172         do {                                     1972         do {
2173                 struct folio *folio;          !! 1973                 if (!is_swap_pte(*pte))
2174                 unsigned long offset;         << 
2175                 unsigned char swp_count;      << 
2176                 swp_entry_t entry;            << 
2177                 int ret;                      << 
2178                 pte_t ptent;                  << 
2179                                               << 
2180                 if (!pte++) {                 << 
2181                         pte = pte_offset_map(pmd, addr); <<
2182                         if (!pte)             << 
2183                                 break;        << 
2184                 }                             << 
2185                                               << 
2186                 ptent = ptep_get_lockless(pte); <<
2187                                               << 
2188                 if (!is_swap_pte(ptent))      << 
2189                         continue;                1974                         continue;
2190                                                  1975 
2191                 entry = pte_to_swp_entry(ptent); !! 1976                 entry = pte_to_swp_entry(*pte);
2192                 if (swp_type(entry) != type)     1977                 if (swp_type(entry) != type)
2193                         continue;                1978                         continue;
2194                                                  1979 
2195                 offset = swp_offset(entry);      1980                 offset = swp_offset(entry);
2196                 pte_unmap(pte);               !! 1981                 if (frontswap && !frontswap_test(si, offset))
2197                 pte = NULL;                   !! 1982                         continue;
2198                                                  1983 
2199                 folio = swap_cache_get_folio(entry, vma, addr); !! 1984                 pte_unmap(pte);
2200                 if (!folio) {                 !! 1985                 swap_map = &si->swap_map[offset];
                                                   >> 1986                 page = lookup_swap_cache(entry, vma, addr);
                                                   >> 1987                 if (!page) {
2201                         struct vm_fault vmf =    1988                         struct vm_fault vmf = {
2202                                 .vma = vma,      1989                                 .vma = vma,
2203                                 .address = ad    1990                                 .address = addr,
2204                                 .real_address = addr, <<
2205                                 .pmd = pmd,      1991                                 .pmd = pmd,
2206                         };                       1992                         };
2207                                                  1993 
2208                         folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, !! 1994                         page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
2209                                                  1995                                                 &vmf);
2210                 }                                1996                 }
2211                 if (!folio) {                 !! 1997                 if (!page) {
2212                         swp_count = READ_ONCE(si->swap_map[offset]); !! 1998                         if (*swap_map == 0 || *swap_map == SWAP_MAP_BAD)
2213                         if (swp_count == 0 || swp_count == SWAP_MAP_BAD) !! 1999                                 goto try_next;
2214                                 continue;     << 
2215                         return -ENOMEM;          2000                         return -ENOMEM;
2216                 }                                2001                 }
2217                                                  2002 
2218                 folio_lock(folio);            !! 2003                 lock_page(page);
2219                 folio_wait_writeback(folio);  !! 2004                 wait_on_page_writeback(page);
2220                 ret = unuse_pte(vma, pmd, addr, entry, folio); !! 2005                 ret = unuse_pte(vma, pmd, addr, entry, page);
2221                 if (ret < 0) {                   2006                 if (ret < 0) {
2222                         folio_unlock(folio);  !! 2007                         unlock_page(page);
2223                         folio_put(folio);     !! 2008                         put_page(page);
2224                         return ret;           !! 2009                         goto out;
2225                 }                                2010                 }
2226                                                  2011 
2227                 folio_free_swap(folio);       !! 2012                 try_to_free_swap(page);
2228                 folio_unlock(folio);          !! 2013                 unlock_page(page);
2229                 folio_put(folio);             !! 2014                 put_page(page);
2230         } while (addr += PAGE_SIZE, addr != end); <<
2231                                                  2015 
2232         if (pte)                              !! 2016                 if (*fs_pages_to_unuse && !--(*fs_pages_to_unuse)) {
2233                 pte_unmap(pte);               !! 2017                         ret = FRONTSWAP_PAGES_UNUSED;
2234         return 0;                             !! 2018                         goto out;
                                                   >> 2019                 }
                                                   >> 2020 try_next:
                                                   >> 2021                 pte = pte_offset_map(pmd, addr);
                                                   >> 2022         } while (pte++, addr += PAGE_SIZE, addr != end);
                                                   >> 2023         pte_unmap(pte - 1);
                                                   >> 2024 
                                                   >> 2025         ret = 0;
                                                   >> 2026 out:
                                                   >> 2027         return ret;
2235 }                                                2028 }
2236                                                  2029 
2237 static inline int unuse_pmd_range(struct vm_a    2030 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
2238                                 unsigned long    2031                                 unsigned long addr, unsigned long end,
2239                                 unsigned int type) !! 2032                                 unsigned int type, bool frontswap,
                                                   >> 2033                                 unsigned long *fs_pages_to_unuse)
2240 {                                                2034 {
2241         pmd_t *pmd;                              2035         pmd_t *pmd;
2242         unsigned long next;                      2036         unsigned long next;
2243         int ret;                                 2037         int ret;
2244                                                  2038 
2245         pmd = pmd_offset(pud, addr);             2039         pmd = pmd_offset(pud, addr);
2246         do {                                     2040         do {
2247                 cond_resched();                  2041                 cond_resched();
2248                 next = pmd_addr_end(addr, end    2042                 next = pmd_addr_end(addr, end);
2249                 ret = unuse_pte_range(vma, pmd, addr, next, type); !! 2043                 if (pmd_none_or_trans_huge_or_clear_bad(pmd))
                                                   >> 2044                         continue;
                                                   >> 2045                 ret = unuse_pte_range(vma, pmd, addr, next, type,
                                                   >> 2046                                       frontswap, fs_pages_to_unuse);
2250                 if (ret)                         2047                 if (ret)
2251                         return ret;              2048                         return ret;
2252         } while (pmd++, addr = next, addr !=     2049         } while (pmd++, addr = next, addr != end);
2253         return 0;                                2050         return 0;
2254 }                                                2051 }
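
Note what vanished on the 6.12 side of unuse_pmd_range(): the pmd_none_or_trans_huge_or_clear_bad() filter. Since the pte_offset_map() rework (v6.5 and later), mapping the PTE table validates the pmd itself and fails for a none, huge, or concurrently changing entry, so the pmd-level check would be redundant; unuse_pte_range() simply treats a NULL mapping as "nothing to unuse here". Restated as a sketch of that contract:

	pte = pte_offset_map(pmd, addr);
	if (!pte)	/* pmd was none, a THP, or unstable: skip this range */
		return 0;
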
2255                                                  2052 
2256 static inline int unuse_pud_range(struct vm_a    2053 static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
2257                                 unsigned long    2054                                 unsigned long addr, unsigned long end,
2258                                 unsigned int type)    !! 2055                                 unsigned int type, bool frontswap,
                                                   >> 2056                                 unsigned long *fs_pages_to_unuse)
2259 {                                                2057 {
2260         pud_t *pud;                              2058         pud_t *pud;
2261         unsigned long next;                      2059         unsigned long next;
2262         int ret;                                 2060         int ret;
2263                                                  2061 
2264         pud = pud_offset(p4d, addr);             2062         pud = pud_offset(p4d, addr);
2265         do {                                     2063         do {
2266                 next = pud_addr_end(addr, end    2064                 next = pud_addr_end(addr, end);
2267                 if (pud_none_or_clear_bad(pud    2065                 if (pud_none_or_clear_bad(pud))
2268                         continue;                2066                         continue;
2269                 ret = unuse_pmd_range(vma, pud, addr, next, type); !! 2067                 ret = unuse_pmd_range(vma, pud, addr, next, type,
                                                   >> 2068                                       frontswap, fs_pages_to_unuse);
2270                 if (ret)                         2069                 if (ret)
2271                         return ret;              2070                         return ret;
2272         } while (pud++, addr = next, addr !=     2071         } while (pud++, addr = next, addr != end);
2273         return 0;                                2072         return 0;
2274 }                                                2073 }
2275                                                  2074 
2276 static inline int unuse_p4d_range(struct vm_a    2075 static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
2277                                 unsigned long    2076                                 unsigned long addr, unsigned long end,
2278                                 unsigned int type)    !! 2077                                 unsigned int type, bool frontswap,
                                                   >> 2078                                 unsigned long *fs_pages_to_unuse)
2279 {                                                2079 {
2280         p4d_t *p4d;                              2080         p4d_t *p4d;
2281         unsigned long next;                      2081         unsigned long next;
2282         int ret;                                 2082         int ret;
2283                                                  2083 
2284         p4d = p4d_offset(pgd, addr);             2084         p4d = p4d_offset(pgd, addr);
2285         do {                                     2085         do {
2286                 next = p4d_addr_end(addr, end    2086                 next = p4d_addr_end(addr, end);
2287                 if (p4d_none_or_clear_bad(p4d    2087                 if (p4d_none_or_clear_bad(p4d))
2288                         continue;                2088                         continue;
2289                 ret = unuse_pud_range(vma, p4d, addr, next, type); !! 2089                 ret = unuse_pud_range(vma, p4d, addr, next, type,
                                                   >> 2090                                       frontswap, fs_pages_to_unuse);
2290                 if (ret)                         2091                 if (ret)
2291                         return ret;              2092                         return ret;
2292         } while (p4d++, addr = next, addr !=     2093         } while (p4d++, addr = next, addr != end);
2293         return 0;                                2094         return 0;
2294 }                                                2095 }
2295                                                  2096 
2296 static int unuse_vma(struct vm_area_struct *vma, unsigned int type) !! 2097 static int unuse_vma(struct vm_area_struct *vma, unsigned int type,
                                                   >> 2098                      bool frontswap, unsigned long *fs_pages_to_unuse)
2297 {                                                2099 {
2298         pgd_t *pgd;                              2100         pgd_t *pgd;
2299         unsigned long addr, end, next;           2101         unsigned long addr, end, next;
2300         int ret;                                 2102         int ret;
2301                                                  2103 
2302         addr = vma->vm_start;                    2104         addr = vma->vm_start;
2303         end = vma->vm_end;                       2105         end = vma->vm_end;
2304                                                  2106 
2305         pgd = pgd_offset(vma->vm_mm, addr);      2107         pgd = pgd_offset(vma->vm_mm, addr);
2306         do {                                     2108         do {
2307                 next = pgd_addr_end(addr, end    2109                 next = pgd_addr_end(addr, end);
2308                 if (pgd_none_or_clear_bad(pgd    2110                 if (pgd_none_or_clear_bad(pgd))
2309                         continue;                2111                         continue;
2310                 ret = unuse_p4d_range(vma, pgd, addr, next, type); !! 2112                 ret = unuse_p4d_range(vma, pgd, addr, next, type,
                                                   >> 2113                                       frontswap, fs_pages_to_unuse);
2311                 if (ret)                         2114                 if (ret)
2312                         return ret;              2115                         return ret;
2313         } while (pgd++, addr = next, addr !=     2116         } while (pgd++, addr = next, addr != end);
2314         return 0;                                2117         return 0;
2315 }                                                2118 }
2316                                                  2119 
2317 static int unuse_mm(struct mm_struct *mm, unsigned int type) !! 2120 static int unuse_mm(struct mm_struct *mm, unsigned int type,
                                                   >> 2121                     bool frontswap, unsigned long *fs_pages_to_unuse)
2318 {                                                2122 {
2319         struct vm_area_struct *vma;              2123         struct vm_area_struct *vma;
2320         int ret = 0;                             2124         int ret = 0;
2321         VMA_ITERATOR(vmi, mm, 0);             << 
2322                                                  2125 
2323         mmap_read_lock(mm);                      2126         mmap_read_lock(mm);
2324         for_each_vma(vmi, vma) {              !! 2127         for (vma = mm->mmap; vma; vma = vma->vm_next) {
2325                 if (vma->anon_vma && !is_vm_h    2128                 if (vma->anon_vma && !is_vm_hugetlb_page(vma)) {
2326                         ret = unuse_vma(vma, type); !! 2129                         ret = unuse_vma(vma, type, frontswap,
                                                   >> 2130                                         fs_pages_to_unuse);
2327                         if (ret)                 2131                         if (ret)
2328                                 break;           2132                                 break;
2329                 }                                2133                 }
2330                                               << 
2331                 cond_resched();                  2134                 cond_resched();
2332         }                                        2135         }
2333         mmap_read_unlock(mm);                    2136         mmap_read_unlock(mm);
2334         return ret;                              2137         return ret;
2335 }                                                2138 }
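
The unuse_mm() hunk is the mmap-list-to-maple-tree migration in miniature: 5.15 chases vma->vm_next under mmap_read_lock(), while 6.12 declares a VMA_ITERATOR and walks with for_each_vma(). A minimal sketch of the 6.1+ idiom (visit_vma() is an invented callback):

	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);	/* iterate the maple tree from addr 0 */

	mmap_read_lock(mm);
	for_each_vma(vmi, vma) {
		/* only anonymous, non-hugetlb VMAs can hold these entries */
		if (vma->anon_vma && !is_vm_hugetlb_page(vma))
			visit_vma(vma);		/* hypothetical per-VMA step */
		cond_resched();
	}
	mmap_read_unlock(mm);
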
2336                                                  2139 
2337 /*                                               2140 /*
2338  * Scan swap_map from current position to next entry still in use. !! 2141  * Scan swap_map (or frontswap_map if frontswap parameter is true)
2339  * Return 0 if there are no inuse entries after prev till end of !! 2142  * from current position to next entry still in use. Return 0
2340  * the map.                                   !! 2143  * if there are no inuse entries after prev till end of the map.
2341  */                                              2144  */
2342 static unsigned int find_next_to_unuse(struct    2145 static unsigned int find_next_to_unuse(struct swap_info_struct *si,
2343                                         unsigned int prev)    !! 2146                                         unsigned int prev, bool frontswap)
2344 {                                                2147 {
2345         unsigned int i;                          2148         unsigned int i;
2346         unsigned char count;                     2149         unsigned char count;
2347                                                  2150 
2348         /*                                       2151         /*
2349          * No need for swap_lock here: we're     2152          * No need for swap_lock here: we're just looking
2350          * for whether an entry is in use, no    2153          * for whether an entry is in use, not modifying it; false
2351          * hits are okay, and sys_swapoff() h    2154          * hits are okay, and sys_swapoff() has already prevented new
2352          * allocations from this area (while     2155          * allocations from this area (while holding swap_lock).
2353          */                                      2156          */
2354         for (i = prev + 1; i < si->max; i++)     2157         for (i = prev + 1; i < si->max; i++) {
2355                 count = READ_ONCE(si->swap_ma    2158                 count = READ_ONCE(si->swap_map[i]);
2356                 if (count && swap_count(count    2159                 if (count && swap_count(count) != SWAP_MAP_BAD)
2357                         break;                !! 2160                         if (!frontswap || frontswap_test(si, i))
                                                   >> 2161                                 break;
2358                 if ((i % LATENCY_LIMIT) == 0)    2162                 if ((i % LATENCY_LIMIT) == 0)
2359                         cond_resched();          2163                         cond_resched();
2360         }                                        2164         }
2361                                                  2165 
2362         if (i == si->max)                        2166         if (i == si->max)
2363                 i = 0;                           2167                 i = 0;
2364                                                  2168 
2365         return i;                                2169         return i;
2366 }                                                2170 }
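
Offset 0 of every swap area holds the header page and its swap_map slot is marked SWAP_MAP_BAD, so it can never be a hit; that is what lets find_next_to_unuse() reuse 0 as its "wrapped around, nothing left" return. The caller's loop in try_to_unuse() below leans on this, schematically:

	unsigned int i = 0;

	/* pull the next in-use offset until the scan wraps back to 0 */
	while ((i = find_next_to_unuse(si, i)) != 0)
		drop_swap_cache_entry(si, i);	/* hypothetical per-entry step */
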
2367                                                  2171 
2368 static int try_to_unuse(unsigned int type)    !! 2172 /*
                                                   >> 2173  * If the boolean frontswap is true, only unuse pages_to_unuse pages;
                                                   >> 2174  * pages_to_unuse==0 means all pages; ignored if frontswap is false
                                                   >> 2175  */
                                                   >> 2176 int try_to_unuse(unsigned int type, bool frontswap,
                                                   >> 2177                  unsigned long pages_to_unuse)
2369 {                                                2178 {
2370         struct mm_struct *prev_mm;               2179         struct mm_struct *prev_mm;
2371         struct mm_struct *mm;                    2180         struct mm_struct *mm;
2372         struct list_head *p;                     2181         struct list_head *p;
2373         int retval = 0;                          2182         int retval = 0;
2374         struct swap_info_struct *si = swap_in    2183         struct swap_info_struct *si = swap_info[type];
2375         struct folio *folio;                  !! 2184         struct page *page;
2376         swp_entry_t entry;                       2185         swp_entry_t entry;
2377         unsigned int i;                          2186         unsigned int i;
2378                                                  2187 
2379         if (!READ_ONCE(si->inuse_pages))         2188         if (!READ_ONCE(si->inuse_pages))
2380                 goto success;                 !! 2189                 return 0;
                                                   >> 2190 
                                                   >> 2191         if (!frontswap)
                                                   >> 2192                 pages_to_unuse = 0;
2381                                                  2193 
2382 retry:                                           2194 retry:
2383         retval = shmem_unuse(type);           !! 2195         retval = shmem_unuse(type, frontswap, &pages_to_unuse);
2384         if (retval)                              2196         if (retval)
2385                 return retval;                !! 2197                 goto out;
2386                                                  2198 
2387         prev_mm = &init_mm;                      2199         prev_mm = &init_mm;
2388         mmget(prev_mm);                          2200         mmget(prev_mm);
2389                                                  2201 
2390         spin_lock(&mmlist_lock);                 2202         spin_lock(&mmlist_lock);
2391         p = &init_mm.mmlist;                     2203         p = &init_mm.mmlist;
2392         while (READ_ONCE(si->inuse_pages) &&     2204         while (READ_ONCE(si->inuse_pages) &&
2393                !signal_pending(current) &&       2205                !signal_pending(current) &&
2394                (p = p->next) != &init_mm.mmli    2206                (p = p->next) != &init_mm.mmlist) {
2395                                                  2207 
2396                 mm = list_entry(p, struct mm_    2208                 mm = list_entry(p, struct mm_struct, mmlist);
2397                 if (!mmget_not_zero(mm))         2209                 if (!mmget_not_zero(mm))
2398                         continue;                2210                         continue;
2399                 spin_unlock(&mmlist_lock);       2211                 spin_unlock(&mmlist_lock);
2400                 mmput(prev_mm);                  2212                 mmput(prev_mm);
2401                 prev_mm = mm;                    2213                 prev_mm = mm;
2402                 retval = unuse_mm(mm, type);  !! 2214                 retval = unuse_mm(mm, type, frontswap, &pages_to_unuse);
                                                   >> 2215 
2403                 if (retval) {                    2216                 if (retval) {
2404                         mmput(prev_mm);          2217                         mmput(prev_mm);
2405                         return retval;        !! 2218                         goto out;
2406                 }                                2219                 }
2407                                                  2220 
2408                 /*                               2221                 /*
2409                  * Make sure that we aren't c    2222                  * Make sure that we aren't completely killing
2410                  * interactive performance.      2223                  * interactive performance.
2411                  */                              2224                  */
2412                 cond_resched();                  2225                 cond_resched();
2413                 spin_lock(&mmlist_lock);         2226                 spin_lock(&mmlist_lock);
2414         }                                        2227         }
2415         spin_unlock(&mmlist_lock);               2228         spin_unlock(&mmlist_lock);
2416                                                  2229 
2417         mmput(prev_mm);                          2230         mmput(prev_mm);
2418                                                  2231 
2419         i = 0;                                   2232         i = 0;
2420         while (READ_ONCE(si->inuse_pages) &&     2233         while (READ_ONCE(si->inuse_pages) &&
2421                !signal_pending(current) &&       2234                !signal_pending(current) &&
2422                (i = find_next_to_unuse(si, i)) != 0) { !! 2235                (i = find_next_to_unuse(si, i, frontswap)) != 0) {
2423                                                  2236 
2424                 entry = swp_entry(type, i);      2237                 entry = swp_entry(type, i);
2425                 folio = filemap_get_folio(swap_address_space(entry), i); !! 2238                 page = find_get_page(swap_address_space(entry), i);
2426                 if (IS_ERR(folio))            !! 2239                 if (!page)
2427                         continue;                2240                         continue;
2428                                                  2241 
2429                 /*                               2242                 /*
2430                  * It is conceivable that a racing task removed this folio from !! 2243                  * It is conceivable that a racing task removed this page from
2431                  * swap cache just before we acquired the page lock. The folio !! 2244                  * swap cache just before we acquired the page lock. The page
2432                  * might even be back in swap cache on another swap area. But     2245                  * might even be back in swap cache on another swap area. But
2433                  * that is okay, folio_free_swap() only removes stale folios. !! 2246                  * that is okay, try_to_free_swap() only removes stale pages.
                                                   >> 2247                  */
                                                   >> 2248                 lock_page(page);
                                                   >> 2249                 wait_on_page_writeback(page);
                                                   >> 2250                 try_to_free_swap(page);
                                                   >> 2251                 unlock_page(page);
                                                   >> 2252                 put_page(page);
                                                   >> 2253 
                                                   >> 2254                 /*
                                                   >> 2255                  * For frontswap, we just need to unuse pages_to_unuse, if
                                                   >> 2256                  * it was specified. Need not check frontswap again here as
                                                   >> 2257                  * we already zeroed out pages_to_unuse if not frontswap.
2434                  */                              2258                  */
2435                 folio_lock(folio);            !! 2259                 if (pages_to_unuse && --pages_to_unuse == 0)
2436                 folio_wait_writeback(folio);  !! 2260                         goto out;
2437                 folio_free_swap(folio);       << 
2438                 folio_unlock(folio);          << 
2439                 folio_put(folio);             << 
2440         }                                        2261         }
2441                                                  2262 
2442         /*                                       2263         /*
2443          * Let's check again to see if there are still swap entries in the map.    2264          * Let's check again to see if there are still swap entries in the map.
2444          * If yes, we need to retry the unuse logic again.    2265          * If yes, we need to retry the unuse logic again.
2445          * Under global memory pressure, swap    2266          * Under global memory pressure, swap entries can be reinserted back
2446          * into process space after the mmlis    2267          * into process space after the mmlist loop above passes over them.
2447          *                                       2268          *
2448          * Limit the number of retries? No: when mmget_not_zero() !! 2269          * Limit the number of retries? No: when mmget_not_zero() above fails,
2449          * above fails, that mm is likely to be freeing swap from !! 2270          * that mm is likely to be freeing swap from exit_mmap(), which proceeds
2450          * exit_mmap(), which proceeds at its own independent pace; !! 2271          * at its own independent pace; and even shmem_writepage() could have
2451          * and even shmem_writepage() could have been preempted after !! 2272          * been preempted after get_swap_page(), temporarily hiding that swap.
2452          * folio_alloc_swap(), temporarily hiding that swap.  It's easy !! 2273          * It's easy and robust (though cpu-intensive) just to keep retrying.
2453          * and robust (though cpu-intensive) just to keep retrying. <<
2454          */                                      2274          */
2455         if (READ_ONCE(si->inuse_pages)) {        2275         if (READ_ONCE(si->inuse_pages)) {
2456                 if (!signal_pending(current))    2276                 if (!signal_pending(current))
2457                         goto retry;              2277                         goto retry;
2458                 return -EINTR;                !! 2278                 retval = -EINTR;
2459         }                                        2279         }
2460                                               !! 2280 out:
2461 success:                                      !! 2281         return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval;
2462         /*                                    << 
2463          * Make sure that further cleanups after try_to_unuse() returns happen <<
2464          * after swap_range_free() reduces si->inuse_pages to 0. <<
2465          */                                   << 
2466         smp_mb();                             << 
2467         return 0;                             << 
2468 }                                                2282 }
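
One idiom in the mmlist loop above is easy to miss: the walk holds mmlist_lock, but unuse_mm() sleeps, so the lock is dropped around it, and the previously visited mm (prev_mm) is kept pinned with a reference solely so that p remains a valid list position across the unlock. Stripped to its skeleton, with comments added:

	prev_mm = &init_mm;
	mmget(prev_mm);				/* pin the starting anchor */

	spin_lock(&mmlist_lock);
	while ((p = p->next) != &init_mm.mmlist) {
		mm = list_entry(p, struct mm_struct, mmlist);
		if (!mmget_not_zero(mm))	/* mm already exiting: skip */
			continue;
		spin_unlock(&mmlist_lock);	/* about to sleep */
		mmput(prev_mm);
		prev_mm = mm;			/* new anchor keeps p valid */
		/* ... sleepable per-mm work ... */
		spin_lock(&mmlist_lock);
	}
	spin_unlock(&mmlist_lock);
	mmput(prev_mm);
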
2469                                                  2283 
2470 /*                                               2284 /*
2471  * After a successful try_to_unuse, if no swa    2285  * After a successful try_to_unuse, if no swap is now in use, we know
2472  * we can empty the mmlist.  swap_lock must b    2286  * we can empty the mmlist.  swap_lock must be held on entry and exit.
2473  * Note that mmlist_lock nests inside swap_lo    2287  * Note that mmlist_lock nests inside swap_lock, and an mm must be
2474  * added to the mmlist just after page_duplic    2288  * added to the mmlist just after page_duplicate - before would be racy.
2475  */                                              2289  */
2476 static void drain_mmlist(void)                   2290 static void drain_mmlist(void)
2477 {                                                2291 {
2478         struct list_head *p, *next;              2292         struct list_head *p, *next;
2479         unsigned int type;                       2293         unsigned int type;
2480                                                  2294 
2481         for (type = 0; type < nr_swapfiles; t    2295         for (type = 0; type < nr_swapfiles; type++)
2482                 if (swap_info[type]->inuse_pa    2296                 if (swap_info[type]->inuse_pages)
2483                         return;                  2297                         return;
2484         spin_lock(&mmlist_lock);                 2298         spin_lock(&mmlist_lock);
2485         list_for_each_safe(p, next, &init_mm.    2299         list_for_each_safe(p, next, &init_mm.mmlist)
2486                 list_del_init(p);                2300                 list_del_init(p);
2487         spin_unlock(&mmlist_lock);               2301         spin_unlock(&mmlist_lock);
2488 }                                                2302 }
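
drain_mmlist() unlinks entries while iterating, hence list_for_each_safe(), which reads the next pointer before the current node is touched. The same idiom in isolation, with an invented list head:

	struct list_head *p, *next;

	/* 'next' is cached up front, so list_del_init(p) cannot
	 * derail the traversal */
	list_for_each_safe(p, next, &example_list)
		list_del_init(p);
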
2489                                                  2303 
2490 /*                                               2304 /*
2491  * Free all of a swapdev's extent information    2305  * Free all of a swapdev's extent information
2492  */                                              2306  */
2493 static void destroy_swap_extents(struct swap_    2307 static void destroy_swap_extents(struct swap_info_struct *sis)
2494 {                                                2308 {
2495         while (!RB_EMPTY_ROOT(&sis->swap_exte    2309         while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) {
2496                 struct rb_node *rb = sis->swa    2310                 struct rb_node *rb = sis->swap_extent_root.rb_node;
2497                 struct swap_extent *se = rb_e    2311                 struct swap_extent *se = rb_entry(rb, struct swap_extent, rb_node);
2498                                                  2312 
2499                 rb_erase(rb, &sis->swap_exten    2313                 rb_erase(rb, &sis->swap_extent_root);
2500                 kfree(se);                       2314                 kfree(se);
2501         }                                        2315         }
2502                                                  2316 
2503         if (sis->flags & SWP_ACTIVATED) {        2317         if (sis->flags & SWP_ACTIVATED) {
2504                 struct file *swap_file = sis-    2318                 struct file *swap_file = sis->swap_file;
2505                 struct address_space *mapping    2319                 struct address_space *mapping = swap_file->f_mapping;
2506                                                  2320 
2507                 sis->flags &= ~SWP_ACTIVATED;    2321                 sis->flags &= ~SWP_ACTIVATED;
2508                 if (mapping->a_ops->swap_deac    2322                 if (mapping->a_ops->swap_deactivate)
2509                         mapping->a_ops->swap_    2323                         mapping->a_ops->swap_deactivate(swap_file);
2510         }                                        2324         }
2511 }                                                2325 }
2512                                                  2326 
2513 /*                                               2327 /*
2514  * Add a block range (and the corresponding p    2328  * Add a block range (and the corresponding page range) into this swapdev's
2515  * extent tree.                                  2329  * extent tree.
2516  *                                               2330  *
2517  * This function rather assumes that it is ca    2331  * This function rather assumes that it is called in ascending page order.
2518  */                                              2332  */
2519 int                                              2333 int
2520 add_swap_extent(struct swap_info_struct *sis,    2334 add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
2521                 unsigned long nr_pages, secto    2335                 unsigned long nr_pages, sector_t start_block)
2522 {                                                2336 {
2523         struct rb_node **link = &sis->swap_ex    2337         struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL;
2524         struct swap_extent *se;                  2338         struct swap_extent *se;
2525         struct swap_extent *new_se;              2339         struct swap_extent *new_se;
2526                                                  2340 
2527         /*                                       2341         /*
2528          * place the new node at the right mo    2342          * place the new node at the right most since the
2529          * function is called in ascending pa    2343          * function is called in ascending page order.
2530          */                                      2344          */
2531         while (*link) {                          2345         while (*link) {
2532                 parent = *link;                  2346                 parent = *link;
2533                 link = &parent->rb_right;        2347                 link = &parent->rb_right;
2534         }                                        2348         }
2535                                                  2349 
2536         if (parent) {                            2350         if (parent) {
2537                 se = rb_entry(parent, struct     2351                 se = rb_entry(parent, struct swap_extent, rb_node);
2538                 BUG_ON(se->start_page + se->n    2352                 BUG_ON(se->start_page + se->nr_pages != start_page);
2539                 if (se->start_block + se->nr_    2353                 if (se->start_block + se->nr_pages == start_block) {
2540                         /* Merge it */           2354                         /* Merge it */
2541                         se->nr_pages += nr_pa    2355                         se->nr_pages += nr_pages;
2542                         return 0;                2356                         return 0;
2543                 }                                2357                 }
2544         }                                        2358         }
2545                                                  2359 
2546         /* No merge, insert a new extent. */     2360         /* No merge, insert a new extent. */
2547         new_se = kmalloc(sizeof(*se), GFP_KER    2361         new_se = kmalloc(sizeof(*se), GFP_KERNEL);
2548         if (new_se == NULL)                      2362         if (new_se == NULL)
2549                 return -ENOMEM;                  2363                 return -ENOMEM;
2550         new_se->start_page = start_page;         2364         new_se->start_page = start_page;
2551         new_se->nr_pages = nr_pages;             2365         new_se->nr_pages = nr_pages;
2552         new_se->start_block = start_block;       2366         new_se->start_block = start_block;
2553                                                  2367 
2554         rb_link_node(&new_se->rb_node, parent    2368         rb_link_node(&new_se->rb_node, parent, link);
2555         rb_insert_color(&new_se->rb_node, &si    2369         rb_insert_color(&new_se->rb_node, &sis->swap_extent_root);
2556         return 1;                                2370         return 1;
2557 }                                                2371 }
2558 EXPORT_SYMBOL_GPL(add_swap_extent);              2372 EXPORT_SYMBOL_GPL(add_swap_extent);
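
Because extents arrive in ascending page order, the merge test only ever examines the rightmost node. A worked example with invented numbers:

	/* first call: installs {pages 0..1023 -> blocks 5000..6023} */
	add_swap_extent(sis, 0, 1024, 5000);	/* returns 1: new node */

	/* 5000 + 1024 == 6024, so this range is block-contiguous with
	 * the rightmost extent: nr_pages just grows to 1536 */
	add_swap_extent(sis, 1024, 512, 6024);	/* returns 0: merged */
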
2559                                                  2373 
2560 /*                                               2374 /*
2561  * A `swap extent' is a simple thing which ma    2375  * A `swap extent' is a simple thing which maps a contiguous range of pages
2562  * onto a contiguous range of disk blocks.  A rbtree of swap extents is !! 2376  * onto a contiguous range of disk blocks.  An ordered list of swap extents
2563  * built at swapon time and is then used at swap_writepage/swap_read_folio !! 2377  * is built at swapon time and is then used at swap_writepage/swap_readpage
2564  * time for locating where on disk a page bel    2378  * time for locating where on disk a page belongs.
2565  *                                               2379  *
2566  * If the swapfile is an S_ISBLK block device    2380  * If the swapfile is an S_ISBLK block device, a single extent is installed.
2567  * This is done so that the main operating co    2381  * This is done so that the main operating code can treat S_ISBLK and S_ISREG
2568  * swap files identically.                       2382  * swap files identically.
2569  *                                               2383  *
2570  * Whether the swapdev is an S_ISREG file or     2384  * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap
2571  * extent rbtree operates in PAGE_SIZE disk blocks.  Both S_ISREG and S_ISBLK !! 2385  * extent list operates in PAGE_SIZE disk blocks.  Both S_ISREG and S_ISBLK
2572  * swapfiles are handled *identically* after     2386  * swapfiles are handled *identically* after swapon time.
2573  *                                               2387  *
2574  * For S_ISREG swapfiles, setup_swap_extents(    2388  * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks
2575  * and will parse them into a rbtree, in PAGE_SIZE chunks.  If some stray !! 2389  * and will parse them into an ordered extent list, in PAGE_SIZE chunks.  If
2576  * blocks are found which do not fall within the PAGE_SIZE alignment !! 2390  * some stray blocks are found which do not fall within the PAGE_SIZE alignment
2577  * requirements, they are simply tossed out -    2391  * requirements, they are simply tossed out - we will never use those blocks
2578  * for swapping.                                 2392  * for swapping.
2579  *                                               2393  *
2580  * For all swap devices we set S_SWAPFILE acr    2394  * For all swap devices we set S_SWAPFILE across the life of the swapon.  This
2581  * prevents users from writing to the swap de    2395  * prevents users from writing to the swap device, which will corrupt memory.
2582  *                                               2396  *
2583  * The amount of disk space which a single sw    2397  * The amount of disk space which a single swap extent represents varies.
2584  * Typically it is in the 1-4 megabyte range.    2398  * Typically it is in the 1-4 megabyte range.  So we can have hundreds of
2585  * extents in the rbtree. - akpm.             !! 2399  * extents in the list.  To avoid much list walking, we cache the previous
                                                   >> 2400  * search location in `curr_swap_extent', and start new searches from there.
                                                   >> 2401  * This is extremely effective.  The average number of iterations in
                                                   >> 2402  * map_swap_page() has been measured at about 0.3 per page.  - akpm.
2586  */                                              2403  */
2587 static int setup_swap_extents(struct swap_inf    2404 static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
2588 {                                                2405 {
2589         struct file *swap_file = sis->swap_fi    2406         struct file *swap_file = sis->swap_file;
2590         struct address_space *mapping = swap_    2407         struct address_space *mapping = swap_file->f_mapping;
2591         struct inode *inode = mapping->host;     2408         struct inode *inode = mapping->host;
2592         int ret;                                 2409         int ret;
2593                                                  2410 
2594         if (S_ISBLK(inode->i_mode)) {            2411         if (S_ISBLK(inode->i_mode)) {
2595                 ret = add_swap_extent(sis, 0,    2412                 ret = add_swap_extent(sis, 0, sis->max, 0);
2596                 *span = sis->pages;              2413                 *span = sis->pages;
2597                 return ret;                      2414                 return ret;
2598         }                                        2415         }
2599                                                  2416 
2600         if (mapping->a_ops->swap_activate) {     2417         if (mapping->a_ops->swap_activate) {
2601                 ret = mapping->a_ops->swap_ac    2418                 ret = mapping->a_ops->swap_activate(sis, swap_file, span);
2602                 if (ret < 0)                  !! 2419                 if (ret >= 0)
2603                         return ret;           !! 2420                         sis->flags |= SWP_ACTIVATED;
2604                 sis->flags |= SWP_ACTIVATED;  !! 2421                 if (!ret) {
2605                 if ((sis->flags & SWP_FS_OPS) && !! 2422                         sis->flags |= SWP_FS_OPS;
2606                     sio_pool_init() != 0) {   !! 2423                         ret = add_swap_extent(sis, 0, sis->max, 0);
2607                         destroy_swap_extents(sis); !! 2424                         *span = sis->pages;
2608                         return -ENOMEM;       << 
2609                 }                                2425                 }
2610                 return ret;                      2426                 return ret;
2611         }                                        2427         }
2612                                                  2428 
2613         return generic_swapfile_activate(sis,    2429         return generic_swapfile_activate(sis, swap_file, span);
2614 }                                                2430 }
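
The diff in setup_swap_extents() tracks an interface change: in 5.15 a zero return from ->swap_activate meant "use one generic page-sized extent", which the core then added itself; by 6.12 the filesystem is expected to install its own extents and return their count, and SWP_FS_OPS swapfiles additionally need the swap-IO pool (sio_pool_init()) backing the ->swap_rw path. A minimal, hypothetical ->swap_activate under the 6.12 contract, for a file assumed to be one physically contiguous run:

static int myfs_swap_activate(struct swap_info_struct *sis,
			      struct file *file, sector_t *span)
{
	/* myfs_first_block() is invented for this sketch */
	int nr = add_swap_extent(sis, 0, sis->max, myfs_first_block(file));

	if (nr < 0)
		return nr;
	*span = sis->pages;
	return nr;		/* number of extents installed */
}
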
2615                                                  2431 
2616 static int swap_node(struct swap_info_struct *si) !! 2432 static int swap_node(struct swap_info_struct *p)
2617 {                                                2433 {
2618         struct block_device *bdev;               2434         struct block_device *bdev;
2619                                                  2435 
2620         if (si->bdev)                         !! 2436         if (p->bdev)
2621                 bdev = si->bdev;              !! 2437                 bdev = p->bdev;
2622         else                                     2438         else
2623                 bdev = si->swap_file->f_inode !! 2439                 bdev = p->swap_file->f_inode->i_sb->s_bdev;
2624                                                  2440 
2625         return bdev ? bdev->bd_disk->node_id     2441         return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE;
2626 }                                                2442 }
2627                                                  2443 
2628 static void setup_swap_info(struct swap_info_struct *si, int prio, !! 2444 static void setup_swap_info(struct swap_info_struct *p, int prio,
2629                             unsigned char *swap_map,    2445                             unsigned char *swap_map,
2630                             struct swap_cluster_info *cluster_info, !! 2446                             struct swap_cluster_info *cluster_info)
2631                             unsigned long *zeromap) <<
2632 {                                                2447 {
2633         int i;                                   2448         int i;
2634                                                  2449 
2635         if (prio >= 0)                           2450         if (prio >= 0)
2636                 si->prio = prio;              !! 2451                 p->prio = prio;
2637         else                                     2452         else
2638                 si->prio = --least_priority;  !! 2453                 p->prio = --least_priority;
2639         /*                                       2454         /*
2640          * the plist prio is negated because     2455          * the plist prio is negated because plist ordering is
2641          * low-to-high, while swap ordering i    2456          * low-to-high, while swap ordering is high-to-low
2642          */                                      2457          */
2643         si->list.prio = -si->prio;            !! 2458         p->list.prio = -p->prio;
2644         for_each_node(i) {                       2459         for_each_node(i) {
2645                 if (si->prio >= 0)            !! 2460                 if (p->prio >= 0)
2646                         si->avail_lists[i].prio = -si->prio; !! 2461                         p->avail_lists[i].prio = -p->prio;
2647                 else {                           2462                 else {
2648                         if (swap_node(si) == i) !! 2463                         if (swap_node(p) == i)
2649                                 si->avail_lists[i].prio = 1; !! 2464                                 p->avail_lists[i].prio = 1;
2650                         else                     2465                         else
2651                                 si->avail_lists[i].prio = -si->prio; !! 2466                                 p->avail_lists[i].prio = -p->prio;
2652                 }                                2467                 }
2653         }                                        2468         }
2654         si->swap_map = swap_map;              !! 2469         p->swap_map = swap_map;
2655         si->cluster_info = cluster_info;      !! 2470         p->cluster_info = cluster_info;
2656         si->zeromap = zeromap;                << 
2657 }                                                2471 }
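
The sign gymnastics are easiest to check with numbers (invented for illustration). A user-set priority 5 area gets list.prio = -5 and avail prio -5 on every node, so higher user priority sorts first in the low-to-high plists; an auto-assigned area (say prio -2) gets avail prio 2 everywhere except its own NUMA node, which gets 1, so node-local devices win among the auto-priority ones:

	/*
	 *   si->prio   list.prio   avail_lists[n].prio
	 *      5          -5        -5 on every node
	 *     -2           2         2, but 1 on swap_node(si)'s node
	 */
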
2658                                                  2472 
2659 static void _enable_swap_info(struct swap_info_struct *si) !! 2473 static void _enable_swap_info(struct swap_info_struct *p)
2660 {                                                2474 {
2661         si->flags |= SWP_WRITEOK;             !! 2475         p->flags |= SWP_WRITEOK;
2662         atomic_long_add(si->pages, &nr_swap_pages); !! 2476         atomic_long_add(p->pages, &nr_swap_pages);
2663         total_swap_pages += si->pages;        !! 2477         total_swap_pages += p->pages;
2664                                                  2478 
2665         assert_spin_locked(&swap_lock);          2479         assert_spin_locked(&swap_lock);
2666         /*                                       2480         /*
2667          * both lists are plists, and thus pr    2481          * both lists are plists, and thus priority ordered.
2668          * swap_active_head needs to be prior    2482          * swap_active_head needs to be priority ordered for swapoff(),
2669          * which on removal of any swap_info_    2483          * which on removal of any swap_info_struct with an auto-assigned
2670          * (i.e. negative) priority increment    2484          * (i.e. negative) priority increments the auto-assigned priority
2671          * of any lower-priority swap_info_st    2485          * of any lower-priority swap_info_structs.
2672          * swap_avail_head needs to be priority ordered for folio_alloc_swap(), !! 2486          * swap_avail_head needs to be priority ordered for get_swap_page(),
2673          * which allocates swap pages from th    2487          * which allocates swap pages from the highest available priority
2674          * swap_info_struct.                     2488          * swap_info_struct.
2675          */                                      2489          */
2676         plist_add(&si->list, &swap_active_head); !! 2490         plist_add(&p->list, &swap_active_head);
2677                                               !! 2491         add_to_avail_list(p);
2678         /* add to available list iff swap device is not full */ <<
2679         if (si->highest_bit)                  << 
2680                 add_to_avail_list(si);        << 
2681 }                                                2492 }
2682                                                  2493 
2683 static void enable_swap_info(struct swap_info_struct *si, int prio, !! 2494 static void enable_swap_info(struct swap_info_struct *p, int prio,
2684                                 unsigned char    2495                                 unsigned char *swap_map,
2685                                 struct swap_c    2496                                 struct swap_cluster_info *cluster_info,
2686                                 unsigned long *zeromap) !! 2497                                 unsigned long *frontswap_map)
2687 {                                                2498 {
                                                   >> 2499         frontswap_init(p->type, frontswap_map);
2688         spin_lock(&swap_lock);                   2500         spin_lock(&swap_lock);
2689         spin_lock(&si->lock);                 !! 2501         spin_lock(&p->lock);
2690         setup_swap_info(si, prio, swap_map, cluster_info, zeromap); !! 2502         setup_swap_info(p, prio, swap_map, cluster_info);
2691         spin_unlock(&si->lock);               !! 2503         spin_unlock(&p->lock);
2692         spin_unlock(&swap_lock);                 2504         spin_unlock(&swap_lock);
2693         /*                                       2505         /*
2694          * Finished initializing swap device,    2506          * Finished initializing swap device, now it's safe to reference it.
2695          */                                      2507          */
2696         percpu_ref_resurrect(&si->users);     !! 2508         percpu_ref_resurrect(&p->users);
2697         spin_lock(&swap_lock);                   2509         spin_lock(&swap_lock);
2698         spin_lock(&si->lock);                 !! 2510         spin_lock(&p->lock);
2699         _enable_swap_info(si);                !! 2511         _enable_swap_info(p);
2700         spin_unlock(&si->lock);               !! 2512         spin_unlock(&p->lock);
2701         spin_unlock(&swap_lock);                 2513         spin_unlock(&swap_lock);
2702 }                                                2514 }
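
The two-phase shape of enable_swap_info() exists for the reference-counted readers: the tables are wired up under swap_lock and si->lock first, percpu_ref_resurrect() then re-opens si->users, and only the second locked section publishes the device on the plists. Readers pair with this via get/put_swap_device(), roughly:

	struct swap_info_struct *si;

	si = get_swap_device(entry);	/* NULL once swapoff kills si->users */
	if (!si)
		return;			/* stale entry: device going away */
	/* safe to touch si->swap_map etc. until the matching put */
	put_swap_device(si);
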
2703                                                  2515 
2704 static void reinsert_swap_info(struct swap_info_struct *si) !! 2516 static void reinsert_swap_info(struct swap_info_struct *p)
2705 {                                                2517 {
2706         spin_lock(&swap_lock);                   2518         spin_lock(&swap_lock);
2707         spin_lock(&si->lock);                 !! 2519         spin_lock(&p->lock);
2708         setup_swap_info(si, si->prio, si->swap_map, si->cluster_info, si->zeromap); !! 2520         setup_swap_info(p, p->prio, p->swap_map, p->cluster_info);
2709         _enable_swap_info(si);                !! 2521         _enable_swap_info(p);
2710         spin_unlock(&si->lock);               !! 2522         spin_unlock(&p->lock);
2711         spin_unlock(&swap_lock);                 2523         spin_unlock(&swap_lock);
2712 }                                                2524 }
2713                                                  2525 
2714 static bool __has_usable_swap(void)           << 
2715 {                                             << 
2716         return !plist_head_empty(&swap_active << 
2717 }                                             << 
2718                                               << 
2719 bool has_usable_swap(void)                       2526 bool has_usable_swap(void)
2720 {                                                2527 {
2721         bool ret;                             !! 2528         bool ret = true;
2722                                                  2529 
2723         spin_lock(&swap_lock);                   2530         spin_lock(&swap_lock);
2724         ret = __has_usable_swap();            !! 2531         if (plist_head_empty(&swap_active_head))
                                                   >> 2532                 ret = false;
2725         spin_unlock(&swap_lock);                 2533         spin_unlock(&swap_lock);
2726         return ret;                              2534         return ret;
2727 }                                                2535 }
2728                                                  2536 
2729 SYSCALL_DEFINE1(swapoff, const char __user *,    2537 SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
2730 {                                                2538 {
2731         struct swap_info_struct *p = NULL;       2539         struct swap_info_struct *p = NULL;
2732         unsigned char *swap_map;                 2540         unsigned char *swap_map;
2733         unsigned long *zeromap;               << 
2734         struct swap_cluster_info *cluster_inf    2541         struct swap_cluster_info *cluster_info;
                                                   >> 2542         unsigned long *frontswap_map;
2735         struct file *swap_file, *victim;         2543         struct file *swap_file, *victim;
2736         struct address_space *mapping;           2544         struct address_space *mapping;
2737         struct inode *inode;                     2545         struct inode *inode;
2738         struct filename *pathname;               2546         struct filename *pathname;
2739         int err, found = 0;                      2547         int err, found = 0;
                                                   >> 2548         unsigned int old_block_size;
2740                                                  2549 
2741         if (!capable(CAP_SYS_ADMIN))             2550         if (!capable(CAP_SYS_ADMIN))
2742                 return -EPERM;                   2551                 return -EPERM;
2743                                                  2552 
2744         BUG_ON(!current->mm);                    2553         BUG_ON(!current->mm);
2745                                                  2554 
2746         pathname = getname(specialfile);         2555         pathname = getname(specialfile);
2747         if (IS_ERR(pathname))                    2556         if (IS_ERR(pathname))
2748                 return PTR_ERR(pathname);        2557                 return PTR_ERR(pathname);
2749                                                  2558 
2750         victim = file_open_name(pathname, O_R    2559         victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
2751         err = PTR_ERR(victim);                   2560         err = PTR_ERR(victim);
2752         if (IS_ERR(victim))                      2561         if (IS_ERR(victim))
2753                 goto out;                        2562                 goto out;
2754                                                  2563 
2755         mapping = victim->f_mapping;             2564         mapping = victim->f_mapping;
2756         spin_lock(&swap_lock);                   2565         spin_lock(&swap_lock);
2757         plist_for_each_entry(p, &swap_active_    2566         plist_for_each_entry(p, &swap_active_head, list) {
2758                 if (p->flags & SWP_WRITEOK) {    2567                 if (p->flags & SWP_WRITEOK) {
2759                         if (p->swap_file->f_m    2568                         if (p->swap_file->f_mapping == mapping) {
2760                                 found = 1;       2569                                 found = 1;
2761                                 break;           2570                                 break;
2762                         }                        2571                         }
2763                 }                                2572                 }
2764         }                                        2573         }
2765         if (!found) {                            2574         if (!found) {
2766                 err = -EINVAL;                   2575                 err = -EINVAL;
2767                 spin_unlock(&swap_lock);         2576                 spin_unlock(&swap_lock);
2768                 goto out_dput;                   2577                 goto out_dput;
2769         }                                        2578         }
2770         if (!security_vm_enough_memory_mm(cur    2579         if (!security_vm_enough_memory_mm(current->mm, p->pages))
2771                 vm_unacct_memory(p->pages);      2580                 vm_unacct_memory(p->pages);
2772         else {                                   2581         else {
2773                 err = -ENOMEM;                   2582                 err = -ENOMEM;
2774                 spin_unlock(&swap_lock);         2583                 spin_unlock(&swap_lock);
2775                 goto out_dput;                   2584                 goto out_dput;
2776         }                                        2585         }
2777         spin_lock(&p->lock);                     2586         spin_lock(&p->lock);
2778         del_from_avail_list(p);                  2587         del_from_avail_list(p);
2779         if (p->prio < 0) {                       2588         if (p->prio < 0) {
2780                 struct swap_info_struct *si =    2589                 struct swap_info_struct *si = p;
2781                 int nid;                         2590                 int nid;
2782                                                  2591 
2783                 plist_for_each_entry_continue    2592                 plist_for_each_entry_continue(si, &swap_active_head, list) {
2784                         si->prio++;              2593                         si->prio++;
2785                         si->list.prio--;         2594                         si->list.prio--;
2786                         for_each_node(nid) {     2595                         for_each_node(nid) {
2787                                 if (si->avail    2596                                 if (si->avail_lists[nid].prio != 1)
2788                                         si->a    2597                                         si->avail_lists[nid].prio--;
2789                         }                        2598                         }
2790                 }                                2599                 }
2791                 least_priority++;                2600                 least_priority++;
2792         }                                        2601         }
2793         plist_del(&p->list, &swap_active_head    2602         plist_del(&p->list, &swap_active_head);
2794         atomic_long_sub(p->pages, &nr_swap_pa    2603         atomic_long_sub(p->pages, &nr_swap_pages);
2795         total_swap_pages -= p->pages;            2604         total_swap_pages -= p->pages;
2796         p->flags &= ~SWP_WRITEOK;                2605         p->flags &= ~SWP_WRITEOK;
2797         spin_unlock(&p->lock);                   2606         spin_unlock(&p->lock);
2798         spin_unlock(&swap_lock);                 2607         spin_unlock(&swap_lock);
2799                                                  2608 
2800         disable_swap_slots_cache_lock();         2609         disable_swap_slots_cache_lock();
2801                                                  2610 
2802         set_current_oom_origin();                2611         set_current_oom_origin();
2803         err = try_to_unuse(p->type);          !! 2612         err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
2804         clear_current_oom_origin();              2613         clear_current_oom_origin();
2805                                                  2614 
2806         if (err) {                               2615         if (err) {
2807                 /* re-insert swap space back     2616                 /* re-insert swap space back into swap_list */
2808                 reinsert_swap_info(p);           2617                 reinsert_swap_info(p);
2809                 reenable_swap_slots_cache_unl    2618                 reenable_swap_slots_cache_unlock();
2810                 goto out_dput;                   2619                 goto out_dput;
2811         }                                        2620         }
2812                                                  2621 
2813         reenable_swap_slots_cache_unlock();      2622         reenable_swap_slots_cache_unlock();
2814                                                  2623 
2815         /*                                       2624         /*
2816          * Wait for swap operations protected    2625          * Wait for swap operations protected by get/put_swap_device()
2817          * to complete.  Because of synchronize_rcu() in get_swap_device(), !! 2626          * to complete.
2818          * operations protected by RCU reader side lock (including any !! 2627          *
2819          * spinlock) will be waited too.  This makes it easy to !! 2628          * We need synchronize_rcu() here to protect the accessing to
2820          * prevent folio_test_swapcache() and the following swap cache !! 2629          * the swap cache data structure.
2821          * operations from racing with swapoff. <<
2822          */                                      2630          */
2823         percpu_ref_kill(&p->users);              2631         percpu_ref_kill(&p->users);
2824         synchronize_rcu();                       2632         synchronize_rcu();
2825         wait_for_completion(&p->comp);           2633         wait_for_completion(&p->comp);
2826                                                  2634 
2827         flush_work(&p->discard_work);            2635         flush_work(&p->discard_work);
2828         flush_work(&p->reclaim_work);         << 
2829                                                  2636 
2830         destroy_swap_extents(p);                 2637         destroy_swap_extents(p);
2831         if (p->flags & SWP_CONTINUED)            2638         if (p->flags & SWP_CONTINUED)
2832                 free_swap_count_continuations    2639                 free_swap_count_continuations(p);
2833                                                  2640 
2834         if (!p->bdev || !bdev_nonrot(p->bdev)) !! 2641         if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev)))
2835                 atomic_dec(&nr_rotate_swap);     2642                 atomic_dec(&nr_rotate_swap);
2836                                                  2643 
2837         mutex_lock(&swapon_mutex);               2644         mutex_lock(&swapon_mutex);
2838         spin_lock(&swap_lock);                   2645         spin_lock(&swap_lock);
2839         spin_lock(&p->lock);                     2646         spin_lock(&p->lock);
2840         drain_mmlist();                          2647         drain_mmlist();
2841                                                  2648 
2842         /* wait for anyone still in scan_swap    2649         /* wait for anyone still in scan_swap_map_slots */
2843         p->highest_bit = 0;             /* cu    2650         p->highest_bit = 0;             /* cuts scans short */
2844         while (p->flags >= SWP_SCANNING) {       2651         while (p->flags >= SWP_SCANNING) {
2845                 spin_unlock(&p->lock);           2652                 spin_unlock(&p->lock);
2846                 spin_unlock(&swap_lock);         2653                 spin_unlock(&swap_lock);
2847                 schedule_timeout_uninterrupti    2654                 schedule_timeout_uninterruptible(1);
2848                 spin_lock(&swap_lock);           2655                 spin_lock(&swap_lock);
2849                 spin_lock(&p->lock);             2656                 spin_lock(&p->lock);
2850         }                                        2657         }
2851                                                  2658 
2852         swap_file = p->swap_file;                2659         swap_file = p->swap_file;
                                                   >> 2660         old_block_size = p->old_block_size;
2853         p->swap_file = NULL;                     2661         p->swap_file = NULL;
2854         p->max = 0;                              2662         p->max = 0;
2855         swap_map = p->swap_map;                  2663         swap_map = p->swap_map;
2856         p->swap_map = NULL;                      2664         p->swap_map = NULL;
2857         zeromap = p->zeromap;                 << 
2858         p->zeromap = NULL;                    << 
2859         cluster_info = p->cluster_info;          2665         cluster_info = p->cluster_info;
2860         p->cluster_info = NULL;                  2666         p->cluster_info = NULL;
                                                   >> 2667         frontswap_map = frontswap_map_get(p);
2861         spin_unlock(&p->lock);                   2668         spin_unlock(&p->lock);
2862         spin_unlock(&swap_lock);                 2669         spin_unlock(&swap_lock);
2863         arch_swap_invalidate_area(p->type);      2670         arch_swap_invalidate_area(p->type);
2864         zswap_swapoff(p->type);               !! 2671         frontswap_invalidate_area(p->type);
                                                   >> 2672         frontswap_map_set(p, NULL);
2865         mutex_unlock(&swapon_mutex);             2673         mutex_unlock(&swapon_mutex);
2866         free_percpu(p->percpu_cluster);          2674         free_percpu(p->percpu_cluster);
2867         p->percpu_cluster = NULL;                2675         p->percpu_cluster = NULL;
2868         free_percpu(p->cluster_next_cpu);        2676         free_percpu(p->cluster_next_cpu);
2869         p->cluster_next_cpu = NULL;              2677         p->cluster_next_cpu = NULL;
2870         vfree(swap_map);                         2678         vfree(swap_map);
2871         kvfree(zeromap);                      << 
2872         kvfree(cluster_info);                    2679         kvfree(cluster_info);
                                                   >> 2680         kvfree(frontswap_map);
2873         /* Destroy swap account information *    2681         /* Destroy swap account information */
2874         swap_cgroup_swapoff(p->type);            2682         swap_cgroup_swapoff(p->type);
2875         exit_swap_address_space(p->type);        2683         exit_swap_address_space(p->type);
2876                                                  2684 
2877         inode = mapping->host;                   2685         inode = mapping->host;
                                                   >> 2686         if (S_ISBLK(inode->i_mode)) {
                                                   >> 2687                 struct block_device *bdev = I_BDEV(inode);
                                                   >> 2688 
                                                   >> 2689                 set_blocksize(bdev, old_block_size);
                                                   >> 2690                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
                                                   >> 2691         }
2878                                                  2692 
2879         inode_lock(inode);                       2693         inode_lock(inode);
2880         inode->i_flags &= ~S_SWAPFILE;           2694         inode->i_flags &= ~S_SWAPFILE;
2881         inode_unlock(inode);                     2695         inode_unlock(inode);
2882         filp_close(swap_file, NULL);             2696         filp_close(swap_file, NULL);
2883                                                  2697 
2884         /*                                       2698         /*
2885          * Clear the SWP_USED flag after all     2699          * Clear the SWP_USED flag after all resources are freed so that swapon
2886          * can reuse this swap_info in alloc_    2700          * can reuse this swap_info in alloc_swap_info() safely.  It is ok to
2887          * not hold p->lock after we cleared     2701          * not hold p->lock after we cleared its SWP_WRITEOK.
2888          */                                      2702          */
2889         spin_lock(&swap_lock);                   2703         spin_lock(&swap_lock);
2890         p->flags = 0;                            2704         p->flags = 0;
2891         spin_unlock(&swap_lock);                 2705         spin_unlock(&swap_lock);
2892                                                  2706 
2893         err = 0;                                 2707         err = 0;
2894         atomic_inc(&proc_poll_event);            2708         atomic_inc(&proc_poll_event);
2895         wake_up_interruptible(&proc_poll_wait    2709         wake_up_interruptible(&proc_poll_wait);
2896                                                  2710 
2897 out_dput:                                        2711 out_dput:
2898         filp_close(victim, NULL);                2712         filp_close(victim, NULL);
2899 out:                                             2713 out:
2900         putname(pathname);                       2714         putname(pathname);
2901         return err;                              2715         return err;
2902 }                                                2716 }
2903                                                  2717 
2904 #ifdef CONFIG_PROC_FS                            2718 #ifdef CONFIG_PROC_FS
2905 static __poll_t swaps_poll(struct file *file,    2719 static __poll_t swaps_poll(struct file *file, poll_table *wait)
2906 {                                                2720 {
2907         struct seq_file *seq = file->private_    2721         struct seq_file *seq = file->private_data;
2908                                                  2722 
2909         poll_wait(file, &proc_poll_wait, wait    2723         poll_wait(file, &proc_poll_wait, wait);
2910                                                  2724 
2911         if (seq->poll_event != atomic_read(&p    2725         if (seq->poll_event != atomic_read(&proc_poll_event)) {
2912                 seq->poll_event = atomic_read    2726                 seq->poll_event = atomic_read(&proc_poll_event);
2913                 return EPOLLIN | EPOLLRDNORM     2727                 return EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI;
2914         }                                        2728         }
2915                                                  2729 
2916         return EPOLLIN | EPOLLRDNORM;            2730         return EPOLLIN | EPOLLRDNORM;
2917 }                                                2731 }
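
Because swaps_poll() reports a configuration change as EPOLLERR | EPOLLPRI and re-arms seq->poll_event itself, a userspace watcher only needs to poll for POLLPRI and re-read. A small runnable illustration (the file name and error handling are the only assumptions here):

        #include <fcntl.h>
        #include <poll.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                char buf[4096];
                ssize_t n;
                int fd = open("/proc/swaps", O_RDONLY);

                if (fd < 0)
                        return 1;
                for (;;) {
                        struct pollfd pfd = { .fd = fd, .events = POLLPRI };

                        /* Blocks until swapon()/swapoff() bumps
                         * proc_poll_event and wakes proc_poll_wait. */
                        if (poll(&pfd, 1, -1) < 0)
                                return 1;
                        puts("swap configuration changed; rereading:");
                        lseek(fd, 0, SEEK_SET);
                        while ((n = read(fd, buf, sizeof(buf))) > 0)
                                fwrite(buf, 1, (size_t)n, stdout);
                }
        }
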
2918                                                  2732 
2919 /* iterator */                                   2733 /* iterator */
2920 static void *swap_start(struct seq_file *swap    2734 static void *swap_start(struct seq_file *swap, loff_t *pos)
2921 {                                                2735 {
2922         struct swap_info_struct *si;             2736         struct swap_info_struct *si;
2923         int type;                                2737         int type;
2924         loff_t l = *pos;                         2738         loff_t l = *pos;
2925                                                  2739 
2926         mutex_lock(&swapon_mutex);               2740         mutex_lock(&swapon_mutex);
2927                                                  2741 
2928         if (!l)                                  2742         if (!l)
2929                 return SEQ_START_TOKEN;          2743                 return SEQ_START_TOKEN;
2930                                                  2744 
2931         for (type = 0; (si = swap_type_to_swa    2745         for (type = 0; (si = swap_type_to_swap_info(type)); type++) {
2932                 if (!(si->flags & SWP_USED) |    2746                 if (!(si->flags & SWP_USED) || !si->swap_map)
2933                         continue;                2747                         continue;
2934                 if (!--l)                        2748                 if (!--l)
2935                         return si;               2749                         return si;
2936         }                                        2750         }
2937                                                  2751 
2938         return NULL;                             2752         return NULL;
2939 }                                                2753 }
2940                                                  2754 
2941 static void *swap_next(struct seq_file *swap,    2755 static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
2942 {                                                2756 {
2943         struct swap_info_struct *si = v;         2757         struct swap_info_struct *si = v;
2944         int type;                                2758         int type;
2945                                                  2759 
2946         if (v == SEQ_START_TOKEN)                2760         if (v == SEQ_START_TOKEN)
2947                 type = 0;                        2761                 type = 0;
2948         else                                     2762         else
2949                 type = si->type + 1;             2763                 type = si->type + 1;
2950                                                  2764 
2951         ++(*pos);                                2765         ++(*pos);
2952         for (; (si = swap_type_to_swap_info(t    2766         for (; (si = swap_type_to_swap_info(type)); type++) {
2953                 if (!(si->flags & SWP_USED) |    2767                 if (!(si->flags & SWP_USED) || !si->swap_map)
2954                         continue;                2768                         continue;
2955                 return si;                       2769                 return si;
2956         }                                        2770         }
2957                                                  2771 
2958         return NULL;                             2772         return NULL;
2959 }                                                2773 }
2960                                                  2774 
2961 static void swap_stop(struct seq_file *swap,     2775 static void swap_stop(struct seq_file *swap, void *v)
2962 {                                                2776 {
2963         mutex_unlock(&swapon_mutex);             2777         mutex_unlock(&swapon_mutex);
2964 }                                                2778 }
2965                                                  2779 
2966 static int swap_show(struct seq_file *swap, v    2780 static int swap_show(struct seq_file *swap, void *v)
2967 {                                                2781 {
2968         struct swap_info_struct *si = v;         2782         struct swap_info_struct *si = v;
2969         struct file *file;                       2783         struct file *file;
2970         int len;                                 2784         int len;
2971         unsigned long bytes, inuse;           !! 2785         unsigned int bytes, inuse;
2972                                                  2786 
2973         if (si == SEQ_START_TOKEN) {             2787         if (si == SEQ_START_TOKEN) {
2974                 seq_puts(swap, "Filename\t\t\    2788                 seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");
2975                 return 0;                        2789                 return 0;
2976         }                                        2790         }
2977                                                  2791 
2978         bytes = K(si->pages);                 !! 2792         bytes = si->pages << (PAGE_SHIFT - 10);
2979         inuse = K(READ_ONCE(si->inuse_pages)) !! 2793         inuse = si->inuse_pages << (PAGE_SHIFT - 10);
2980                                                  2794 
2981         file = si->swap_file;                    2795         file = si->swap_file;
2982         len = seq_file_path(swap, file, " \t\    2796         len = seq_file_path(swap, file, " \t\n\\");
2983         seq_printf(swap, "%*s%s\t%lu\t%s%lu\t !! 2797         seq_printf(swap, "%*s%s\t%u\t%s%u\t%s%d\n",
2984                         len < 40 ? 40 - len :    2798                         len < 40 ? 40 - len : 1, " ",
2985                         S_ISBLK(file_inode(fi    2799                         S_ISBLK(file_inode(file)->i_mode) ?
2986                                 "partition" :    2800                                 "partition" : "file\t",
2987                         bytes, bytes < 100000    2801                         bytes, bytes < 10000000 ? "\t" : "",
2988                         inuse, inuse < 100000    2802                         inuse, inuse < 10000000 ? "\t" : "",
2989                         si->prio);               2803                         si->prio);
2990         return 0;                                2804         return 0;
2991 }                                                2805 }
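
For reference, swap_show()'s format string produces the familiar /proc/swaps table: the %*s pad aligns the Type column at 40 characters and sizes are in KiB. The rows below are illustrative values, not from a real system:

        Filename                                Type            Size            Used            Priority
        /dev/vda2                               partition       8388604         262144          -2
        /swap/swapfile                          file            2097148         0               -3
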
2992                                                  2806 
2993 static const struct seq_operations swaps_op =    2807 static const struct seq_operations swaps_op = {
2994         .start =        swap_start,              2808         .start =        swap_start,
2995         .next =         swap_next,               2809         .next =         swap_next,
2996         .stop =         swap_stop,               2810         .stop =         swap_stop,
2997         .show =         swap_show                2811         .show =         swap_show
2998 };                                               2812 };
2999                                                  2813 
3000 static int swaps_open(struct inode *inode, st    2814 static int swaps_open(struct inode *inode, struct file *file)
3001 {                                                2815 {
3002         struct seq_file *seq;                    2816         struct seq_file *seq;
3003         int ret;                                 2817         int ret;
3004                                                  2818 
3005         ret = seq_open(file, &swaps_op);         2819         ret = seq_open(file, &swaps_op);
3006         if (ret)                                 2820         if (ret)
3007                 return ret;                      2821                 return ret;
3008                                                  2822 
3009         seq = file->private_data;                2823         seq = file->private_data;
3010         seq->poll_event = atomic_read(&proc_p    2824         seq->poll_event = atomic_read(&proc_poll_event);
3011         return 0;                                2825         return 0;
3012 }                                                2826 }
3013                                                  2827 
3014 static const struct proc_ops swaps_proc_ops =    2828 static const struct proc_ops swaps_proc_ops = {
3015         .proc_flags     = PROC_ENTRY_PERMANEN    2829         .proc_flags     = PROC_ENTRY_PERMANENT,
3016         .proc_open      = swaps_open,            2830         .proc_open      = swaps_open,
3017         .proc_read      = seq_read,              2831         .proc_read      = seq_read,
3018         .proc_lseek     = seq_lseek,             2832         .proc_lseek     = seq_lseek,
3019         .proc_release   = seq_release,           2833         .proc_release   = seq_release,
3020         .proc_poll      = swaps_poll,            2834         .proc_poll      = swaps_poll,
3021 };                                               2835 };
3022                                                  2836 
3023 static int __init procswaps_init(void)           2837 static int __init procswaps_init(void)
3024 {                                                2838 {
3025         proc_create("swaps", 0, NULL, &swaps_    2839         proc_create("swaps", 0, NULL, &swaps_proc_ops);
3026         return 0;                                2840         return 0;
3027 }                                                2841 }
3028 __initcall(procswaps_init);                      2842 __initcall(procswaps_init);
3029 #endif /* CONFIG_PROC_FS */                      2843 #endif /* CONFIG_PROC_FS */
3030                                                  2844 
3031 #ifdef MAX_SWAPFILES_CHECK                       2845 #ifdef MAX_SWAPFILES_CHECK
3032 static int __init max_swapfiles_check(void)      2846 static int __init max_swapfiles_check(void)
3033 {                                                2847 {
3034         MAX_SWAPFILES_CHECK();                   2848         MAX_SWAPFILES_CHECK();
3035         return 0;                                2849         return 0;
3036 }                                                2850 }
3037 late_initcall(max_swapfiles_check);              2851 late_initcall(max_swapfiles_check);
3038 #endif                                           2852 #endif
3039                                                  2853 
3040 static struct swap_info_struct *alloc_swap_in    2854 static struct swap_info_struct *alloc_swap_info(void)
3041 {                                                2855 {
3042         struct swap_info_struct *p;              2856         struct swap_info_struct *p;
3043         struct swap_info_struct *defer = NULL    2857         struct swap_info_struct *defer = NULL;
3044         unsigned int type;                       2858         unsigned int type;
3045         int i;                                   2859         int i;
3046                                                  2860 
3047         p = kvzalloc(struct_size(p, avail_lis    2861         p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL);
3048         if (!p)                                  2862         if (!p)
3049                 return ERR_PTR(-ENOMEM);         2863                 return ERR_PTR(-ENOMEM);
3050                                                  2864 
3051         if (percpu_ref_init(&p->users, swap_u    2865         if (percpu_ref_init(&p->users, swap_users_ref_free,
3052                             PERCPU_REF_INIT_D    2866                             PERCPU_REF_INIT_DEAD, GFP_KERNEL)) {
3053                 kvfree(p);                       2867                 kvfree(p);
3054                 return ERR_PTR(-ENOMEM);         2868                 return ERR_PTR(-ENOMEM);
3055         }                                        2869         }
3056                                                  2870 
3057         spin_lock(&swap_lock);                   2871         spin_lock(&swap_lock);
3058         for (type = 0; type < nr_swapfiles; t    2872         for (type = 0; type < nr_swapfiles; type++) {
3059                 if (!(swap_info[type]->flags     2873                 if (!(swap_info[type]->flags & SWP_USED))
3060                         break;                   2874                         break;
3061         }                                        2875         }
3062         if (type >= MAX_SWAPFILES) {             2876         if (type >= MAX_SWAPFILES) {
3063                 spin_unlock(&swap_lock);         2877                 spin_unlock(&swap_lock);
3064                 percpu_ref_exit(&p->users);      2878                 percpu_ref_exit(&p->users);
3065                 kvfree(p);                       2879                 kvfree(p);
3066                 return ERR_PTR(-EPERM);          2880                 return ERR_PTR(-EPERM);
3067         }                                        2881         }
3068         if (type >= nr_swapfiles) {              2882         if (type >= nr_swapfiles) {
3069                 p->type = type;                  2883                 p->type = type;
3070                 /*                               2884                 /*
3071                  * Publish the swap_info_stru    2885                  * Publish the swap_info_struct after initializing it.
3072                  * Note that kvzalloc() above    2886                  * Note that kvzalloc() above zeroes all its fields.
3073                  */                              2887                  */
3074                 smp_store_release(&swap_info[    2888                 smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */
3075                 nr_swapfiles++;                  2889                 nr_swapfiles++;
3076         } else {                                 2890         } else {
3077                 defer = p;                       2891                 defer = p;
3078                 p = swap_info[type];             2892                 p = swap_info[type];
3079                 /*                               2893                 /*
3080                  * Do not memset this entry:     2894                  * Do not memset this entry: a racing procfs swap_next()
3081                  * would be relying on p->typ    2895                  * would be relying on p->type to remain valid.
3082                  */                              2896                  */
3083         }                                        2897         }
3084         p->swap_extent_root = RB_ROOT;           2898         p->swap_extent_root = RB_ROOT;
3085         plist_node_init(&p->list, 0);            2899         plist_node_init(&p->list, 0);
3086         for_each_node(i)                         2900         for_each_node(i)
3087                 plist_node_init(&p->avail_lis    2901                 plist_node_init(&p->avail_lists[i], 0);
3088         p->flags = SWP_USED;                     2902         p->flags = SWP_USED;
3089         spin_unlock(&swap_lock);                 2903         spin_unlock(&swap_lock);
3090         if (defer) {                             2904         if (defer) {
3091                 percpu_ref_exit(&defer->users    2905                 percpu_ref_exit(&defer->users);
3092                 kvfree(defer);                   2906                 kvfree(defer);
3093         }                                        2907         }
3094         spin_lock_init(&p->lock);                2908         spin_lock_init(&p->lock);
3095         spin_lock_init(&p->cont_lock);           2909         spin_lock_init(&p->cont_lock);
3096         init_completion(&p->comp);               2910         init_completion(&p->comp);
3097                                                  2911 
3098         return p;                                2912         return p;
3099 }                                                2913 }
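
The smp_store_release() publish above pairs with a dependency-ordered load on the lookup side; a sketch of the reader, matching the helper this file already uses in swap_start()/swap_next():

        /* Sketch of the lockless lookup paired with the release above. */
        static struct swap_info_struct *swap_type_to_swap_info(int type)
        {
                if (type >= MAX_SWAPFILES)
                        return NULL;
                /* Pairs with smp_store_release() in alloc_swap_info(). */
                return READ_ONCE(swap_info[type]);      /* rcu_dereference() */
        }
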
3100                                                  2914 
3101 static int claim_swapfile(struct swap_info_st !! 2915 static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
3102 {                                                2916 {
                                                   >> 2917         int error;
                                                   >> 2918 
3103         if (S_ISBLK(inode->i_mode)) {            2919         if (S_ISBLK(inode->i_mode)) {
3104                 si->bdev = I_BDEV(inode);     !! 2920                 p->bdev = blkdev_get_by_dev(inode->i_rdev,
                                                   >> 2921                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
                                                   >> 2922                 if (IS_ERR(p->bdev)) {
                                                   >> 2923                         error = PTR_ERR(p->bdev);
                                                   >> 2924                         p->bdev = NULL;
                                                   >> 2925                         return error;
                                                   >> 2926                 }
                                                   >> 2927                 p->old_block_size = block_size(p->bdev);
                                                   >> 2928                 error = set_blocksize(p->bdev, PAGE_SIZE);
                                                   >> 2929                 if (error < 0)
                                                   >> 2930                         return error;
3105                 /*                               2931                 /*
3106                  * Zoned block devices contai    2932                  * Zoned block devices contain zones that have a sequential
3107                  * write only restriction.  H    2933                  * write only restriction.  Hence zoned block devices are not
3108                  * suitable for swapping.  Di    2934                  * suitable for swapping.  Disallow them here.
3109                  */                              2935                  */
3110                 if (bdev_is_zoned(si->bdev))  !! 2936                 if (blk_queue_is_zoned(p->bdev->bd_disk->queue))
3111                         return -EINVAL;          2937                         return -EINVAL;
3112                 si->flags |= SWP_BLKDEV;      !! 2938                 p->flags |= SWP_BLKDEV;
3113         } else if (S_ISREG(inode->i_mode)) {     2939         } else if (S_ISREG(inode->i_mode)) {
3114                 si->bdev = inode->i_sb->s_bde !! 2940                 p->bdev = inode->i_sb->s_bdev;
3115         }                                        2941         }
3116                                                  2942 
3117         return 0;                                2943         return 0;
3118 }                                                2944 }
3119                                                  2945 
3120                                                  2946 
3121 /*                                               2947 /*
3122  * Find out how many pages are allowed for a     2948  * Find out how many pages are allowed for a single swap device. There
3123  * are two limiting factors:                     2949  * are two limiting factors:
3124  * 1) the number of bits for the swap offset     2950  * 1) the number of bits for the swap offset in the swp_entry_t type, and
3125  * 2) the number of bits in the swap pte, as     2951  * 2) the number of bits in the swap pte, as defined by the different
3126  * architectures.                                2952  * architectures.
3127  *                                               2953  *
3128  * In order to find the largest possible bit     2954  * In order to find the largest possible bit mask, a swap entry with
3129  * swap type 0 and swap offset ~0UL is create    2955  * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
3130  * decoded to a swp_entry_t again, and finall    2956  * decoded to a swp_entry_t again, and finally the swap offset is
3131  * extracted.                                    2957  * extracted.
3132  *                                               2958  *
3133  * This will mask all the bits from the initi    2959  * This will mask all the bits from the initial ~0UL mask that can't
3134  * be encoded in either the swp_entry_t or th    2960  * be encoded in either the swp_entry_t or the architecture definition
3135  * of a swap pte.                                2961  * of a swap pte.
3136  */                                              2962  */
3137 unsigned long generic_max_swapfile_size(void)    2963 unsigned long generic_max_swapfile_size(void)
3138 {                                                2964 {
3139         return swp_offset(pte_to_swp_entry(      2965         return swp_offset(pte_to_swp_entry(
3140                         swp_entry_to_pte(swp_    2966                         swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
3141 }                                                2967 }
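
A worked instance of the comment above, with a made-up pte format that can only keep 50 offset bits:

        /* Illustrative only -- no particular architecture:
         *
         *   swp_entry(0, ~0UL)       offset = all ones
         *   swp_entry_to_pte()       offset bits 50..63 don't fit, dropped
         *   pte_to_swp_entry()       offset = (1UL << 50) - 1
         *   swp_offset(...) + 1      = 1UL << 50 pages, the device limit
         */
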
3142                                                  2968 
3143 /* Can be overridden by an architecture for a    2969 /* Can be overridden by an architecture for additional checks. */
3144 __weak unsigned long arch_max_swapfile_size(v !! 2970 __weak unsigned long max_swapfile_size(void)
3145 {                                                2971 {
3146         return generic_max_swapfile_size();      2972         return generic_max_swapfile_size();
3147 }                                                2973 }
3148                                                  2974 
3149 static unsigned long read_swap_header(struct  !! 2975 static unsigned long read_swap_header(struct swap_info_struct *p,
3150                                         union    2976                                         union swap_header *swap_header,
3151                                         struc    2977                                         struct inode *inode)
3152 {                                                2978 {
3153         int i;                                   2979         int i;
3154         unsigned long maxpages;                  2980         unsigned long maxpages;
3155         unsigned long swapfilepages;             2981         unsigned long swapfilepages;
3156         unsigned long last_page;                 2982         unsigned long last_page;
3157                                                  2983 
3158         if (memcmp("SWAPSPACE2", swap_header-    2984         if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
3159                 pr_err("Unable to find swap-s    2985                 pr_err("Unable to find swap-space signature\n");
3160                 return 0;                        2986                 return 0;
3161         }                                        2987         }
3162                                                  2988 
3163         /* swap partition endianness hack...     2989         /* swap partition endianness hack... */
3164         if (swab32(swap_header->info.version)    2990         if (swab32(swap_header->info.version) == 1) {
3165                 swab32s(&swap_header->info.ve    2991                 swab32s(&swap_header->info.version);
3166                 swab32s(&swap_header->info.la    2992                 swab32s(&swap_header->info.last_page);
3167                 swab32s(&swap_header->info.nr    2993                 swab32s(&swap_header->info.nr_badpages);
3168                 if (swap_header->info.nr_badp    2994                 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
3169                         return 0;                2995                         return 0;
3170                 for (i = 0; i < swap_header->    2996                 for (i = 0; i < swap_header->info.nr_badpages; i++)
3171                         swab32s(&swap_header-    2997                         swab32s(&swap_header->info.badpages[i]);
3172         }                                        2998         }
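        /* Worked example of the hack above: a header written on an
         * opposite-endian machine reads back version == 0x01000000 here;
         * swab32(0x01000000) == 1, so the branch byte-swaps version,
         * last_page, nr_badpages and each badpages[i] in place before
         * the normal checks run. */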
3173         /* Check the swap header's sub-versio    2999         /* Check the swap header's sub-version */
3174         if (swap_header->info.version != 1) {    3000         if (swap_header->info.version != 1) {
3175                 pr_warn("Unable to handle swa    3001                 pr_warn("Unable to handle swap header version %d\n",
3176                         swap_header->info.ver    3002                         swap_header->info.version);
3177                 return 0;                        3003                 return 0;
3178         }                                        3004         }
3179                                                  3005 
3180         si->lowest_bit  = 1;                  !! 3006         p->lowest_bit  = 1;
3181         si->cluster_next = 1;                 !! 3007         p->cluster_next = 1;
3182         si->cluster_nr = 0;                   !! 3008         p->cluster_nr = 0;
3183                                                  3009 
3184         maxpages = swapfile_maximum_size;     !! 3010         maxpages = max_swapfile_size();
3185         last_page = swap_header->info.last_pa    3011         last_page = swap_header->info.last_page;
3186         if (!last_page) {                        3012         if (!last_page) {
3187                 pr_warn("Empty swap-file\n");    3013                 pr_warn("Empty swap-file\n");
3188                 return 0;                        3014                 return 0;
3189         }                                        3015         }
3190         if (last_page > maxpages) {              3016         if (last_page > maxpages) {
3191                 pr_warn("Truncating oversized    3017                 pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
3192                         K(maxpages), K(last_p !! 3018                         maxpages << (PAGE_SHIFT - 10),
                                                   >> 3019                         last_page << (PAGE_SHIFT - 10));
3193         }                                        3020         }
3194         if (maxpages > last_page) {              3021         if (maxpages > last_page) {
3195                 maxpages = last_page + 1;        3022                 maxpages = last_page + 1;
3196                 /* p->max is an unsigned int:    3023                 /* p->max is an unsigned int: don't overflow it */
3197                 if ((unsigned int)maxpages ==    3024                 if ((unsigned int)maxpages == 0)
3198                         maxpages = UINT_MAX;     3025                         maxpages = UINT_MAX;
3199         }                                        3026         }
3200         si->highest_bit = maxpages - 1;       !! 3027         p->highest_bit = maxpages - 1;
3201                                                  3028 
3202         if (!maxpages)                           3029         if (!maxpages)
3203                 return 0;                        3030                 return 0;
3204         swapfilepages = i_size_read(inode) >>    3031         swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
3205         if (swapfilepages && maxpages > swapf    3032         if (swapfilepages && maxpages > swapfilepages) {
3206                 pr_warn("Swap area shorter th    3033                 pr_warn("Swap area shorter than signature indicates\n");
3207                 return 0;                        3034                 return 0;
3208         }                                        3035         }
3209         if (swap_header->info.nr_badpages &&     3036         if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
3210                 return 0;                        3037                 return 0;
3211         if (swap_header->info.nr_badpages > M    3038         if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
3212                 return 0;                        3039                 return 0;
3213                                                  3040 
3214         return maxpages;                         3041         return maxpages;
3215 }                                                3042 }
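
The header validated above occupies the swap area's first page; abridged from include/linux/swap.h, its two overlaid views put the magic at the very end of the page and the info block after a 1K boot-sector hole:

        union swap_header {
                struct {
                        char reserved[PAGE_SIZE - 10];
                        char magic[10];                 /* "SWAPSPACE2" */
                } magic;
                struct {
                        char            bootbits[1024]; /* disklabel etc. */
                        __u32           version;        /* must be 1 */
                        __u32           last_page;      /* last usable page */
                        __u32           nr_badpages;
                        unsigned char   sws_uuid[16];
                        unsigned char   sws_volume[16];
                        __u32           padding[117];
                        __u32           badpages[1];    /* nr_badpages entries */
                } info;
        };
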
3216                                                  3043 
3217 #define SWAP_CLUSTER_INFO_COLS                   3044 #define SWAP_CLUSTER_INFO_COLS                                          \
3218         DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(s    3045         DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
3219 #define SWAP_CLUSTER_SPACE_COLS                  3046 #define SWAP_CLUSTER_SPACE_COLS                                         \
3220         DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES    3047         DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
3221 #define SWAP_CLUSTER_COLS                        3048 #define SWAP_CLUSTER_COLS                                               \
3222         max_t(unsigned int, SWAP_CLUSTER_INFO    3049         max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
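
Plugging in one plausible configuration (hedged: 64-byte cache lines, an 8-byte struct swap_cluster_info, SWAP_ADDRESS_SPACE_PAGES = 2^14 and SWAPFILE_CLUSTER = 256):

        /*
         *   SWAP_CLUSTER_INFO_COLS  = DIV_ROUND_UP(64, 8)      =  8
         *   SWAP_CLUSTER_SPACE_COLS = DIV_ROUND_UP(16384, 256) = 64
         *   SWAP_CLUSTER_COLS       = max(8, 64)               = 64
         *
         * so the cluster init loop below strides 64 columns at a time and
         * consecutive free-list entries touch different cluster_info cache
         * lines and different swap address spaces.
         */
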
3223                                                  3050 
3224 static int setup_swap_map_and_extents(struct  !! 3051 static int setup_swap_map_and_extents(struct swap_info_struct *p,
3225                                         union    3052                                         union swap_header *swap_header,
3226                                         unsig    3053                                         unsigned char *swap_map,
                                                   >> 3054                                         struct swap_cluster_info *cluster_info,
3227                                         unsig    3055                                         unsigned long maxpages,
3228                                         secto    3056                                         sector_t *span)
3229 {                                                3057 {
                                                   >> 3058         unsigned int j, k;
3230         unsigned int nr_good_pages;              3059         unsigned int nr_good_pages;
3231         unsigned long i;                      << 
3232         int nr_extents;                          3060         int nr_extents;
                                                   >> 3061         unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
                                                   >> 3062         unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS;
                                                   >> 3063         unsigned long i, idx;
3233                                                  3064 
3234         nr_good_pages = maxpages - 1;   /* om    3065         nr_good_pages = maxpages - 1;   /* omit header page */
3235                                                  3066 
                                                   >> 3067         cluster_list_init(&p->free_clusters);
                                                   >> 3068         cluster_list_init(&p->discard_clusters);
                                                   >> 3069 
3236         for (i = 0; i < swap_header->info.nr_    3070         for (i = 0; i < swap_header->info.nr_badpages; i++) {
3237                 unsigned int page_nr = swap_h    3071                 unsigned int page_nr = swap_header->info.badpages[i];
3238                 if (page_nr == 0 || page_nr >    3072                 if (page_nr == 0 || page_nr > swap_header->info.last_page)
3239                         return -EINVAL;          3073                         return -EINVAL;
3240                 if (page_nr < maxpages) {        3074                 if (page_nr < maxpages) {
3241                         swap_map[page_nr] = S    3075                         swap_map[page_nr] = SWAP_MAP_BAD;
3242                         nr_good_pages--;         3076                         nr_good_pages--;
                                                   >> 3077                         /*
                                                   >> 3078                          * Haven't marked the cluster free yet, no list
                                                   >> 3079                          * operation involved
                                                   >> 3080                          */
                                                   >> 3081                         inc_cluster_info_page(p, cluster_info, page_nr);
3243                 }                                3082                 }
3244         }                                        3083         }
3245                                                  3084 
                                                   >> 3085         /* Haven't marked the cluster free yet, no list operation involved */
                                                   >> 3086         for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
                                                   >> 3087                 inc_cluster_info_page(p, cluster_info, i);
                                                   >> 3088 
3246         if (nr_good_pages) {                     3089         if (nr_good_pages) {
3247                 swap_map[0] = SWAP_MAP_BAD;      3090                 swap_map[0] = SWAP_MAP_BAD;
3248                 si->max = maxpages;           !! 3091                 /*
3249                 si->pages = nr_good_pages;    !! 3092                  * Don't mark the cluster free yet, no list
3250                 nr_extents = setup_swap_exten !! 3093                  * operation involved
                                                   >> 3094                  */
                                                   >> 3095                 inc_cluster_info_page(p, cluster_info, 0);
                                                   >> 3096                 p->max = maxpages;
                                                   >> 3097                 p->pages = nr_good_pages;
                                                   >> 3098                 nr_extents = setup_swap_extents(p, span);
3251                 if (nr_extents < 0)              3099                 if (nr_extents < 0)
3252                         return nr_extents;       3100                         return nr_extents;
3253                 nr_good_pages = si->pages;    !! 3101                 nr_good_pages = p->pages;
3254         }                                        3102         }
3255         if (!nr_good_pages) {                    3103         if (!nr_good_pages) {
3256                 pr_warn("Empty swap-file\n");    3104                 pr_warn("Empty swap-file\n");
3257                 return -EINVAL;                  3105                 return -EINVAL;
3258         }                                        3106         }
3259                                                  3107 
3260         return nr_extents;                    << 
3261 }                                             << 
3262                                               << 
3263 static struct swap_cluster_info *setup_cluste << 
3264                                               << 
3265                                               << 
3266 {                                             << 
3267         unsigned long nr_clusters = DIV_ROUND << 
3268         unsigned long col = si->cluster_next  << 
3269         struct swap_cluster_info *cluster_inf << 
3270         unsigned long i, j, k, idx;           << 
3271         int cpu, err = -ENOMEM;               << 
3272                                               << 
3273         cluster_info = kvcalloc(nr_clusters,  << 
3274         if (!cluster_info)                       3108         if (!cluster_info)
3275                 goto err;                     !! 3109                 return nr_extents;
3276                                               << 
3277         for (i = 0; i < nr_clusters; i++)     << 
3278                 spin_lock_init(&cluster_info[ << 
3279                                               << 
3280         si->cluster_next_cpu = alloc_percpu(u << 
3281         if (!si->cluster_next_cpu)            << 
3282                 goto err_free;                << 
3283                                               << 
3284         /* Random start position to help with << 
3285         for_each_possible_cpu(cpu)            << 
3286                 per_cpu(*si->cluster_next_cpu << 
3287                 get_random_u32_inclusive(1, s << 
3288                                               << 
3289         si->percpu_cluster = alloc_percpu(str << 
3290         if (!si->percpu_cluster)              << 
3291                 goto err_free;                << 
3292                                               << 
3293         for_each_possible_cpu(cpu) {          << 
3294                 struct percpu_cluster *cluste << 
3295                                               << 
3296                 cluster = per_cpu_ptr(si->per << 
3297                 for (i = 0; i < SWAP_NR_ORDER << 
3298                         cluster->next[i] = SW << 
3299         }                                     << 
3300                                                  3110 
3301         /*                                    << 
3302          * Mark unusable pages as unavailable << 
3303          * marked free yet, so no list operat << 
3304          *                                    << 
3305          * See setup_swap_map_and_extents():  << 
3306          * and the EOF part of the last clust << 
3307          */                                   << 
3308         inc_cluster_info_page(si, cluster_inf << 
3309         for (i = 0; i < swap_header->info.nr_ << 
3310                 inc_cluster_info_page(si, clu << 
3311                                       swap_he << 
3312         for (i = maxpages; i < round_up(maxpa << 
3313                 inc_cluster_info_page(si, clu << 
3314                                               << 
3315         INIT_LIST_HEAD(&si->free_clusters);   << 
3316         INIT_LIST_HEAD(&si->full_clusters);   << 
3317         INIT_LIST_HEAD(&si->discard_clusters) << 
3318                                               << 
3319         for (i = 0; i < SWAP_NR_ORDERS; i++)  << 
3320                 INIT_LIST_HEAD(&si->nonfull_c << 
3321                 INIT_LIST_HEAD(&si->frag_clus << 
3322                 si->frag_cluster_nr[i] = 0;   << 
3323         }                                     << 
3324                                                  3111 
3325         /*                                       3112         /*
3326          * Reduce false cache line sharing: c    3113          * Reduce false cache line sharing: consecutive free clusters
3327          * must not share a cluster_info cach    3114          * must not share a cluster_info cache line or an address space.
3328          */                                      3115          */
3329         for (k = 0; k < SWAP_CLUSTER_COLS; k+    3116         for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
3330                 j = (k + col) % SWAP_CLUSTER_    3117                 j = (k + col) % SWAP_CLUSTER_COLS;
3331                 for (i = 0; i < DIV_ROUND_UP(    3118                 for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
3332                         struct swap_cluster_i << 
3333                         idx = i * SWAP_CLUSTE    3119                         idx = i * SWAP_CLUSTER_COLS + j;
3334                         ci = cluster_info + i << 
3335                         if (idx >= nr_cluster    3120                         if (idx >= nr_clusters)
3336                                 continue;        3121                                 continue;
3337                         if (ci->count) {      !! 3122                         if (cluster_count(&cluster_info[idx]))
3338                                 ci->flags = C << 
3339                                 list_add_tail << 
3340                                 continue;        3123                                 continue;
3341                         }                     !! 3124                         cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
3342                         ci->flags = CLUSTER_F !! 3125                         cluster_list_add_tail(&p->free_clusters, cluster_info,
3343                         list_add_tail(&ci->li !! 3126                                               idx);
3344                 }                                3127                 }
3345         }                                        3128         }
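        /* Toy trace of the interleave: with SWAP_CLUSTER_COLS == 4,
         * nr_clusters == 10 and col == 0, idx = i * 4 + (k + col) % 4
         * visits
         *
         *   0, 4, 8,  1, 5, 9,  2, 6,  3, 7
         *
         * so adjacent free-list entries are never adjacent clusters. */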
                                                   >> 3129         return nr_extents;
                                                   >> 3130 }
                                                   >> 3131 
                                                   >> 3132 /*
                                                   >> 3133  * Helper to sys_swapon determining if a given swap
                                                   >> 3134  * backing device queue supports DISCARD operations.
                                                   >> 3135  */
                                                   >> 3136 static bool swap_discardable(struct swap_info_struct *si)
                                                   >> 3137 {
                                                   >> 3138         struct request_queue *q = bdev_get_queue(si->bdev);
3346                                                  3139 
3347         return cluster_info;                  !! 3140         if (!q || !blk_queue_discard(q))
                                                   >> 3141                 return false;
3348                                                  3142 
3349 err_free:                                     !! 3143         return true;
3350         kvfree(cluster_info);                 << 
3351 err:                                          << 
3352         return ERR_PTR(err);                  << 
3353 }                                                3144 }
3354                                                  3145 
3355 SYSCALL_DEFINE2(swapon, const char __user *,     3146 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
3356 {                                                3147 {
3357         struct swap_info_struct *si;          !! 3148         struct swap_info_struct *p;
3358         struct filename *name;                   3149         struct filename *name;
3359         struct file *swap_file = NULL;           3150         struct file *swap_file = NULL;
3360         struct address_space *mapping;           3151         struct address_space *mapping;
3361         struct dentry *dentry;                   3152         struct dentry *dentry;
3362         int prio;                                3153         int prio;
3363         int error;                               3154         int error;
3364         union swap_header *swap_header;          3155         union swap_header *swap_header;
3365         int nr_extents;                          3156         int nr_extents;
3366         sector_t span;                           3157         sector_t span;
3367         unsigned long maxpages;                  3158         unsigned long maxpages;
3368         unsigned char *swap_map = NULL;          3159         unsigned char *swap_map = NULL;
3369         unsigned long *zeromap = NULL;        << 
3370         struct swap_cluster_info *cluster_inf    3160         struct swap_cluster_info *cluster_info = NULL;
3371         struct folio *folio = NULL;           !! 3161         unsigned long *frontswap_map = NULL;
                                                   >> 3162         struct page *page = NULL;
3372         struct inode *inode = NULL;              3163         struct inode *inode = NULL;
3373         bool inced_nr_rotate_swap = false;       3164         bool inced_nr_rotate_swap = false;
3374                                                  3165 
3375         if (swap_flags & ~SWAP_FLAGS_VALID)      3166         if (swap_flags & ~SWAP_FLAGS_VALID)
3376                 return -EINVAL;                  3167                 return -EINVAL;
3377                                                  3168 
3378         if (!capable(CAP_SYS_ADMIN))             3169         if (!capable(CAP_SYS_ADMIN))
3379                 return -EPERM;                   3170                 return -EPERM;
3380                                                  3171 
3381         if (!swap_avail_heads)                   3172         if (!swap_avail_heads)
3382                 return -ENOMEM;                  3173                 return -ENOMEM;
3383                                                  3174 
3384         si = alloc_swap_info();               !! 3175         p = alloc_swap_info();
3385         if (IS_ERR(si))                       !! 3176         if (IS_ERR(p))
3386                 return PTR_ERR(si);           !! 3177                 return PTR_ERR(p);
3387                                                  3178 
3388         INIT_WORK(&si->discard_work, swap_dis !! 3179         INIT_WORK(&p->discard_work, swap_discard_work);
3389         INIT_WORK(&si->reclaim_work, swap_rec << 
3390                                                  3180 
3391         name = getname(specialfile);             3181         name = getname(specialfile);
3392         if (IS_ERR(name)) {                      3182         if (IS_ERR(name)) {
3393                 error = PTR_ERR(name);           3183                 error = PTR_ERR(name);
3394                 name = NULL;                     3184                 name = NULL;
3395                 goto bad_swap;                   3185                 goto bad_swap;
3396         }                                        3186         }
3397         swap_file = file_open_name(name, O_RD !! 3187         swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
3398         if (IS_ERR(swap_file)) {                 3188         if (IS_ERR(swap_file)) {
3399                 error = PTR_ERR(swap_file);      3189                 error = PTR_ERR(swap_file);
3400                 swap_file = NULL;                3190                 swap_file = NULL;
3401                 goto bad_swap;                   3191                 goto bad_swap;
3402         }                                        3192         }
3403                                                  3193 
3404         si->swap_file = swap_file;            !! 3194         p->swap_file = swap_file;
3405         mapping = swap_file->f_mapping;          3195         mapping = swap_file->f_mapping;
3406         dentry = swap_file->f_path.dentry;       3196         dentry = swap_file->f_path.dentry;
3407         inode = mapping->host;                   3197         inode = mapping->host;
3408                                                  3198 
3409         error = claim_swapfile(si, inode);    !! 3199         error = claim_swapfile(p, inode);
3410         if (unlikely(error))                     3200         if (unlikely(error))
3411                 goto bad_swap;                   3201                 goto bad_swap;
3412                                                  3202 
3413         inode_lock(inode);                       3203         inode_lock(inode);
3414         if (d_unlinked(dentry) || cant_mount(    3204         if (d_unlinked(dentry) || cant_mount(dentry)) {
3415                 error = -ENOENT;                 3205                 error = -ENOENT;
3416                 goto bad_swap_unlock_inode;      3206                 goto bad_swap_unlock_inode;
3417         }                                        3207         }
3418         if (IS_SWAPFILE(inode)) {                3208         if (IS_SWAPFILE(inode)) {
3419                 error = -EBUSY;                  3209                 error = -EBUSY;
3420                 goto bad_swap_unlock_inode;      3210                 goto bad_swap_unlock_inode;
3421         }                                        3211         }
3422                                                  3212 
3423         /*                                       3213         /*
3424          * Read the swap header.                 3214          * Read the swap header.
3425          */                                      3215          */
3426         if (!mapping->a_ops->read_folio) {    !! 3216         if (!mapping->a_ops->readpage) {
3427                 error = -EINVAL;                 3217                 error = -EINVAL;
3428                 goto bad_swap_unlock_inode;      3218                 goto bad_swap_unlock_inode;
3429         }                                        3219         }
3430         folio = read_mapping_folio(mapping, 0 !! 3220         page = read_mapping_page(mapping, 0, swap_file);
3431         if (IS_ERR(folio)) {                  !! 3221         if (IS_ERR(page)) {
3432                 error = PTR_ERR(folio);       !! 3222                 error = PTR_ERR(page);
3433                 goto bad_swap_unlock_inode;      3223                 goto bad_swap_unlock_inode;
3434         }                                        3224         }
3435         swap_header = kmap_local_folio(folio, !! 3225         swap_header = kmap(page);
3436                                                  3226 
3437         maxpages = read_swap_header(si, swap_ !! 3227         maxpages = read_swap_header(p, swap_header, inode);
3438         if (unlikely(!maxpages)) {               3228         if (unlikely(!maxpages)) {
3439                 error = -EINVAL;                 3229                 error = -EINVAL;
3440                 goto bad_swap_unlock_inode;      3230                 goto bad_swap_unlock_inode;
3441         }                                        3231         }
3442                                                  3232 
3443         /* OK, set up the swap map and apply     3233         /* OK, set up the swap map and apply the bad block list */
3444         swap_map = vzalloc(maxpages);            3234         swap_map = vzalloc(maxpages);
3445         if (!swap_map) {                         3235         if (!swap_map) {
3446                 error = -ENOMEM;                 3236                 error = -ENOMEM;
3447                 goto bad_swap_unlock_inode;      3237                 goto bad_swap_unlock_inode;
3448         }                                        3238         }
3449                                                  3239 
3450         error = swap_cgroup_swapon(si->type,  !! 3240         if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue))
3451         if (error)                            !! 3241                 p->flags |= SWP_STABLE_WRITES;
3452                 goto bad_swap_unlock_inode;   << 
3453                                                  3242 
3454         nr_extents = setup_swap_map_and_exten !! 3243         if (p->bdev && p->bdev->bd_disk->fops->rw_page)
3455                                               !! 3244                 p->flags |= SWP_SYNCHRONOUS_IO;
3456         if (unlikely(nr_extents < 0)) {       << 
3457                 error = nr_extents;           << 
3458                 goto bad_swap_unlock_inode;   << 
3459         }                                     << 
3460                                                  3245 
3461         /*                                    !! 3246         if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
3462          * Use kvmalloc_array instead of bitm !! 3247                 int cpu;
3463          * be above MAX_PAGE_ORDER in case of !! 3248                 unsigned long ci, nr_cluster;
3464          */                                   !! 3249 
3465         zeromap = kvmalloc_array(BITS_TO_LONG !! 3250                 p->flags |= SWP_SOLIDSTATE;
3466                                     GFP_KERNE !! 3251                 p->cluster_next_cpu = alloc_percpu(unsigned int);
3467         if (!zeromap) {                       !! 3252                 if (!p->cluster_next_cpu) {
3468                 error = -ENOMEM;              !! 3253                         error = -ENOMEM;
3469                 goto bad_swap_unlock_inode;   !! 3254                         goto bad_swap_unlock_inode;
3470         }                                     !! 3255                 }
3471                                               !! 3256                 /*
3472         if (si->bdev && bdev_stable_writes(si !! 3257                  * select a random position to start with to help wear leveling
3473                 si->flags |= SWP_STABLE_WRITE !! 3258                  * on SSDs
3474                                               !! 3259                  */
3475         if (si->bdev && bdev_synchronous(si-> !! 3260                 for_each_possible_cpu(cpu) {
3476                 si->flags |= SWP_SYNCHRONOUS_ !! 3261                         per_cpu(*p->cluster_next_cpu, cpu) =
                                                   >> 3262                                 1 + prandom_u32_max(p->highest_bit);
                                                   >> 3263                 }
                                                   >> 3264                 nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
                                                   >> 3265 
                                                   >> 3266                 cluster_info = kvcalloc(nr_cluster, sizeof(*cluster_info),
                                                   >> 3267                                         GFP_KERNEL);
                                                   >> 3268                 if (!cluster_info) {
                                                   >> 3269                         error = -ENOMEM;
                                                   >> 3270                         goto bad_swap_unlock_inode;
                                                   >> 3271                 }
3477                                                  3272 
3478         if (si->bdev && bdev_nonrot(si->bdev) !! 3273                 for (ci = 0; ci < nr_cluster; ci++)
3479                 si->flags |= SWP_SOLIDSTATE;  !! 3274                         spin_lock_init(&((cluster_info + ci)->lock));
3480                                                  3275 
3481                 cluster_info = setup_clusters !! 3276                 p->percpu_cluster = alloc_percpu(struct percpu_cluster);
3482                 if (IS_ERR(cluster_info)) {   !! 3277                 if (!p->percpu_cluster) {
3483                         error = PTR_ERR(clust !! 3278                         error = -ENOMEM;
3484                         cluster_info = NULL;  << 
3485                         goto bad_swap_unlock_    3279                         goto bad_swap_unlock_inode;
3486                 }                                3280                 }
                                                   >> 3281                 for_each_possible_cpu(cpu) {
                                                   >> 3282                         struct percpu_cluster *cluster;
                                                   >> 3283                         cluster = per_cpu_ptr(p->percpu_cluster, cpu);
                                                   >> 3284                         cluster_set_null(&cluster->index);
                                                   >> 3285                 }
3487         } else {                                 3286         } else {
3488                 atomic_inc(&nr_rotate_swap);     3287                 atomic_inc(&nr_rotate_swap);
3489                 inced_nr_rotate_swap = true;     3288                 inced_nr_rotate_swap = true;
3490         }                                        3289         }
3491                                                  3290 
3492         if ((swap_flags & SWAP_FLAG_DISCARD)  !! 3291         error = swap_cgroup_swapon(p->type, maxpages);
3493             si->bdev && bdev_max_discard_sect !! 3292         if (error)
                                                   >> 3293                 goto bad_swap_unlock_inode;
                                                   >> 3294 
                                                   >> 3295         nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
                                                   >> 3296                 cluster_info, maxpages, &span);
                                                   >> 3297         if (unlikely(nr_extents < 0)) {
                                                   >> 3298                 error = nr_extents;
                                                   >> 3299                 goto bad_swap_unlock_inode;
                                                   >> 3300         }
                                                   >> 3301         /* frontswap enabled? set up bit-per-page map for frontswap */
                                                   >> 3302         if (IS_ENABLED(CONFIG_FRONTSWAP))
                                                   >> 3303                 frontswap_map = kvcalloc(BITS_TO_LONGS(maxpages),
                                                   >> 3304                                          sizeof(long),
                                                   >> 3305                                          GFP_KERNEL);
                                                   >> 3306 
                                                   >> 3307         if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
3494                 /*                               3308                 /*
3495                  * When discard is enabled fo    3309                  * When discard is enabled for swap with no particular
3496                  * policy flagged, we set all    3310                  * policy flagged, we set all swap discard flags here in
3497                  * order to sustain backward     3311                  * order to sustain backward compatibility with older
3498                  * swapon(8) releases.           3312                  * swapon(8) releases.
3499                  */                              3313                  */
3500                 si->flags |= (SWP_DISCARDABLE !! 3314                 p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
3501                              SWP_PAGE_DISCARD    3315                              SWP_PAGE_DISCARD);
3502                                                  3316 
3503                 /*                               3317                 /*
3504                  * By flagging sys_swapon, a     3318                  * By flagging sys_swapon, a sysadmin can tell us to
3505                  * either do single-time area    3319                  * either do single-time area discards only, or to just
3506                  * perform discards for relea    3320                  * perform discards for released swap page-clusters.
3507                  * Now it's time to adjust th    3321                  * Now it's time to adjust the p->flags accordingly.
3508                  */                              3322                  */
3509                 if (swap_flags & SWAP_FLAG_DI    3323                 if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
3510                         si->flags &= ~SWP_PAG !! 3324                         p->flags &= ~SWP_PAGE_DISCARD;
3511                 else if (swap_flags & SWAP_FL    3325                 else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
3512                         si->flags &= ~SWP_ARE !! 3326                         p->flags &= ~SWP_AREA_DISCARD;
3513                                                  3327 
3514                 /* issue a swapon-time discar    3328                 /* issue a swapon-time discard if it's still required */
3515                 if (si->flags & SWP_AREA_DISC !! 3329                 if (p->flags & SWP_AREA_DISCARD) {
3516                         int err = discard_swa !! 3330                         int err = discard_swap(p);
3517                         if (unlikely(err))       3331                         if (unlikely(err))
3518                                 pr_err("swapo    3332                                 pr_err("swapon: discard_swap(%p): %d\n",
3519                                         si, e !! 3333                                         p, err);
3520                 }                                3334                 }
3521         }                                        3335         }
3522                                                  3336 
3523         error = init_swap_address_space(si->t !! 3337         error = init_swap_address_space(p->type, maxpages);
3524         if (error)                               3338         if (error)
3525                 goto bad_swap_unlock_inode;      3339                 goto bad_swap_unlock_inode;
3526                                                  3340 
3527         error = zswap_swapon(si->type, maxpag << 
3528         if (error)                            << 
3529                 goto free_swap_address_space; << 
3530                                               << 
3531         /*                                       3341         /*
3532          * Flush any pending IO and dirty map    3342          * Flush any pending IO and dirty mappings before we start using this
3533          * swap device.                          3343          * swap device.
3534          */                                      3344          */
3535         inode->i_flags |= S_SWAPFILE;            3345         inode->i_flags |= S_SWAPFILE;
3536         error = inode_drain_writes(inode);       3346         error = inode_drain_writes(inode);
3537         if (error) {                             3347         if (error) {
3538                 inode->i_flags &= ~S_SWAPFILE    3348                 inode->i_flags &= ~S_SWAPFILE;
3539                 goto free_swap_zswap;         !! 3349                 goto free_swap_address_space;
3540         }                                        3350         }
3541                                                  3351 
3542         mutex_lock(&swapon_mutex);               3352         mutex_lock(&swapon_mutex);
3543         prio = -1;                               3353         prio = -1;
3544         if (swap_flags & SWAP_FLAG_PREFER)       3354         if (swap_flags & SWAP_FLAG_PREFER)
3545                 prio =                           3355                 prio =
3546                   (swap_flags & SWAP_FLAG_PRI    3356                   (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
3547         enable_swap_info(si, prio, swap_map,  !! 3357         enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map);
3548                                                  3358 
3549         pr_info("Adding %uk swap on %s.  Prio !! 3359         pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
3550                 K(si->pages), name->name, si- !! 3360                 p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
3551                 K((unsigned long long)span),  !! 3361                 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
3552                 (si->flags & SWP_SOLIDSTATE)  !! 3362                 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
3553                 (si->flags & SWP_DISCARDABLE) !! 3363                 (p->flags & SWP_DISCARDABLE) ? "D" : "",
3554                 (si->flags & SWP_AREA_DISCARD !! 3364                 (p->flags & SWP_AREA_DISCARD) ? "s" : "",
3555                 (si->flags & SWP_PAGE_DISCARD !! 3365                 (p->flags & SWP_PAGE_DISCARD) ? "c" : "",
                                                   >> 3366                 (frontswap_map) ? "FS" : "");
3556                                                  3367 
3557         mutex_unlock(&swapon_mutex);             3368         mutex_unlock(&swapon_mutex);
3558         atomic_inc(&proc_poll_event);            3369         atomic_inc(&proc_poll_event);
3559         wake_up_interruptible(&proc_poll_wait    3370         wake_up_interruptible(&proc_poll_wait);
3560                                                  3371 
3561         error = 0;                               3372         error = 0;
3562         goto out;                                3373         goto out;
3563 free_swap_zswap:                              << 
3564         zswap_swapoff(si->type);              << 
3565 free_swap_address_space:                         3374 free_swap_address_space:
3566         exit_swap_address_space(si->type);    !! 3375         exit_swap_address_space(p->type);
3567 bad_swap_unlock_inode:                           3376 bad_swap_unlock_inode:
3568         inode_unlock(inode);                     3377         inode_unlock(inode);
3569 bad_swap:                                        3378 bad_swap:
3570         free_percpu(si->percpu_cluster);      !! 3379         free_percpu(p->percpu_cluster);
3571         si->percpu_cluster = NULL;            !! 3380         p->percpu_cluster = NULL;
3572         free_percpu(si->cluster_next_cpu);    !! 3381         free_percpu(p->cluster_next_cpu);
3573         si->cluster_next_cpu = NULL;          !! 3382         p->cluster_next_cpu = NULL;
                                                   >> 3383         if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
                                                   >> 3384                 set_blocksize(p->bdev, p->old_block_size);
                                                   >> 3385                 blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
                                                   >> 3386         }
3574         inode = NULL;                            3387         inode = NULL;
3575         destroy_swap_extents(si);             !! 3388         destroy_swap_extents(p);
3576         swap_cgroup_swapoff(si->type);        !! 3389         swap_cgroup_swapoff(p->type);
3577         spin_lock(&swap_lock);                   3390         spin_lock(&swap_lock);
3578         si->swap_file = NULL;                 !! 3391         p->swap_file = NULL;
3579         si->flags = 0;                        !! 3392         p->flags = 0;
3580         spin_unlock(&swap_lock);                 3393         spin_unlock(&swap_lock);
3581         vfree(swap_map);                         3394         vfree(swap_map);
3582         kvfree(zeromap);                      << 
3583         kvfree(cluster_info);                    3395         kvfree(cluster_info);
                                                   >> 3396         kvfree(frontswap_map);
3584         if (inced_nr_rotate_swap)                3397         if (inced_nr_rotate_swap)
3585                 atomic_dec(&nr_rotate_swap);     3398                 atomic_dec(&nr_rotate_swap);
3586         if (swap_file)                           3399         if (swap_file)
3587                 filp_close(swap_file, NULL);     3400                 filp_close(swap_file, NULL);
3588 out:                                             3401 out:
3589         if (!IS_ERR_OR_NULL(folio))           !! 3402         if (page && !IS_ERR(page)) {
3590                 folio_release_kmap(folio, swa !! 3403                 kunmap(page);
                                                   >> 3404                 put_page(page);
                                                   >> 3405         }
3591         if (name)                                3406         if (name)
3592                 putname(name);                   3407                 putname(name);
3593         if (inode)                               3408         if (inode)
3594                 inode_unlock(inode);             3409                 inode_unlock(inode);
3595         if (!error)                              3410         if (!error)
3596                 enable_swap_slots_cache();       3411                 enable_swap_slots_cache();
3597         return error;                            3412         return error;
3598 }                                                3413 }
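
For orientation, the swap_flags decoded above arrive unchanged from the
swapon(2) system call. A minimal userspace sketch, assuming glibc's
<sys/swap.h>, root privileges, and a swap area already prepared with
mkswap; "/swapfile" is a placeholder path, not taken from this source:

    #include <stdio.h>
    #include <sys/swap.h>   /* swapon(), SWAP_FLAG_PREFER, SWAP_FLAG_PRIO_* */

    int main(void)
    {
            /* Pack priority 5 exactly the way sys_swapon() unpacks it:
             * (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT
             */
            int flags = SWAP_FLAG_PREFER |
                        ((5 << SWAP_FLAG_PRIO_SHIFT) & SWAP_FLAG_PRIO_MASK);

            if (swapon("/swapfile", flags) != 0) {
                    perror("swapon");
                    return 1;
            }
            return 0;
    }
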
3599                                                  3414 
3600 void si_swapinfo(struct sysinfo *val)            3415 void si_swapinfo(struct sysinfo *val)
3601 {                                                3416 {
3602         unsigned int type;                       3417         unsigned int type;
3603         unsigned long nr_to_be_unused = 0;       3418         unsigned long nr_to_be_unused = 0;
3604                                                  3419 
3605         spin_lock(&swap_lock);                   3420         spin_lock(&swap_lock);
3606         for (type = 0; type < nr_swapfiles; t    3421         for (type = 0; type < nr_swapfiles; type++) {
3607                 struct swap_info_struct *si =    3422                 struct swap_info_struct *si = swap_info[type];
3608                                                  3423 
3609                 if ((si->flags & SWP_USED) &&    3424                 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK))
3610                         nr_to_be_unused += RE !! 3425                         nr_to_be_unused += si->inuse_pages;
3611         }                                        3426         }
3612         val->freeswap = atomic_long_read(&nr_    3427         val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
3613         val->totalswap = total_swap_pages + n    3428         val->totalswap = total_swap_pages + nr_to_be_unused;
3614         spin_unlock(&swap_lock);                 3429         spin_unlock(&swap_lock);
3615 }                                                3430 }
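
si_swapinfo() is what backs the freeswap/totalswap fields of sysinfo(2)
(and, through /proc/meminfo, SwapFree/SwapTotal). A small sketch of reading
them from userspace; both fields are counts of mem_unit-sized blocks:

    #include <stdio.h>
    #include <sys/sysinfo.h>

    int main(void)
    {
            struct sysinfo info;

            if (sysinfo(&info) != 0) {
                    perror("sysinfo");
                    return 1;
            }
            /* freeswap/totalswap are scaled by mem_unit bytes. */
            printf("swap: %llu of %llu MiB free\n",
                   (unsigned long long)info.freeswap * info.mem_unit >> 20,
                   (unsigned long long)info.totalswap * info.mem_unit >> 20);
            return 0;
    }
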
3616                                                  3431 
3617 /*                                               3432 /*
3618  * Verify that nr swap entries are valid and  !! 3433  * Verify that a swap entry is valid and increment its swap map count.
3619  *                                               3434  *
3620  * Returns error code in the following cases.    3435  * Returns error code in the following cases.
3621  * - success -> 0                                3436  * - success -> 0
3622  * - swp_entry is invalid -> EINVAL              3437  * - swp_entry is invalid -> EINVAL
3623  * - swp_entry is migration entry -> EINVAL      3438  * - swp_entry is migration entry -> EINVAL
3624  * - swap-cache reference is requested but th    3439  * - swap-cache reference is requested but there is already one. -> EEXIST
3625  * - swap-cache reference is requested but th    3440  * - swap-cache reference is requested but the entry is not used. -> ENOENT
3626  * - swap-mapped reference requested but need    3441  * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
3627  */                                              3442  */
3628 static int __swap_duplicate(swp_entry_t entry !! 3443 static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
3629 {                                                3444 {
3630         struct swap_info_struct *si;          !! 3445         struct swap_info_struct *p;
3631         struct swap_cluster_info *ci;            3446         struct swap_cluster_info *ci;
3632         unsigned long offset;                    3447         unsigned long offset;
3633         unsigned char count;                     3448         unsigned char count;
3634         unsigned char has_cache;                 3449         unsigned char has_cache;
3635         int err, i;                           !! 3450         int err;
3636                                                  3451 
3637         si = swp_swap_info(entry);            !! 3452         p = get_swap_device(entry);
                                                   >> 3453         if (!p)
                                                   >> 3454                 return -EINVAL;
3638                                                  3455 
3639         offset = swp_offset(entry);              3456         offset = swp_offset(entry);
3640         VM_WARN_ON(nr > SWAPFILE_CLUSTER - of !! 3457         ci = lock_cluster_or_swap_info(p, offset);
3641         VM_WARN_ON(usage == 1 && nr > 1);     << 
3642         ci = lock_cluster_or_swap_info(si, of << 
3643                                                  3458 
3644         err = 0;                              !! 3459         count = p->swap_map[offset];
3645         for (i = 0; i < nr; i++) {            << 
3646                 count = si->swap_map[offset + << 
3647                                                  3460 
3648                 /*                            !! 3461         /*
3649                  * swapin_readahead() doesn't !! 3462          * swapin_readahead() doesn't check if a swap entry is valid, so the
3650                  * swap entry could be SWAP_M !! 3463          * swap entry could be SWAP_MAP_BAD. Check here with lock held.
3651                  */                           !! 3464          */
3652                 if (unlikely(swap_count(count !! 3465         if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
3653                         err = -ENOENT;        !! 3466                 err = -ENOENT;
3654                         goto unlock_out;      !! 3467                 goto unlock_out;
3655                 }                             !! 3468         }
3656                                                  3469 
3657                 has_cache = count & SWAP_HAS_ !! 3470         has_cache = count & SWAP_HAS_CACHE;
3658                 count &= ~SWAP_HAS_CACHE;     !! 3471         count &= ~SWAP_HAS_CACHE;
                                                   >> 3472         err = 0;
3659                                                  3473 
3660                 if (!count && !has_cache) {   !! 3474         if (usage == SWAP_HAS_CACHE) {
3661                         err = -ENOENT;        << 
3662                 } else if (usage == SWAP_HAS_ << 
3663                         if (has_cache)        << 
3664                                 err = -EEXIST << 
3665                 } else if ((count & ~COUNT_CO << 
3666                         err = -EINVAL;        << 
3667                 }                             << 
3668                                                  3475 
3669                 if (err)                      !! 3476                 /* set SWAP_HAS_CACHE if there is no cache and entry is used */
3670                         goto unlock_out;      !! 3477                 if (!has_cache && count)
3671         }                                     !! 3478                         has_cache = SWAP_HAS_CACHE;
                                                   >> 3479                 else if (has_cache)             /* someone else added cache */
                                                   >> 3480                         err = -EEXIST;
                                                   >> 3481                 else                            /* no users remaining */
                                                   >> 3482                         err = -ENOENT;
3672                                                  3483 
3673         for (i = 0; i < nr; i++) {            !! 3484         } else if (count || has_cache) {
3674                 count = si->swap_map[offset + << 
3675                 has_cache = count & SWAP_HAS_ << 
3676                 count &= ~SWAP_HAS_CACHE;     << 
3677                                                  3485 
3678                 if (usage == SWAP_HAS_CACHE)  !! 3486                 if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
3679                         has_cache = SWAP_HAS_ << 
3680                 else if ((count & ~COUNT_CONT << 
3681                         count += usage;          3487                         count += usage;
3682                 else if (swap_count_continued !! 3488                 else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
                                                   >> 3489                         err = -EINVAL;
                                                   >> 3490                 else if (swap_count_continued(p, offset, count))
3683                         count = COUNT_CONTINU    3491                         count = COUNT_CONTINUED;
3684                 else {                        !! 3492                 else
3685                         /*                    << 
3686                          * Don't need to roll << 
3687                          * usage == 1, there  << 
3688                          */                   << 
3689                         err = -ENOMEM;           3493                         err = -ENOMEM;
3690                         goto unlock_out;      !! 3494         } else
3691                 }                             !! 3495                 err = -ENOENT;                  /* unused swap entry */
3692                                                  3496 
3693                 WRITE_ONCE(si->swap_map[offse !! 3497         WRITE_ONCE(p->swap_map[offset], count | has_cache);
3694         }                                     << 
3695                                                  3498 
3696 unlock_out:                                      3499 unlock_out:
3697         unlock_cluster_or_swap_info(si, ci);  !! 3500         unlock_cluster_or_swap_info(p, ci);
                                                   >> 3501         if (p)
                                                   >> 3502                 put_swap_device(p);
3698         return err;                              3503         return err;
3699 }                                                3504 }
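
Everything __swap_duplicate() tracks fits in one unsigned char per swap page:
a reference count in the low bits, plus SWAP_HAS_CACHE and COUNT_CONTINUED as
flags. A standalone model of that encoding, with constant values copied from
include/linux/swap.h (the decode helper mirrors the kernel's swap_count() but
is illustrative, not kernel API):

    #include <stdio.h>

    #define SWAP_HAS_CACHE  0x40    /* page lives in the swap cache */
    #define COUNT_CONTINUED 0x80    /* count carries into continuation pages */
    #define SWAP_MAP_MAX    0x3e    /* largest count the byte itself holds */
    #define SWAP_MAP_BAD    0x3f    /* bad-block marker */

    /* Strip the cache flag, leaving the count plus continuation bit. */
    static unsigned char swap_count(unsigned char ent)
    {
            return ent & ~SWAP_HAS_CACHE;
    }

    int main(void)
    {
            unsigned char ent = 3 | SWAP_HAS_CACHE; /* 3 maps + swap cache */

            printf("count=%u cached=%s\n",
                   swap_count(ent) & ~COUNT_CONTINUED,
                   (ent & SWAP_HAS_CACHE) ? "yes" : "no");
            return 0;
    }
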
3700                                                  3505 
3701 /*                                               3506 /*
3702  * Help swapoff by noting that swap entry bel    3507  * Help swapoff by noting that swap entry belongs to shmem/tmpfs
3703  * (in which case its reference count is neve    3508  * (in which case its reference count is never incremented).
3704  */                                              3509  */
3705 void swap_shmem_alloc(swp_entry_t entry, int  !! 3510 void swap_shmem_alloc(swp_entry_t entry)
3706 {                                                3511 {
3707         __swap_duplicate(entry, SWAP_MAP_SHME !! 3512         __swap_duplicate(entry, SWAP_MAP_SHMEM);
3708 }                                                3513 }
3709                                                  3514 
3710 /*                                               3515 /*
3711  * Increase reference count of swap entry by     3516  * Increase reference count of swap entry by 1.
3712  * Returns 0 for success, or -ENOMEM if a swa    3517  * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
3713  * but could not be atomically allocated.  Re    3518  * but could not be atomically allocated.  Returns 0, just as if it succeeded,
3714  * if __swap_duplicate() fails for another re    3519  * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
3715  * might occur if a page table entry has got     3520  * might occur if a page table entry has got corrupted.
3716  */                                              3521  */
3717 int swap_duplicate(swp_entry_t entry)            3522 int swap_duplicate(swp_entry_t entry)
3718 {                                                3523 {
3719         int err = 0;                             3524         int err = 0;
3720                                                  3525 
3721         while (!err && __swap_duplicate(entry !! 3526         while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
3722                 err = add_swap_count_continua    3527                 err = add_swap_count_continuation(entry, GFP_ATOMIC);
3723         return err;                              3528         return err;
3724 }                                                3529 }
3725                                                  3530 
3726 /*                                               3531 /*
3727  * @entry: first swap entry from which we all !! 3532  * @entry: swap entry for which we allocate swap cache.
3728  *                                               3533  *
3729  * Called when allocating swap cache for exis !! 3534  * Called when allocating swap cache for existing swap entry.
3730  * This can return error codes. Returns 0 on     3535  * This can return error codes. Returns 0 on success.
3731  * -EEXIST means there is a swap cache.          3536  * -EEXIST means there is a swap cache.
3732  * Note: return code is different from swap_d    3537  * Note: return code is different from swap_duplicate().
3733  */                                              3538  */
3734 int swapcache_prepare(swp_entry_t entry, int  !! 3539 int swapcache_prepare(swp_entry_t entry)
3735 {                                                3540 {
3736         return __swap_duplicate(entry, SWAP_H !! 3541         return __swap_duplicate(entry, SWAP_HAS_CACHE);
3737 }                                                3542 }
3738                                                  3543 
3739 void swapcache_clear(struct swap_info_struct  !! 3544 struct swap_info_struct *swp_swap_info(swp_entry_t entry)
3740 {                                                3545 {
3741         unsigned long offset = swp_offset(ent !! 3546         return swap_type_to_swap_info(swp_type(entry));
3742                                               << 
3743         cluster_swap_free_nr(si, offset, nr,  << 
3744 }                                                3547 }
3745                                                  3548 
3746 struct swap_info_struct *swp_swap_info(swp_en !! 3549 struct swap_info_struct *page_swap_info(struct page *page)
3747 {                                                3550 {
3748         return swap_type_to_swap_info(swp_typ !! 3551         swp_entry_t entry = { .val = page_private(page) };
                                                   >> 3552         return swp_swap_info(entry);
3749 }                                                3553 }
3750                                                  3554 
3751 /*                                               3555 /*
3752  * out-of-line methods to avoid include hell. !! 3556  * out-of-line __page_file_ methods to avoid include hell.
3753  */                                              3557  */
3754 struct address_space *swapcache_mapping(struc !! 3558 struct address_space *__page_file_mapping(struct page *page)
3755 {                                                3559 {
3756         return swp_swap_info(folio->swap)->sw !! 3560         return page_swap_info(page)->swap_file->f_mapping;
3757 }                                                3561 }
3758 EXPORT_SYMBOL_GPL(swapcache_mapping);         !! 3562 EXPORT_SYMBOL_GPL(__page_file_mapping);
3759                                                  3563 
3760 pgoff_t __folio_swap_cache_index(struct folio !! 3564 pgoff_t __page_file_index(struct page *page)
3761 {                                                3565 {
3762         return swap_cache_index(folio->swap); !! 3566         swp_entry_t swap = { .val = page_private(page) };
                                                   >> 3567         return swp_offset(swap);
3763 }                                                3568 }
3764 EXPORT_SYMBOL_GPL(__folio_swap_cache_index);  !! 3569 EXPORT_SYMBOL_GPL(__page_file_index);
3765                                                  3570 
3766 /*                                               3571 /*
3767  * add_swap_count_continuation - called when     3572  * add_swap_count_continuation - called when a swap count is duplicated
3768  * beyond SWAP_MAP_MAX, it allocates a new pa    3573  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
3769  * page of the original vmalloc'ed swap_map,     3574  * page of the original vmalloc'ed swap_map, to hold the continuation count
3770  * (for that entry and for its neighbouring P    3575  * (for that entry and for its neighbouring PAGE_SIZE swap entries).  Called
3771  * again when count is duplicated beyond SWAP    3576  * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc.
3772  *                                               3577  *
3773  * These continuation pages are seldom refere    3578  * These continuation pages are seldom referenced: the common paths all work
3774  * on the original swap_map, only referring t    3579  * on the original swap_map, only referring to a continuation page when the
3775  * low "digit" of a count is incremented or d    3580  * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.
3776  *                                               3581  *
3777  * add_swap_count_continuation(, GFP_ATOMIC)     3582  * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding
3778  * page table locks; if it fails, add_swap_co    3583  * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)
3779  * can be called after dropping locks.           3584  * can be called after dropping locks.
3780  */                                              3585  */
3781 int add_swap_count_continuation(swp_entry_t e    3586 int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
3782 {                                                3587 {
3783         struct swap_info_struct *si;             3588         struct swap_info_struct *si;
3784         struct swap_cluster_info *ci;            3589         struct swap_cluster_info *ci;
3785         struct page *head;                       3590         struct page *head;
3786         struct page *page;                       3591         struct page *page;
3787         struct page *list_page;                  3592         struct page *list_page;
3788         pgoff_t offset;                          3593         pgoff_t offset;
3789         unsigned char count;                     3594         unsigned char count;
3790         int ret = 0;                             3595         int ret = 0;
3791                                                  3596 
3792         /*                                       3597         /*
3793          * When debugging, it's easier to use    3598          * When debugging, it's easier to use __GFP_ZERO here; but it's better
3794          * for latency not to zero a page whi    3599          * for latency not to zero a page while GFP_ATOMIC and holding locks.
3795          */                                      3600          */
3796         page = alloc_page(gfp_mask | __GFP_HI    3601         page = alloc_page(gfp_mask | __GFP_HIGHMEM);
3797                                                  3602 
3798         si = get_swap_device(entry);             3603         si = get_swap_device(entry);
3799         if (!si) {                               3604         if (!si) {
3800                 /*                               3605                 /*
3801                  * An acceptable race has occ    3606                  * An acceptable race has occurred since the failing
3802                  * __swap_duplicate(): the sw    3607                  * __swap_duplicate(): the swap device may be swapoff
3803                  */                              3608                  */
3804                 goto outer;                      3609                 goto outer;
3805         }                                        3610         }
3806         spin_lock(&si->lock);                    3611         spin_lock(&si->lock);
3807                                                  3612 
3808         offset = swp_offset(entry);              3613         offset = swp_offset(entry);
3809                                                  3614 
3810         ci = lock_cluster(si, offset);           3615         ci = lock_cluster(si, offset);
3811                                                  3616 
3812         count = swap_count(si->swap_map[offse    3617         count = swap_count(si->swap_map[offset]);
3813                                                  3618 
3814         if ((count & ~COUNT_CONTINUED) != SWA    3619         if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
3815                 /*                               3620                 /*
3816                  * The higher the swap count,    3621                  * The higher the swap count, the more likely it is that tasks
3817                  * will race to add swap coun    3622                  * will race to add swap count continuation: we need to avoid
3818                  * over-provisioning.            3623                  * over-provisioning.
3819                  */                              3624                  */
3820                 goto out;                        3625                 goto out;
3821         }                                        3626         }
3822                                                  3627 
3823         if (!page) {                             3628         if (!page) {
3824                 ret = -ENOMEM;                   3629                 ret = -ENOMEM;
3825                 goto out;                        3630                 goto out;
3826         }                                        3631         }
3827                                                  3632 
                                                   >> 3633         /*
                                                   >> 3634          * We are fortunate that although vmalloc_to_page uses pte_offset_map,
                                                   >> 3635          * no architecture is using highmem pages for kernel page tables: so it
                                                   >> 3636          * will not corrupt the GFP_ATOMIC caller's atomic page table kmaps.
                                                   >> 3637          */
3828         head = vmalloc_to_page(si->swap_map +    3638         head = vmalloc_to_page(si->swap_map + offset);
3829         offset &= ~PAGE_MASK;                    3639         offset &= ~PAGE_MASK;
3830                                                  3640 
3831         spin_lock(&si->cont_lock);               3641         spin_lock(&si->cont_lock);
3832         /*                                       3642         /*
3833          * Page allocation does not initializ    3643          * Page allocation does not initialize the page's lru field,
3834          * but it does always reset its priva    3644          * but it does always reset its private field.
3835          */                                      3645          */
3836         if (!page_private(head)) {               3646         if (!page_private(head)) {
3837                 BUG_ON(count & COUNT_CONTINUE    3647                 BUG_ON(count & COUNT_CONTINUED);
3838                 INIT_LIST_HEAD(&head->lru);      3648                 INIT_LIST_HEAD(&head->lru);
3839                 set_page_private(head, SWP_CO    3649                 set_page_private(head, SWP_CONTINUED);
3840                 si->flags |= SWP_CONTINUED;      3650                 si->flags |= SWP_CONTINUED;
3841         }                                        3651         }
3842                                                  3652 
3843         list_for_each_entry(list_page, &head-    3653         list_for_each_entry(list_page, &head->lru, lru) {
3844                 unsigned char *map;              3654                 unsigned char *map;
3845                                                  3655 
3846                 /*                               3656                 /*
3847                  * If the previous map said n    3657                  * If the previous map said no continuation, but we've found
3848                  * a continuation page, free     3658                  * a continuation page, free our allocation and use this one.
3849                  */                              3659                  */
3850                 if (!(count & COUNT_CONTINUED    3660                 if (!(count & COUNT_CONTINUED))
3851                         goto out_unlock_cont;    3661                         goto out_unlock_cont;
3852                                                  3662 
3853                 map = kmap_local_page(list_pa !! 3663                 map = kmap_atomic(list_page) + offset;
3854                 count = *map;                    3664                 count = *map;
3855                 kunmap_local(map);            !! 3665                 kunmap_atomic(map);
3856                                                  3666 
3857                 /*                               3667                 /*
3858                  * If this continuation count    3668                  * If this continuation count now has some space in it,
3859                  * free our allocation and us    3669                  * free our allocation and use this one.
3860                  */                              3670                  */
3861                 if ((count & ~COUNT_CONTINUED    3671                 if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
3862                         goto out_unlock_cont;    3672                         goto out_unlock_cont;
3863         }                                        3673         }
3864                                                  3674 
3865         list_add_tail(&page->lru, &head->lru)    3675         list_add_tail(&page->lru, &head->lru);
3866         page = NULL;                    /* no    3676         page = NULL;                    /* now it's attached, don't free it */
3867 out_unlock_cont:                                 3677 out_unlock_cont:
3868         spin_unlock(&si->cont_lock);             3678         spin_unlock(&si->cont_lock);
3869 out:                                             3679 out:
3870         unlock_cluster(ci);                      3680         unlock_cluster(ci);
3871         spin_unlock(&si->lock);                  3681         spin_unlock(&si->lock);
3872         put_swap_device(si);                     3682         put_swap_device(si);
3873 outer:                                           3683 outer:
3874         if (page)                                3684         if (page)
3875                 __free_page(page);               3685                 __free_page(page);
3876         return ret;                              3686         return ret;
3877 }                                                3687 }
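
The structure this builds, sketched informally: the vmalloc'ed page backing a
stretch of swap_map doubles as a list head, continuation pages hang off its
lru list, and a given entry's extra count "digits" all live at the same byte
offset in every page of that list:

    swap_map page (head; page_private == SWP_CONTINUED)
        |  ->lru
        v
    continuation page 1 : byte[offset] = least-significant extra digit
        |  ->lru             (COUNT_CONTINUED set once the count has
        v                     carried past this digit)
    continuation page 2 : byte[offset] = next, more significant digit
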
3878                                                  3688 
3879 /*                                               3689 /*
3880  * swap_count_continued - when the original s    3690  * swap_count_continued - when the original swap_map count is incremented
3881  * from SWAP_MAP_MAX, check if there is alrea    3691  * from SWAP_MAP_MAX, check if there is already a continuation page to carry
3882  * into, carry if so, or else fail until a ne    3692  * into, carry if so, or else fail until a new continuation page is allocated;
3883  * when the original swap_map count is decrem    3693  * when the original swap_map count is decremented from 0 with continuation,
3884  * borrow from the continuation and report wh    3694  * borrow from the continuation and report whether it still holds more.
3885  * Called while __swap_duplicate() or swap_en    3695  * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster
3886  * lock.                                         3696  * lock.
3887  */                                              3697  */
3888 static bool swap_count_continued(struct swap_    3698 static bool swap_count_continued(struct swap_info_struct *si,
3889                                  pgoff_t offs    3699                                  pgoff_t offset, unsigned char count)
3890 {                                                3700 {
3891         struct page *head;                       3701         struct page *head;
3892         struct page *page;                       3702         struct page *page;
3893         unsigned char *map;                      3703         unsigned char *map;
3894         bool ret;                                3704         bool ret;
3895                                                  3705 
3896         head = vmalloc_to_page(si->swap_map +    3706         head = vmalloc_to_page(si->swap_map + offset);
3897         if (page_private(head) != SWP_CONTINU    3707         if (page_private(head) != SWP_CONTINUED) {
3898                 BUG_ON(count & COUNT_CONTINUE    3708                 BUG_ON(count & COUNT_CONTINUED);
3899                 return false;           /* ne    3709                 return false;           /* need to add count continuation */
3900         }                                        3710         }
3901                                                  3711 
3902         spin_lock(&si->cont_lock);               3712         spin_lock(&si->cont_lock);
3903         offset &= ~PAGE_MASK;                    3713         offset &= ~PAGE_MASK;
3904         page = list_next_entry(head, lru);       3714         page = list_next_entry(head, lru);
3905         map = kmap_local_page(page) + offset; !! 3715         map = kmap_atomic(page) + offset;
3906                                                  3716 
3907         if (count == SWAP_MAP_MAX)      /* in    3717         if (count == SWAP_MAP_MAX)      /* initial increment from swap_map */
3908                 goto init_map;          /* ju    3718                 goto init_map;          /* jump over SWAP_CONT_MAX checks */
3909                                                  3719 
3910         if (count == (SWAP_MAP_MAX | COUNT_CO    3720         if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */
3911                 /*                               3721                 /*
3912                  * Think of how you add 1 to     3722                  * Think of how you add 1 to 999
3913                  */                              3723                  */
3914                 while (*map == (SWAP_CONT_MAX    3724                 while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
3915                         kunmap_local(map);    !! 3725                         kunmap_atomic(map);
3916                         page = list_next_entr    3726                         page = list_next_entry(page, lru);
3917                         BUG_ON(page == head);    3727                         BUG_ON(page == head);
3918                         map = kmap_local_page !! 3728                         map = kmap_atomic(page) + offset;
3919                 }                                3729                 }
3920                 if (*map == SWAP_CONT_MAX) {     3730                 if (*map == SWAP_CONT_MAX) {
3921                         kunmap_local(map);    !! 3731                         kunmap_atomic(map);
3922                         page = list_next_entr    3732                         page = list_next_entry(page, lru);
3923                         if (page == head) {      3733                         if (page == head) {
3924                                 ret = false;     3734                                 ret = false;    /* add count continuation */
3925                                 goto out;        3735                                 goto out;
3926                         }                        3736                         }
3927                         map = kmap_local_page !! 3737                         map = kmap_atomic(page) + offset;
3928 init_map:               *map = 0;                3738 init_map:               *map = 0;               /* we didn't zero the page */
3929                 }                                3739                 }
3930                 *map += 1;                       3740                 *map += 1;
3931                 kunmap_local(map);            !! 3741                 kunmap_atomic(map);
3932                 while ((page = list_prev_entr    3742                 while ((page = list_prev_entry(page, lru)) != head) {
3933                         map = kmap_local_page !! 3743                         map = kmap_atomic(page) + offset;
3934                         *map = COUNT_CONTINUE    3744                         *map = COUNT_CONTINUED;
3935                         kunmap_local(map);    !! 3745                         kunmap_atomic(map);
3936                 }                                3746                 }
3937                 ret = true;                      3747                 ret = true;                     /* incremented */
3938                                                  3748 
3939         } else {                                 3749         } else {                                /* decrementing */
3940                 /*                               3750                 /*
3941                  * Think of how you subtract     3751                  * Think of how you subtract 1 from 1000
3942                  */                              3752                  */
3943                 BUG_ON(count != COUNT_CONTINU    3753                 BUG_ON(count != COUNT_CONTINUED);
3944                 while (*map == COUNT_CONTINUE    3754                 while (*map == COUNT_CONTINUED) {
3945                         kunmap_local(map);    !! 3755                         kunmap_atomic(map);
3946                         page = list_next_entr    3756                         page = list_next_entry(page, lru);
3947                         BUG_ON(page == head);    3757                         BUG_ON(page == head);
3948                         map = kmap_local_page !! 3758                         map = kmap_atomic(page) + offset;
3949                 }                                3759                 }
3950                 BUG_ON(*map == 0);               3760                 BUG_ON(*map == 0);
3951                 *map -= 1;                       3761                 *map -= 1;
3952                 if (*map == 0)                   3762                 if (*map == 0)
3953                         count = 0;               3763                         count = 0;
3954                 kunmap_local(map);            !! 3764                 kunmap_atomic(map);
3955                 while ((page = list_prev_entr    3765                 while ((page = list_prev_entry(page, lru)) != head) {
3956                         map = kmap_local_page !! 3766                         map = kmap_atomic(page) + offset;
3957                         *map = SWAP_CONT_MAX     3767                         *map = SWAP_CONT_MAX | count;
3958                         count = COUNT_CONTINU    3768                         count = COUNT_CONTINUED;
3959                         kunmap_local(map);    !! 3769                         kunmap_atomic(map);
3960                 }                                3770                 }
3961                 ret = count == COUNT_CONTINUE    3771                 ret = count == COUNT_CONTINUED;
3962         }                                        3772         }
3963 out:                                             3773 out:
3964         spin_unlock(&si->cont_lock);             3774         spin_unlock(&si->cont_lock);
3965         return ret;                              3775         return ret;
3966 }                                                3776 }
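
The carry/borrow walk above is ordinary positional arithmetic: the swap_map
byte saturates at SWAP_MAP_MAX (0x3e) and each continuation byte is one digit
holding up to SWAP_CONT_MAX (0x7f). A hedged standalone model of the increment
path, with an array standing in for the page list and the COUNT_CONTINUED flag
bookkeeping omitted for brevity:

    #include <stdbool.h>
    #include <stdio.h>

    #define SWAP_MAP_MAX    0x3e    /* cap of the swap_map byte itself */
    #define SWAP_CONT_MAX   0x7f    /* cap of each continuation digit */

    /* digit[0] models the swap_map byte, digit[1..n-1] the continuations. */
    static bool count_inc(unsigned char *digit, int n)
    {
            int i;

            if (digit[0] < SWAP_MAP_MAX) {
                    digit[0]++;             /* common case: no continuation */
                    return true;
            }
            for (i = 1; i < n; i++) {
                    if (digit[i] < SWAP_CONT_MAX) {
                            digit[i]++;     /* carry absorbed here */
                            return true;
                    }
                    digit[i] = 0;           /* full digit: reset, keep carrying */
            }
            return false;   /* caller must add_swap_count_continuation() */
    }

    int main(void)
    {
            unsigned char d[3] = { SWAP_MAP_MAX, SWAP_CONT_MAX, 0 };

            /* Like adding 1 to 999: the full digit resets and the next one
             * becomes 1 -- exactly the walk swap_count_continued() does.
             */
            printf("ok=%d digits=%u,%u,%u\n",
                   count_inc(d, 3), d[0], d[1], d[2]);
            return 0;
    }
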
3967                                                  3777 
3968 /*                                               3778 /*
3969  * free_swap_count_continuations - swapoff fr    3779  * free_swap_count_continuations - swapoff frees all the continuation pages
3970  * appended to the swap_map, after swap_map i    3780  * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.
3971  */                                              3781  */
3972 static void free_swap_count_continuations(str    3782 static void free_swap_count_continuations(struct swap_info_struct *si)
3973 {                                                3783 {
3974         pgoff_t offset;                          3784         pgoff_t offset;
3975                                                  3785 
3976         for (offset = 0; offset < si->max; of    3786         for (offset = 0; offset < si->max; offset += PAGE_SIZE) {
3977                 struct page *head;               3787                 struct page *head;
3978                 head = vmalloc_to_page(si->sw    3788                 head = vmalloc_to_page(si->swap_map + offset);
3979                 if (page_private(head)) {        3789                 if (page_private(head)) {
3980                         struct page *page, *n    3790                         struct page *page, *next;
3981                                                  3791 
3982                         list_for_each_entry_s    3792                         list_for_each_entry_safe(page, next, &head->lru, lru) {
3983                                 list_del(&pag    3793                                 list_del(&page->lru);
3984                                 __free_page(p    3794                                 __free_page(page);
3985                         }                        3795                         }
3986                 }                                3796                 }
3987         }                                        3797         }
3988 }                                                3798 }
3989                                                  3799 
3990 #if defined(CONFIG_MEMCG) && defined(CONFIG_B    3800 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
3991 void __folio_throttle_swaprate(struct folio * !! 3801 void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
3992 {                                                3802 {
3993         struct swap_info_struct *si, *next;      3803         struct swap_info_struct *si, *next;
3994         int nid = folio_nid(folio);           !! 3804         int nid = page_to_nid(page);
3995                                               << 
3996         if (!(gfp & __GFP_IO))                << 
3997                 return;                       << 
3998                                                  3805 
3999         if (!__has_usable_swap())             !! 3806         if (!(gfp_mask & __GFP_IO))
4000                 return;                          3807                 return;
4001                                                  3808 
4002         if (!blk_cgroup_congested())             3809         if (!blk_cgroup_congested())
4003                 return;                          3810                 return;
4004                                                  3811 
4005         /*                                       3812         /*
4006          * We've already scheduled a throttle, avoid taking the global swap    3813          * We've already scheduled a throttle, avoid taking the global swap
4007          * lock.                                 3814          * lock.
4008          */                                      3815          */
4009         if (current->throttle_disk)           !! 3816         if (current->throttle_queue)
4010                 return;                          3817                 return;
4011                                                  3818 
4012         spin_lock(&swap_avail_lock);             3819         spin_lock(&swap_avail_lock);
4013         plist_for_each_entry_safe(si, next, &swap_avail_heads[nid],            3820         plist_for_each_entry_safe(si, next, &swap_avail_heads[nid],
4014                                   avail_lists[nid]) {                          3821                                   avail_lists[nid]) {
4015                 if (si->bdev) {                  3822                 if (si->bdev) {
4016                         blkcg_schedule_throttle(si->bdev->bd_disk, true);   !!  3823                         blkcg_schedule_throttle(bdev_get_queue(si->bdev), true);
4017                         break;                   3824                         break;
4018                 }                                3825                 }
4019         }                                        3826         }
4020         spin_unlock(&swap_avail_lock);           3827         spin_unlock(&swap_avail_lock);
4021 }                                                3828 }
4022 #endif                                           3829 #endif
4023                                                  3830 
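/*
 * Editor's note: __folio_throttle_swaprate() only does work when it is
 * cheap to decide it must: the allocation has to allow I/O, swap has to be
 * in use, the blk-cgroup has to be congested, and, crucially, it re-checks
 * current->throttle_disk so a task that has already scheduled a throttle
 * never retakes the global swap_avail_lock. A sketch of that per-task
 * short-circuit pattern follows, using a thread-local flag and a
 * hypothetical schedule_throttle() helper (both invented for illustration).
 */
#include <stdbool.h>
#include <stdio.h>

static _Thread_local bool throttle_pending;     /* stands in for           */
                                                /* current->throttle_disk  */

/* Hypothetical expensive step: in the kernel this is the walk of
 * swap_avail_heads[nid] under swap_avail_lock. */
static void schedule_throttle(void)
{
        puts("throttle scheduled");
        throttle_pending = true;
}

static void maybe_throttle(bool io_allowed, bool congested)
{
        if (!io_allowed || !congested)
                return;                 /* cheap checks first */
        if (throttle_pending)
                return;                 /* already queued: skip global lock */
        schedule_throttle();
}

int main(void)
{
        maybe_throttle(true, true);     /* schedules once   */
        maybe_throttle(true, true);     /* short-circuits   */
        return 0;
}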
4024 static int __init swapfile_init(void)            3831 static int __init swapfile_init(void)
4025 {                                                3832 {
4026         int nid;                                 3833         int nid;
4027                                                  3834 
4028         swap_avail_heads = kmalloc_array(nr_node_ids, sizeof(struct plist_head),    3835         swap_avail_heads = kmalloc_array(nr_node_ids, sizeof(struct plist_head),
4029                                          GFP_    3836                                          GFP_KERNEL);
4030         if (!swap_avail_heads) {                 3837         if (!swap_avail_heads) {
4031                 pr_emerg("Not enough memory for swap heads, swap is disabled\n");   3838                 pr_emerg("Not enough memory for swap heads, swap is disabled\n");
4032                 return -ENOMEM;                  3839                 return -ENOMEM;
4033         }                                        3840         }
4034                                                  3841 
4035         for_each_node(nid)                       3842         for_each_node(nid)
4036                 plist_head_init(&swap_avail_heads[nid]);                       3843                 plist_head_init(&swap_avail_heads[nid]);
4037                                               << 
4038         swapfile_maximum_size = arch_max_swapfile_size();                   << 
4039                                               << 
4040 #ifdef CONFIG_MIGRATION                       << 
4041         if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))           << 
4042                 swap_migration_ad_supported = true;                         << 
4043 #endif  /* CONFIG_MIGRATION */                << 
4044                                                  3844 
4045         return 0;                                3845         return 0;
4046 }                                                3846 }
4047 subsys_initcall(swapfile_init);                  3847 subsys_initcall(swapfile_init);
4048                                                  3848 
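/*
 * Editor's note: besides sizing the per-node plist array, the 6.12 side of
 * swapfile_init() caches arch_max_swapfile_size() and derives whether
 * migration entries can also carry young/dirty (A/D) bits: they can only
 * when the architecture's swap entries hold offsets of at least
 * 1UL << SWP_MIG_TOTAL_BITS, leaving spare bits for A/D. The same
 * capability-probe shape in a userspace sketch; the two TOY_ constants are
 * invented stand-ins for the architecture-specific values.
 */
#include <stdbool.h>
#include <stdio.h>

#define TOY_ARCH_MAX_OFFSET (1ULL << 60)   /* invented arch capacity       */
#define TOY_MIG_TOTAL_BITS  58             /* invented entry bit budget    */

static bool migration_ad_supported;

/* Enable the optional feature only when the arch can represent offsets
 * beyond what a migration entry needs once the A/D bits are added. */
static void probe_features(void)
{
        if (TOY_ARCH_MAX_OFFSET >= (1ULL << TOY_MIG_TOTAL_BITS))
                migration_ad_supported = true;
}

int main(void)
{
        probe_features();
        printf("A/D bits in migration entries: %s\n",
               migration_ad_supported ? "supported" : "not supported");
        return 0;
}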
